#!/usr/bin/perl
use strict;
use warnings;

our $VERSION = '0.4';
use Encode qw/decode_utf8 encode_utf8/;
use Unicode::UCD qw/charinfo prop_invmap/;
use List::Util qw/max sum/;
use Getopt::Long qw/:config no_ignore_case/;

binmode STDOUT, ':encoding(UTF-8)';

GetOptions
    'char|c' => \my $opt_char,
    'decimal|d!' => \my $opt_decimal,
    'hex|x!' => \my $opt_hex,
    'hex-escape|X!' => \my $opt_hex_escaped,
    'name|n!' => \my $opt_name,
    'ascii|a!' => \my $opt_ascii,
    'help|h!' => \my $opt_help,
    'version|v!' => \my $opt_version
        or die usage();

die __FILE__ =~ s{.*/}{}r . " v$VERSION\n" if $opt_version;

my $set_opt_count = sum map { $_ // 0 } $opt_char, $opt_decimal, $opt_hex, $opt_hex_escaped, $opt_name, $opt_help;

die usage("Given options exclude each other") if $set_opt_count > 1;

die usage() if $opt_help || !@ARGV;

$opt_char = 1 if $set_opt_count == 0;

my @args;
if ($opt_decimal) {
    @args = [ map { chr } @ARGV ];
}
elsif ($opt_hex) {
    @args = [ map { chr hex $_ } @ARGV ];
}
elsif ($opt_hex_escaped) {
    @args = map { [ split //, decode_utf8_or_not($opt_ascii, pack 'H*', join '', /(?:\\x)?([0-9a-f]+)/gi) ] } @ARGV;
}
elsif ($opt_name) {
    @args = lookup_by_name(@ARGV);
}
elsif ($opt_char) {
    @args = map { [ split //, decode_utf8_or_not($opt_ascii, $_) ] } @ARGV;
}
else {
    die usage("No option passed, do not know what to do");
}

my @data;
my $codepoint_maxlen = 4;
my $utf8_maxlen = 2;
my $name_maxlen = 1;

my $nrun = 0;
for my $run (@args) {
    push @data, undef if $nrun++ > 0;

    for my $char (@$run) {
        my $codepoint = ord $char;
        my $charinfo = charinfo $codepoint;
        my $name = $charinfo->{name} || "NONEXISTENT CHAR";
        my $utf8 = join ' ', map { sprintf "%x", ord $_ } split //, encode_utf8 $char;
        $codepoint_maxlen = max $codepoint_maxlen, length sprintf '%X', $codepoint;
        $utf8_maxlen = max $utf8_maxlen, length $utf8;
        $name_maxlen = max $name_maxlen, length $name;

        push @data, {
            char => $name eq '<control>'
                ? join '', map "\\x$_", split ' ', $utf8
                : $char,
            codepoint => $codepoint,
            utf8 => $utf8,
            name => $name,
        };
    }
}

for my $line (@data) {
    if ($line) {
        printf
            "U+%0${codepoint_maxlen}X (%-${utf8_maxlen}s): %-${name_maxlen}s [%s]\n",
                $line->{codepoint},
                $line->{utf8},
                $line->{name},
                $line->{char};
    }
    else {
        print "\n";
    }
}

sub usage {
    my $name = __FILE__ =~ s{.*/}{}r;
    print "@_\n" if @_;
    <<~"EOS";
    $name [options] [mode] ...
        modes:
        -c <literal string> ... (this is the default)
        -d <decimal code point> ...
        -x <hexadecimal code point> ...
        -X <string containing only hexadecimal escapes \\xHH> ...
        -n <character name fragment> ...
        options:
        -a - treat input as bytes instead of utf8
        -h - this help
        -v - version
    v$VERSION
    EOS
}

sub lookup_by_name {
    my @search_terms = @_;

    my %cp;
    # All codepoints
    for my $cat (qw(Name Name_Alias)) {
        my ($codepoints, $names, $format, $default) = prop_invmap($cat);
        # $format => "n", $default => ""
        for my $i (0 .. @$codepoints - 2) {
            my ($cp, $n) = ($codepoints->[$i], $names->[$i]);
            # If $n is a ref, the same codepoint has multiple names
            for my $name (ref $n ? @$n : $n) {
                $cp{$name} //= $cp;
            }
        }
    }

    my @names = keys %cp;
    for my $term (@search_terms) {
        @names = grep { /$term/i } @names;
    }

    return [ map chr, sort { $a <=> $b } @cp{@names} ];
}

sub decode_utf8_or_not {
    my ($no_dont_do_it, $string) = @_;
    return $no_dont_do_it ? $string : decode_utf8($string);
}

[–] asudox@discuss.tchncs.de 3 points 11 months ago (1 children)

our version? now that is some syntax perl has.

[–] huf@hexbear.net 3 points 11 months ago

it's a lexical alias to a package variable. super simple stuff.

[–] m_f@midwest.social 1 points 11 months ago (1 children)

Thanks! Now I'm curious, do you write Perl regularly? I don't see too much of it, my sense is that it's still around in sysadmin type work though

[–] huf@hexbear.net 2 points 11 months ago

i write little things like the above in it for myself, but i've never used it much at work. but yeah, it's long past its peak and unlikely to ever recover, although it does still have a community using it and writing new libraries for it, maintaining and developing the core language.

it's a great language, i love it.