Created
July 27, 2025 03:46
-
-
Save mwgamera/97cc4bc29307edbc30234b20f5351b37 to your computer and use it in GitHub Desktop.
๐๐ฑ๐๐พ๐ฏ ๐๐น๐๐ฆ๐, ๐๐ฑ๐ n+1. ยท๐๐ป๐คโ๐ UCA ๐ฅ๐ช๐ก๐ต๐ค ๐ฅ๐ฑ๐๐ ๐ฆ๐ ๐ฅ๐น ๐ฉ๐ฏ๐ถ๐ฆ๐ ๐๐จ๐ฏ ICU ๐น glibc ๐ค๐ด๐๐ญ๐ค
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| # ๐๐น๐๐ฆ๐ ๐๐ฑ๐๐พ๐ฏ ๐๐๐ฎ๐ฆ๐๐ ๐ฟ๐๐ฆ๐ ๐ ยท๐ฟ๐ฏ๐ฆ๐๐ด๐ ๐๐ฉ๐ค๐ฑ๐๐ฉ๐ฏ ๐จ๐ค๐๐ผ๐ฆ๐๐ฉ๐ฅ ๐ฅ๐ช๐ก๐ต๐ค | |
| # ๐๐ด ๐๐ ๐ฆ๐ ๐๐จ๐ฏ ๐๐ค๐ฑ ๐ฏ๐ฒ๐๐ค๐ฆ ๐ข๐ฆ๐ ๐ฅ๐ฆ๐๐๐ ๐๐๐ฎ๐ฆ๐๐๐ ๐ฏ ๐ณ๐๐ผ ๐๐ฑ๐ค๐ผ๐ฆ๐ | |
| # klg, Jul 2025 | |
| package Unicode::Collate::Shavian; | |
| use strict; | |
| use base 'Unicode::Collate'; | |
| use Carp; | |
| our $version = Unicode::Collate->new->version; | |
| our $entry = do { | |
| my $start = { # ๐ฆ๐ฏ๐ฆ๐๐ฉ๐ค ๐ข๐ฑ๐ | |
| '13.0.0' => 0x4B8F, | |
| '15.1.0' => 0x4D7A, | |
| }->{$version}; | |
| warn "Unknown DUCET version" unless $start; | |
| unless (defined $start) { | |
| for (@INC) { | |
| next unless -f (my $fn = "$_/Unicode/Collate/allkeys.txt"); | |
| open my $f, '<:raw', $fn or die $!; | |
| while (<$f>) { | |
| next if /^[%#]/; | |
| if (/^\@version (.*)$/) { | |
| die "DUCET version mismatch: found $1, expected $version" | |
| unless $version eq $1; | |
| } | |
| if (/^10450\b/) { | |
| chomp, s/[%#].*\z//s, s/\s+//g; | |
| die "Syntax error" unless | |
| m!^[0-9A-F]+;\[(.)([0-9A-F]+)\.[^]]+\](.?)!i; | |
| die "Unexpected sequence of collation elements" if $3 eq '['; | |
| die "Unexpected variable collation element" if $1 eq '*'; | |
| die "Syntax error" unless $1 eq '.'; | |
| $start = hex $2; | |
| last; | |
| } | |
| } | |
| close $f or die $!; | |
| last; | |
| } | |
| } | |
| die "Unknown DUCET version" unless $start; | |
| my @a = (0, map sprintf("%04X",$start+$_), 0..42); | |
| << "# EOF"; | |
| 10450 ; [.$a[ 1].0020.0002] # SHAVIAN LETTER PEE | |
| 1045A ; [.$a[ 2].0020.0002] # SHAVIAN LETTER BAY | |
| 10451 ; [.$a[ 3].0020.0002] # SHAVIAN LETTER TEE | |
| 1045B ; [.$a[ 4].0020.0002] # SHAVIAN LETTER DAY | |
| 10452 ; [.$a[ 5].0020.0002] # SHAVIAN LETTER KEY | |
| 1045C ; [.$a[ 6].0020.0002] # SHAVIAN LETTER GAY | |
| 10453 ; [.$a[ 7].0020.0002] # SHAVIAN LETTER FEE | |
| 1045D ; [.$a[ 8].0020.0002] # SHAVIAN LETTER VIE | |
| 10454 ; [.$a[ 9].0020.0002] # SHAVIAN LETTER THAW | |
| 1045E ; [.$a[10].0020.0002] # SHAVIAN LETTER THEY | |
| 10455 ; [.$a[11].0020.0002] # SHAVIAN LETTER SEE | |
| 1045F ; [.$a[12].0020.0002] # SHAVIAN LETTER ZOO | |
| 10456 ; [.$a[13].0020.0002] # SHAVIAN LETTER SHE | |
| 10460 ; [.$a[14].0020.0002] # SHAVIAN LETTER ZHAY | |
| 10457 ; [.$a[15].0020.0002] # SHAVIAN LETTER CHEER | |
| 10461 ; [.$a[16].0020.0002] # SHAVIAN LETTER JAY | |
| 10458 ; [.$a[17].0020.0002] # SHAVIAN LETTER YE | |
| 10462 ; [.$a[18].0020.0002] # SHAVIAN LETTER WAY | |
| 10459 ; [.$a[19].0020.0002] # SHAVIAN LETTER ING | |
| 10463 ; [.$a[20].0020.0002] # SHAVIAN LETTER HAY | |
| 10464 ; [.$a[21].0020.0002] # SHAVIAN LETTER LEE | |
| 1046E ; [.$a[22].0020.0002] # SHAVIAN LETTER RAY | |
| 10465 ; [.$a[23].0020.0002] # SHAVIAN LETTER ME | |
| 1046F ; [.$a[24].0020.0002] # SHAVIAN LETTER NAY | |
| 10466 ; [.$a[25].0020.0002] # SHAVIAN LETTER IT | |
| 10470 ; [.$a[26].0020.0002] # SHAVIAN LETTER EASE | |
| 10467 ; [.$a[27].0020.0002] # SHAVIAN LETTER EGG | |
| 10471 ; [.$a[28].0020.0002] # SHAVIAN LETTER ACE | |
| 10468 ; [.$a[29].0020.0002] # SHAVIAN LETTER AT | |
| 10472 ; [.$a[30].0020.0002] # SHAVIAN LETTER ICE | |
| 10469 ; [.$a[31].0020.0002] # SHAVIAN LETTER AGO | |
| 10473 ; [.$a[32].0020.0002] # SHAVIAN LETTER US | |
| 1046A ; [.$a[33].0020.0002] # SHAVIAN LETTER OT | |
| 10474 ; [.$a[34].0020.0002] # SHAVIAN LETTER OWE | |
| 1046B ; [.$a[35].0020.0002] # SHAVIAN LETTER UUT | |
| 10475 ; [.$a[36].0020.0002] # SHAVIAN LETTER OOZE | |
| 1046C ; [.$a[37].0020.0002] # SHAVIAN LETTER OUT | |
| 10476 ; [.$a[38].0020.0002] # SHAVIAN LETTER OY | |
| 1046D ; [.$a[39].0020.0002] # SHAVIAN LETTER ARE* | |
| 10477 ; [.$a[40].0020.0002] # SHAVIAN LETTER AWE | |
| # SHAVIAN LETTER AIR* | |
| # SHAVIAN LETTER URGE | |
| 1047E ; [.$a[43].0020.0002] # SHAVIAN LETTER EAR* | |
| 10478 ; [.$a[39].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE ARE | |
| 10479 ; [.$a[40].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE OR | |
| 1047A ; [.$a[41].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE AIR | |
| 1047B ; [.$a[42].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE ERR | |
| 1047C ; [.$a[31].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE ARRAY | |
| 1047D ; [.$a[43].0020.0003][.$a[22].0020.0002] # SHAVIAN LIGATURE EAR | |
| 1047F ; [.$a[17].0020.0003][.$a[36].0020.0002] # SHAVIAN LIGATURE YEW | |
| 1047A FE00 ; [.$a[41].0020.0002] # SHAVIAN LETTER AIR* (Inter Alia hack) | |
| 1047B FE00 ; [.$a[42].0020.0002] # SHAVIAN LETTER URGE (Inter Alia hack) | |
| E72A ; [.$a[41].0020.0002] # SHAVIAN LETTER AIR* (PUA as I use it, NOT CSUR) | |
| E72B ; [.$a[42].0020.0002] # SHAVIAN LETTER URGE (PUA as I use it, NOT CSUR) | |
| # EOF | |
| }; | |
| sub new { | |
| my ($cls, %opt) = @_; | |
| $opt{entry} = ($opt{entry}//"")."\n".$entry; | |
| $cls->SUPER::new(%opt); | |
| } | |
| return 1 unless $0 eq __FILE__; | |
| package main; | |
| use strict; | |
| use open qw/:std :locale/; | |
| my $collator = Unicode::Collate::Shavian->new(); | |
| my @lines = <>; | |
| @lines = $collator->sort(@lines); | |
| print for @lines; | |
| exit; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| use strict; | |
| use utf8; | |
| use open qw/:std :utf8/; | |
| use List::Util 'shuffle'; | |
| use lib '.'; | |
| use sort_uca; # Unicode::Collate::Shavian but yolo | |
| my (@test, %name); | |
| open my $test, '<:utf8', './test.txt' or die $!; | |
| while (<$test>) { | |
| next if /^#/; chomp; | |
| if (/^%%\s*(.*)$/) { | |
| push @test, []; | |
| $name{$test[-1]} = $1; | |
| next; | |
| } | |
| push @{$test[-1]}, $_; | |
| } | |
| close $test or die $!; | |
| @test = grep scalar @$_, @test; | |
| printf "1..%u\n", scalar @test; | |
| my $collator = Unicode::Collate::Shavian->new; | |
| for my $i (0 .. $#test) { | |
| my @x = @{$test[$i]}; | |
| my @y = shuffle(@x); | |
| @y = $collator->sort(@y); | |
| my $err = 0; | |
| $err += $x[$_] ne $y[$_] for 0..$#y; | |
| my $n = $name{$test[$i]} // ''; $n = " - $n" if $n; | |
| $n .= ' # TODO' if $n and $n =~ /\b๐ช๐๐๐ฉ๐ฏ๐ฉ๐ค\b/; | |
| if (!$err) { | |
| printf "ok %u%s\n", $i+1, $n; | |
| } else { | |
| printf "not ok %u%s\n", $i+1, $n; | |
| eval { | |
| require Algorithm::Diff; | |
| my $dif = Algorithm::Diff::sdiff(\@x, \@y); | |
| my $len = 0; for (@x) { $len = length if $len < length } | |
| for (@$dif) { | |
| my ($dir, $x, $y) = @$_; | |
| printf("# %-*s %s %s\n", $len, | |
| $x, $dir =~ y/uc+-/ :></r, $y); | |
| } | |
| }; | |
| } | |
| } | |
| 1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment