Skip to content

Instantly share code, notes, and snippets.

@bluebear94
Created October 21, 2017 23:22
Show Gist options
  • Save bluebear94/6ad6a2d72926bb360be24c023f743ecf to your computer and use it in GitHub Desktop.
Save bluebear94/6ad6a2d72926bb360be24c023f743ecf to your computer and use it in GitHub Desktop.
Create CSVs showing frequency of initial and final consonant clusters in Arka words containing at least one vowel
#!/usr/bin/env perl6
use Archive::Libarchive::Constants;
use Archive::Libarchive::Raw;
use LibCurl::Easy;
use NativeCall;
my %startclus;
my %endclus;
my @alpha = <t k x s n v f m d g p b h y c r z j w l>;
my %revalpha = @alpha.antipairs;
sub process-clusters(Str $word) {
return if $word !~~ / <[ a e i o u ]> /;
return if $word.chars < 2;
my $start = $word.substr(0, 2);
my $end = $word.substr(* - 2);
%startclus{$start}.push($word);
%endclus{$end}.push($word);
}
my $h = LibCurl::Easy.new(
URL => 'http://mindsc.ape.jp/klel/skol.zip'
).perform;
my $zip-content = $h.buf;
sub get-dict($zip-content) {
constant LIB = $*DISTRO.is-win
?? %?RESOURCES<libarchive.dll>.absolute
!! ('archive', v13);
sub archive_read_data(archive $a, Buf $buf, size_t $len --> int32) is native(LIB) { * }
my $archive = archive_read_new();
archive_read_support_filter_all($archive);
archive_read_support_format_zip($archive);
my $res = archive_read_open_memory(
$archive,
$zip-content, $zip-content.bytes);
die 'Unable to open archive' if $res != ARCHIVE_OK;
my $contentb = Buf.new;
my $entry = archive_entry.new;
loop {
$res = archive_read_next_header($archive, $entry);
last if $res == ARCHIVE_EOF;
die 'Unable to read entry' if $res > ARCHIVE_OK;
if (archive_entry_pathname($entry) eq 'skol/arka_xano.dat') {
loop {
constant SIZE = 1024;
my Buf $buf = Buf.new;
$buf[SIZE - 1] = 0;
my $res = archive_read_data($archive, $buf, SIZE);
last if $res == 0;
$contentb.append($buf.subbuf(0, $res));
}
last;
}
archive_read_data_skip($archive);
}
archive_read_free($archive);
return $contentb.decode;
}
my $content = get-dict($zip-content);
for $content.lines -> $line {
next if $line ~~ / <[ . , ]> /;
my $i = min(
$line.index(" ///"),
$line.index("("));
my $term = $line.substr(0, $i).chomp;
my @words = $term.split(/\s+/);
process-clusters($_) for @words;
}
sub write-report(%d, $fnc, $fnx) {
my $fhc = open($fnc, :w);
my $fhx = open($fnx, :w);
# write headers
for @alpha {
$fhc.print(",$_");
$fhx.print(",$_");
}
$fhc.print("\n");
$fhx.print("\n");
for ^@alpha -> $i {
$fhc.print(@alpha[$i]);
$fhx.print(@alpha[$i]);
for ^@alpha -> $j {
my $k = @alpha[$i] ~ @alpha[$j];
my $ec = %d{$k}:exists ?? %d{$k}.elems !! 0;
$fhc.print("," ~ $ec);
$fhx.print("," ~ (%d{$k}[0] // ""));
}
$fhc.print("\n");
$fhx.print("\n");
}
$fhc.close();
$fhx.close();
}
write-report(%startclus, "start-count.csv", "start-example.csv");
write-report(%endclus, "end-count.csv", "end-example.csv");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment