bluebear94 · October 21, 2017 23:22
diff --git a/amisoark.pl6 b/amisoark.pl6
 #!/usr/bin/env perl6

 use Archive::Libarchive::Constants;
 use Archive::Libarchive::Raw;
 use LibCurl::Easy;
 use NativeCall;

 my %startclus;
 my %endclus;
 my @alpha = <t k x s n v f m d g p b h y c r z j w l>;
 my %revalpha = @alpha.antipairs;

 sub process-clusters(Str $word) {
  return if $word !~~ / <[ a e i o u ]> /;
  return if $word.chars < 2;
  my $start = $word.substr(0, 2);
  my $end = $word.substr(* - 2);
  %startclus{$start}.push($word);
  %endclus{$end}.push($word);
 }

 my $h = LibCurl::Easy.new(
  URL => 'http://mindsc.ape.jp/klel/skol.zip'
  ).perform;

 my $zip-content = $h.buf;

 sub get-dict($zip-content) {
  constant LIB = $*DISTRO.is-win
                ?? %?RESOURCES<libarchive.dll>.absolute
                !! ('archive', v13);
  sub archive_read_data(archive $a, Buf $buf, size_t $len --> int32) is native(LIB) { * }
  my $archive = archive_read_new();
  archive_read_support_filter_all($archive);
  archive_read_support_format_zip($archive);
  my $res = archive_read_open_memory(
    $archive,
    $zip-content, $zip-content.bytes);
  die 'Unable to open archive' if $res != ARCHIVE_OK;

  my $contentb = Buf.new;
  my $entry = archive_entry.new;
  loop {
    $res = archive_read_next_header($archive, $entry);
    last if $res == ARCHIVE_EOF;
    die 'Unable to read entry' if $res > ARCHIVE_OK;
    if (archive_entry_pathname($entry) eq 'skol/arka_xano.dat') {
      loop {
        constant SIZE = 1024;
        my Buf $buf = Buf.new;
        $buf[SIZE - 1] = 0;
        my $res = archive_read_data($archive, $buf, SIZE);
        last if $res == 0;
        $contentb.append($buf.subbuf(0, $res));
      }
      last;
    }
    archive_read_data_skip($archive);
  }
  archive_read_free($archive);
  return $contentb.decode;
 }

 my $content = get-dict($zip-content);

 for $content.lines -> $line {
  next if $line ~~ / <[ . , ]> /;
  my $i = min(
    $line.index(" ///"),
    $line.index("("));
  my $term = $line.substr(0, $i).chomp;
  my @words = $term.split(/\s+/);
  process-clusters($_) for @words;
 }

 sub write-report(%d, $fnc, $fnx) {
  my $fhc = open($fnc, :w);
  my $fhx = open($fnx, :w);
  # write headers
  for @alpha {
    $fhc.print(",$_");
    $fhx.print(",$_");
  }
  $fhc.print("\n");
  $fhx.print("\n");
  for ^@alpha -> $i {
    $fhc.print(@alpha[$i]);
    $fhx.print(@alpha[$i]);
    for ^@alpha -> $j {
      my $k = @alpha[$i] ~ @alpha[$j];
      my $ec = %d{$k}:exists ?? %d{$k}.elems !! 0;
      $fhc.print("," ~ $ec);
      $fhx.print("," ~ (%d{$k}[0] // ""));
    }
    $fhc.print("\n");
    $fhx.print("\n");
  }
  $fhc.close();
  $fhx.close();
 }

 write-report(%startclus, "start-count.csv", "start-example.csv");
 write-report(%endclus, "end-count.csv", "end-example.csv");
	#!/usr/bin/env perl6

	use Archive::Libarchive::Constants;
	use Archive::Libarchive::Raw;
	use LibCurl::Easy;
	use NativeCall;

	my %startclus;
	my %endclus;
	my @alpha = <t k x s n v f m d g p b h y c r z j w l>;
	my %revalpha = @alpha.antipairs;

	sub process-clusters(Str $word) {
	return if $word !~~ / <[ a e i o u ]> /;
	return if $word.chars < 2;
	my $start = $word.substr(0, 2);
	my $end = $word.substr(* - 2);
	%startclus{$start}.push($word);
	%endclus{$end}.push($word);
	}

	my $h = LibCurl::Easy.new(
	URL => 'http://mindsc.ape.jp/klel/skol.zip'
	).perform;

	my $zip-content = $h.buf;

	sub get-dict($zip-content) {
	constant LIB = $*DISTRO.is-win
	?? %?RESOURCES<libarchive.dll>.absolute
	!! ('archive', v13);
	sub archive_read_data(archive $a, Buf $buf, size_t $len --> int32) is native(LIB) { * }
	my $archive = archive_read_new();
	archive_read_support_filter_all($archive);
	archive_read_support_format_zip($archive);
	my $res = archive_read_open_memory(
	$archive,
	$zip-content, $zip-content.bytes);
	die 'Unable to open archive' if $res != ARCHIVE_OK;

	my $contentb = Buf.new;
	my $entry = archive_entry.new;
	loop {
	$res = archive_read_next_header($archive, $entry);
	last if $res == ARCHIVE_EOF;
	die 'Unable to read entry' if $res > ARCHIVE_OK;
	if (archive_entry_pathname($entry) eq 'skol/arka_xano.dat') {
	loop {
	constant SIZE = 1024;
	my Buf $buf = Buf.new;
	$buf[SIZE - 1] = 0;
	my $res = archive_read_data($archive, $buf, SIZE);
	last if $res == 0;
	$contentb.append($buf.subbuf(0, $res));
	}
	last;
	}
	archive_read_data_skip($archive);
	}
	archive_read_free($archive);
	return $contentb.decode;
	}

	my $content = get-dict($zip-content);

	for $content.lines -> $line {
	next if $line ~~ / <[ . , ]> /;
	my $i = min(
	$line.index(" ///"),
	$line.index("("));
	my $term = $line.substr(0, $i).chomp;
	my @words = $term.split(/\s+/);
	process-clusters($_) for @words;
	}

	sub write-report(%d, $fnc, $fnx) {
	my $fhc = open($fnc, :w);
	my $fhx = open($fnx, :w);
	# write headers
	for @alpha {
	$fhc.print(",$_");
	$fhx.print(",$_");
	}
	$fhc.print("\n");
	$fhx.print("\n");
	for ^@alpha -> $i {
	$fhc.print(@alpha[$i]);
	$fhx.print(@alpha[$i]);
	for ^@alpha -> $j {
	my $k = @alpha[$i] ~ @alpha[$j];
	my $ec = %d{$k}:exists ?? %d{$k}.elems !! 0;
	$fhc.print("," ~ $ec);
	$fhx.print("," ~ (%d{$k}[0] // ""));
	}
	$fhc.print("\n");
	$fhx.print("\n");
	}
	$fhc.close();
	$fhx.close();
	}

	write-report(%startclus, "start-count.csv", "start-example.csv");
	write-report(%endclus, "end-count.csv", "end-example.csv");