Skip to content

Instantly share code, notes, and snippets.

@z448
Last active September 16, 2015 18:20
Show Gist options
  • Save z448/5c93a55d2f64f48c7e4c to your computer and use it in GitHub Desktop.
Save z448/5c93a55d2f64f48c7e4c to your computer and use it in GitHub Desktop.
minicpan file '02packages.details.txt' to json
#!/usr/bin/env perl
# Parses the package index of a CPAN mirror (see https://metacpan.org/pod/CPAN::Mini).
# 02packages.details.txt is a 16 MB text file listing the latest versions of all (~33000) CPAN modules.
# minicpan2j.pl writes only the unique distribution names to JSON, which keeps the output below 1 MB, the cache limit for a WebClip on iOS 7+.
use IO::All;
use JSON::XS; #do it fast
use List::MoreUtils qw/ uniq /;
# Index file to read: the first command-line argument if one is given,
# otherwise 02packages.details.txt in the current directory.
my $index_file = @ARGV ? shift @ARGV : '02packages.details.txt';
my @lines = io($index_file)->slurp; # slurp file to array, one element per line
# File-level state shared by the subs below:
#   @pkgs - third column of every index line (filled by splitit)
#   %pkg  - scratch record
#   @p    - list of { t => name } records that jay() serialises
my (@pkgs, %pkg, @p);
# splitit: collect the third whitespace-separated column of every index entry.
#
# Reads the file-level @lines and appends to the file-level @pkgs.
# Takes no arguments and returns nothing useful.
sub splitit {
    # The index normally opens with a block of "Key: value" header lines that
    # is terminated by a blank line. Those lines are not package entries, so
    # they are skipped; a file without such a header is processed from line 1.
    my $in_header = @lines && $lines[0] =~ /\A[\w-]+:\s/ ? 1 : 0;

    for my $line (@lines) {
        if ($in_header) {
            $in_header = 0 if $line !~ /\S/;    # blank line ends the header
            next;
        }

        my @fields = split ' ', $line;          # split on runs of whitespace
        next if @fields < 3;                    # blank or malformed line

        push @pkgs, $fields[2];                 # keep only the last column
    }
    return;
}
# unique: reduce the collected paths to a list of distinct distribution names.
#
# Reads the file-level @pkgs and, for every path that looks like
# ".../Name-Parts-VERSION.<archive suffix>", appends { t => "Name Parts" } to
# @p. Each name is emitted once, in order of first appearance.
# Takes no arguments and returns nothing useful.
sub unique {
    # Basename up to the last dash, followed by a dash-free version and a
    # recognised archive suffix at the very end of the path.
    my $dist_re = qr{ (?: \A | / ) ([^/]+) - [^-/]* \. (?: tar\.gz | tgz | tar\.bz2 | zip ) \z }x;

    my %seen;
    for my $path (@pkgs) {
        next unless defined $path;

        # Only use the capture after a successful match; entries that do not
        # look like a distribution archive are skipped instead of producing a
        # stale or undefined name.
        next unless $path =~ $dist_re;
        my $name = $1;

        $name =~ tr/-/ /;             # dash to space

        # De-duplicate on the final name, not on the full path, so the same
        # distribution uploaded by several authors or in several versions
        # appears only once.
        next if $seen{$name}++;

        push @p, { t => $name };      # one single-key record per distribution
    }
    return;
}
# jay: serialise the package-level @p array as JSON and print it on the
# currently selected output handle. Takes no arguments.
sub jay {
    print encode_json(\@p);
}
# Run the three stages in order: collect paths, derive names, print JSON.
splitit();
unique();
jay();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment