Last active
September 16, 2015 18:20
-
-
Save z448/5c93a55d2f64f48c7e4c to your computer and use it in GitHub Desktop.
minicpan file '02packages.details.txt' to json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Parses the package index of a CPAN mirror (see https://metacpan.org/pod/CPAN::Mini).
# 02packages.details.txt is a ~16 MB text file listing the latest versions of all (~33000) CPAN modules.
# minicpan2j.pl writes only the unique distribution names to JSON, to keep the size below 1 MB, which is the cache limit for WebClip in iOS 7+.
use IO::All; | |
use JSON::XS; #do it fast | |
use List::MoreUtils qw/ uniq /; | |
# Slurp the package index into an array, one element per line.
my @lines = io('02packages.details.txt')->slurp;

# State shared by the subs below:
#   @pkgs - values taken from the third column of the index by splitit()
#   %pkg  - scratch hash for a single output record
#   @p    - output records ({ t => name }) filled by unique() and
#           serialized by jay(); declared here so it is a file lexical
#           rather than an implicit package global
my (@pkgs, %pkg, @p);
# Collect the distribution path (third whitespace-separated column) of every
# data line in @lines and append it to @pkgs.
#
# 02packages.details.txt begins with a "Key: value" header and a blank line.
# Those lines either have no third column or have an ordinary word there, so
# they are skipped instead of being pushed as undef/junk entries.
#
# Takes no arguments; reads @lines, appends to @pkgs, returns nothing.
sub splitit {
    for my $line (@lines) {
        next unless defined $line;

        # split ' ' ignores leading whitespace and drops the trailing newline.
        my @fields = split ' ', $line;
        next if @fields < 3;

        # Data lines carry a path such as "A/AU/AUTHOR/Dist-1.0.tar.gz";
        # header words never contain a slash.
        my $path = $fields[2];
        next unless $path =~ m{/};

        push @pkgs, $path;
    }
    return;
}
# Reduce the distribution paths in @pkgs to unique, human-readable
# distribution names and append one { t => name } record per name to @p.
#
# For each path:
#   1. the directory part is removed          (A/AU/AUTHOR/Foo-Bar-1.0.tar.gz -> Foo-Bar-1.0.tar.gz)
#   2. the archive suffix is removed          (-> Foo-Bar-1.0)
#   3. the part after the last dash is removed (-> Foo-Bar)
#   4. remaining dashes become spaces          (-> "Foo Bar")
#
# Entries that are undefined or do not end in a known archive suffix are
# skipped, so no record is ever built from a stale or undefined regex capture.
# Names are de-duplicated AFTER normalization, so several versions of the
# same distribution yield a single record. First-seen order is preserved.
#
# Takes no arguments; reads @pkgs, appends to @p, returns nothing.
sub unique {
    my %seen;

    for my $path (@pkgs) {
        next unless defined $path;

        # Work on a copy so @pkgs itself is left untouched.
        my $name = $path;
        $name =~ s{.*/}{};    # strip leading directories

        # Drop the archive suffix; anything else is not a distribution file.
        $name =~ s{\.(?:tar\.(?:gz|bz2)|tgz|zip)\z}{} or next;

        $name =~ s{-[^-]*\z}{};    # strip the trailing "-version" part
        $name =~ tr/-/ /;          # dash to space

        next if $name eq '' or $seen{$name}++;

        push @p, { t => $name };
    }
    return;
}
# Serialize the records collected in @p as a single JSON array and write the
# result to STDOUT (no trailing newline is added).
sub jay {
    print encode_json(\@p);
}
# Run the pipeline: collect paths, reduce them to names, emit JSON.
splitit();
unique();
jay();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment