Skip to content

Instantly share code, notes, and snippets.

@jow-
Created November 29, 2023 08:38
Show Gist options
  • Save jow-/a6bd981e3fcca35a8b908b0969bb534c to your computer and use it in GitHub Desktop.
Save jow-/a6bd981e3fcca35a8b908b0969bb534c to your computer and use it in GitHub Desktop.
Perl script to convert Wiki hwdata into a big JSON dictionary
#!/usr/bin/env perl
use strict;
use JSON;
# Turn the wiki's "unknown value" placeholders into undef.
#
# Parameters:
#   $val - a single, already trimmed field value
#
# Returns undef when $val is exactly one of the placeholder strings
# ('¿', 'http://¿' or 'https://¿'); any other value is returned unchanged.
sub filter_empty {
    my ($val) = @_;

    foreach my $placeholder ('¿', 'http://¿', 'https://¿') {
        return undef if $val eq $placeholder;
    }

    return $val;
}
# Extract the "techdata" dataentry block of a wiki page into a hash.
#
# Parameters:
#   $path   - path of the page, used only in the warning message
#   $source - complete text of the page
#
# Returns a hash reference mapping field names to values. When the page has
# no techdata block a warning is emitted and nothing is returned.
#
# Field handling:
#   - a key ending in "s" is multi-valued: the "s" is dropped and the value
#     is split on commas into an array reference
#   - a "_suffix" after the last underscore of the key is removed
#   - values rejected by filter_empty() and empty lists are left out
sub process_page {
    my ($path, $source) = @_;

    # The block starts at the dataentry header, consists solely of
    # "key:value" lines and ends at the next line holding just "----".
    my ($table) = $source =~ m!^---- dataentry techdata ----\n((?:[^:\n]+:[^\n]+\n)+)^----\n!m;

    if (!$table) {
        warn "No techdata table in '$path'";
        return;
    }

    my $dict = {};

    foreach my $row (split /\n/, $table) {
        my ($key, $val) = $row =~ m!^([^:]+):(.+)$!;

        next if !defined($key) || !defined($val);

        $key =~ s!^\s+|\s+$!!g;

        # Trim the value and drop a trailing "# comment". A hash preceded by
        # a backslash is an escaped literal "#" and must not start a comment;
        # cutting there would leave a dangling backslash in the value.
        # NOTE(review): the "\#" escape follows the DokuWiki data plugin
        # syntax as I understand it - confirm against the plugin docs.
        $val =~ s{^\s+|[ \t]*(?<!\\)#.*$|\s+$}{}g;
        $val =~ s{\\#}{#}g;

        if ($key =~ s!s$!!) {
            # Multi-valued field: keep only the entries that are not placeholders.
            $val = [ grep { defined } map { filter_empty($_) } split /\s*,\s*/, $val ];
            undef $val if @$val == 0;
        }
        else {
            $val = filter_empty($val);
        }

        # Strip the "_suffix" part; keys without one are used as they are.
        my ($name) = $key =~ m!^(.+)_[^_]*$!;
        $name = $key if !defined($name);

        $dict->{$name} = $val if defined($val);
    }

    return $dict;
}
# Walk every *.txt file below the current directory, parse its techdata
# block with process_page() and print all results as one pretty-printed
# JSON object on STDOUT. The object is keyed by
# "<parent directory>/<file name without .txt>".
my $dump = {};

# List-form pipe open: the arguments are handed to find as they are.
open(my $find, '-|', qw(find . -type f -name *.txt)) || die "Unable to execute find: $!\n";

while (defined(my $path = readline $find)) {
    chomp $path;

    # A file sitting directly in the start directory ("./page.txt") has no
    # parent directory to build an ID from. Skip it instead of storing it
    # under an undefined key, where such pages would overwrite one another.
    my ($id) = $path =~ m!/([^/]+/[^/]+)\.txt$!;

    if (!defined($id)) {
        warn "Unable to derive page ID from '$path', skipping\n";
        next;
    }

    if (open(my $page, '<', $path)) {
        local $/;    # slurp mode, limited to this block
        my $data = process_page($path, readline $page);
        close $page;

        $dump->{$id} = $data if $data;
    }
    else {
        warn "Unable to open '$path': $!\n";
    }
}

# Closing a pipe waits for the child process; a false result means find did
# not finish cleanly, so the list of pages may be incomplete.
close($find) || warn "find did not finish cleanly (status $?), output may be incomplete\n";

print JSON->new->pretty->encode($dump);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment