Created
February 9, 2011 21:11
-
-
Save williamjacksn/819290 to your computer and use it in GitHub Desktop.
Get album art from ocremix.org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# ocremix.org album art get!
# (c) 2011 William Jackson <[email protected]>
# Download album art for a particular remix into the current working directory.
# Rename the album art to "folder.(jpg|gif|png)".
#
# Usage: ocr_album_art.pl <ocr_id> [-f|--force]
#   <ocr_id>      Numeric remix id; it is zero-padded to five digits to build
#                 the remix page URL (123 -> .../remix/OCR00123/).
#   -f, --force   Replace existing folder.(jpg|gif|png) files instead of
#                 refusing to run.
#
# Exit status: 0 on success and after printing the usage text, 1 on any error.
use strict;
use warnings;
use Cwd;
use LWP::Simple;
use HTML::TreeBuilder;

# Extensions under which album art may already be stored as "folder.<ext>".
my @art_exts = ("jpg", "gif", "png");

# Return the src attribute of the album art <img> in a parsed remix page, or
# undef when the page does not have the expected structure.
# The images I want are inside <div id="panel-main"> and always have
# the string "games" in the src attribute.
sub find_art_src {
    my ($tree) = @_;
    my $panel_main = $tree->look_down("id", "panel-main");
    return unless defined($panel_main);
    my $img = $panel_main->look_down(
        "_tag", "img",
        sub {
            # An <img> without a src attribute can never be the album art.
            my $candidate = $_[0]->attr("src");
            return defined($candidate) && index($candidate, "games") > -1;
        }
    );
    return unless defined($img);
    return $img->attr("src");
}

# Turn an <img> src into the URL of the full-size image, or return undef when
# the src does not contain "files".
# The img src is usually something like:
# "/thumbs/180/files/images/games/sms/3/outrun-sms-title-1011.gif"
# I can safely remove everything before "files".
sub art_url {
    my ($img_src) = @_;
    my $files_at = index($img_src, "files");
    # Without this guard, substr() with offset -1 would silently return the
    # last character of the src and produce a nonsense URL.
    return if $files_at < 0;
    return "http://ocremix.org/" . substr($img_src, $files_at);
}

# Return the lower-cased extension of the last path component of $url (the
# text after its final dot), or undef when there is none.
sub art_extension {
    my ($url) = @_;
    my ($ext) = $url =~ m{\.([^./]+)\z};
    return unless defined($ext);
    $ext = lc($ext);
    # Just in case I am downloading the art for "Knuckles' Chaotix" ...
    # No, seriously, that is the only album that does not use .jpg, .gif,
    # or .png.
    return $ext eq "jpeg" ? "jpg" : $ext;
}

my $ocr_id = $ARGV[0];
unless (defined($ocr_id) && $ocr_id =~ /^\d+$/) {
    print "Use `ocr_album_art.pl <ocr_id> [-f]` to download album art into " .
          "this directory.\n";
    print "To report bugs or contact the author, leave a comment at ".
          "https://gist.github.com/819290\n";
    exit(0);
}
my $url = sprintf("http://ocremix.org/remix/OCR%05d/", $ocr_id);

# Use the command line argument `-f` or `--force` after <ocr_id> to replace
# existing folder.(jpg|gif|png) files in the current working directory.
my $force = 0;
if (defined($ARGV[1]) && (($ARGV[1] eq "-f") || ($ARGV[1] eq "--force"))) {
    $force = 1;
}

# Check for the existence of folder.(jpg|gif|png) in
# the current working directory.
my $dir = cwd();
my @existing = grep { -e $dir . "/folder." . $_ } @art_exts;
if (@existing && !$force) {
    print STDERR "There is already a folder." . $existing[0] .
                 " in this directory.\n";
    print STDERR "Use `ocr_album_art.pl <ocr_id> -f` to overwrite " .
                 "the album art in this directory.\n";
    exit(1);
}

# Check to make sure the requested ocr_id is valid.
my $html = get($url);
unless (defined($html)) {
    print STDERR "There is no information at this url: $url\n";
    print STDERR "Are you sure " . $ocr_id . " is a valid ocr_id?\n";
    exit(1);
}

# Pull the image location out of the page, then free the parsed tree.
my $tree = HTML::TreeBuilder->new_from_content($html);
my $img_src = find_art_src($tree);
$tree->delete();

my $src = defined($img_src) ? art_url($img_src) : undef;
unless (defined($src)) {
    print STDERR "Could not find any album art at this url: $url\n";
    exit(1);
}
print " Source: " . $src . "\n";

my $ext = art_extension($src);
unless (defined($ext)) {
    print STDERR "Could not work out the file type of: $src\n";
    exit(1);
}

my $dest = $dir . "/folder." . $ext;
print "Destination: " . $dest . "\n";
my $http_status = getstore($src, $dest);
unless (is_success($http_status)) {
    print STDERR "Download failed (HTTP status $http_status): $src\n";
    exit(1);
}

# Only now that the new art is stored, remove old art kept under a different
# extension. Deleting it up front would lose the existing art whenever the
# lookup or the download failed.
my $stale_left = 0;
foreach my $old_ext (@existing) {
    next if $old_ext eq $ext;
    my $old = $dir . "/folder." . $old_ext;
    unless (unlink($old)) {
        print STDERR "Could not delete $old: $!\n";
        $stale_left = 1;
    }
}
exit($stale_left ? 1 : 0);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# ocremix.org album art get (for Rainwave)!
# (c) 2011 William Jackson <[email protected]>
# Download album art for a bunch of games.
# Rename the album art to "folder.(jpg|gif|png)".
# With a directory structure such as "./Album1/Remix1.mp3", this should be run
# from ./ (the parent directory of each album directory).
#
# For every album directory that has no folder.(jpg|gif|png) yet, the remix
# page URL is read from the "WWW" tag of one .mp3 file in that directory (via
# the external `tagset` program), and the art found on that page is downloaded.
use strict;
use warnings;
use Cwd;
use LWP::Simple;
use HTML::TreeBuilder;
# use MP3::Tag;

# Extensions under which album art may already be stored as "folder.<ext>".
my @art_exts = ("jpg", "gif", "png");

# Keep track of how this run went.
my $existing = 0;
my $fetch_error = 0;
my $success = 0;

# Return the URL stored in the "WWW" tag of $mp3file (a file name relative to
# the current directory), or undef when tagset cannot be run or reports no
# such tag.
# The following (commented) code is for use with the MP3::Tag module.
# my $mp3tag = MP3::Tag->new($mp3file);
# $mp3tag->get_tags();
# my ($info, undef) = $mp3tag->{ID3v2}->get_frame("WXXX");
# my $url = $$info{"URL"} . "\n";
sub mp3_url {
    my ($mp3file) = @_;
    # List-form pipe open: the file name reaches tagset as one argument and
    # never passes through a shell, so spaces, quotes and other shell
    # metacharacters in it are harmless. The "./" prefix keeps a name that
    # starts with "-" from being read as an option.
    open(my $tagset, "-|", "tagset", "./" . $mp3file) or return;
    my $url;
    while (defined(my $line = <$tagset>)) {
        # Keep reading after a match so tagset is not cut off mid-output.
        next if defined($url);
        next unless index($line, "WWW") > -1;
        chomp($line);
        # The URL starts at offset 6 of the matching line; presumably that
        # skips a fixed-width tag label -- confirm against tagset's output.
        $url = substr($line, 6) if length($line) > 6;
    }
    close($tagset);
    return $url;
}

# Return the src attribute of the album art <img> in a parsed remix page, or
# undef when the page does not have the expected structure.
# The images I want are inside <div id="panel-main"> and always have
# the string "games" in the src attribute.
sub find_art_src {
    my ($tree) = @_;
    my $panel_main = $tree->look_down("id", "panel-main");
    return unless defined($panel_main);
    my $img = $panel_main->look_down(
        "_tag", "img",
        sub {
            # An <img> without a src attribute can never be the album art.
            my $candidate = $_[0]->attr("src");
            return defined($candidate) && index($candidate, "games") > -1;
        }
    );
    return unless defined($img);
    return $img->attr("src");
}

# Turn an <img> src into the URL of the full-size image, or return undef when
# the src does not contain "files".
# The img src is usually something like:
# "/thumbs/180/files/images/games/sms/3/outrun-sms-title-1011.gif"
# I can safely remove everything before "files".
sub art_url {
    my ($img_src) = @_;
    my $files_at = index($img_src, "files");
    # Without this guard, substr() with offset -1 would silently return the
    # last character of the src and produce a nonsense URL.
    return if $files_at < 0;
    return "http://ocremix.org/" . substr($img_src, $files_at);
}

# Return the lower-cased extension of the last path component of $url (the
# text after its final dot), or undef when there is none.
sub art_extension {
    my ($url) = @_;
    my ($ext) = $url =~ m{\.([^./]+)\z};
    return unless defined($ext);
    $ext = lc($ext);
    # Just in case I am downloading the art for "Knuckles' Chaotix" ...
    # No, seriously, that is the only album that does not use
    # .jpg, .gif, or .png.
    return $ext eq "jpeg" ? "jpg" : $ext;
}

# Get a list of directories.
my $root = cwd();
opendir(my $root_dh, $root) or die "Cannot read directory $root: $!\n";
my @albumdirs = grep { $_ ne "." && $_ ne ".." && -d $root . "/" . $_ }
                readdir($root_dh);
closedir($root_dh);

ALBUMDIR: foreach my $albumdir (@albumdirs) {
    # For each directory in the list, chdir into that directory.
    unless (chdir($root . "/" . $albumdir)) {
        print STDERR "Cannot enter $root/$albumdir: $!\n";
        next ALBUMDIR;
    }
    my $dir = cwd();
    print " Directory: " . $dir . "\n";

    # Check for the existence of "folder.(jpg|gif|png)" in
    # the current directory.
    foreach my $ext (@art_exts) {
        if (-e $dir . "/folder." . $ext) {
            print STDERR " Existing: " . $dir . "/folder." . $ext . "\n";
            $existing ++;
            next ALBUMDIR;
        }
    }

    # Get the first .mp3 file in the directory. Only plain files whose name
    # ends in ".mp3" count, so "song.mp3.bak" is not picked up.
    my $mp3file;
    if (opendir(my $album_dh, $dir)) {
        ($mp3file) = grep { /\.mp3\z/i && -f $dir . "/" . $_ }
                     readdir($album_dh);
        closedir($album_dh);
    } else {
        print STDERR "Cannot read directory $dir: $!\n";
    }

    # If there were no mp3 files in this directory,
    # move on to the next directory.
    next ALBUMDIR unless defined($mp3file);

    # Get the "WWW" id3v2 tag for that .mp3 file.
    my $url = mp3_url($mp3file);
    unless (defined($url)) {
        print STDERR "Fetch Error: no WWW tag found in $dir/$mp3file\n";
        $fetch_error ++;
        next ALBUMDIR;
    }

    my $html = get($url);
    # Pause between requests.
    sleep(1);
    unless (defined($html)) {
        print STDERR "Fetch Error: $url\n";
        $fetch_error ++;
        next ALBUMDIR;
    }

    # Pull the image location out of the page, then recycle the parsed HTML
    # tree right away so it is also freed on the skip paths below.
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my $img_src = find_art_src($tree);
    $tree->delete();

    my $src = defined($img_src) ? art_url($img_src) : undef;
    unless (defined($src)) {
        # One page with an unexpected layout must not abort the whole run.
        print STDERR "Fetch Error: no album art found at $url\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    print " Source: " . $src . "\n";

    my $ext = art_extension($src);
    unless (defined($ext)) {
        print STDERR "Fetch Error: no file extension in $src\n";
        $fetch_error ++;
        next ALBUMDIR;
    }
    my $dest = $dir . "/folder." . $ext;
    print "Destination: " . $dest . "\n";

    # Download the album art.
    my $http_response = getstore($src, $dest);
    sleep(1);
    if (is_success($http_response)) {
        $success ++;
    } else {
        print STDERR "Fetch Error: $src (HTTP status $http_response)\n";
        $fetch_error ++;
    }
} continue {
    # Change back to the root directory. Using the absolute path (rather than
    # "..") keeps the run anchored even when entering an album directory failed.
    print "------\n";
    chdir($root) or die "Cannot return to $root: $!\n";
}

# Print some stats about how this run went.
print " Success: $success\n";
print " Existing: $existing\n";
print "Fetch Error: $fetch_error\n";
exit(0);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment