rage311 · April 4, 2019 05:03 · rage311 · Apr 4, 2019
diff --git a/archive_reddit_images.pl b/archive_reddit_images.pl
 #!/usr/bin/env perl

 use 5.024;
 use Mojo::Base -strict;
 use Mojo::UserAgent;
 use Mojo::Util 'dumper';

 binmode STDOUT, ':encoding(UTF-8)';

 say '[[ ', scalar localtime, ' ]]';

 die 'Subreddit name(s) required as arguments' unless
  my @subreddits = map { lc } @ARGV;

 my $ua = Mojo::UserAgent->new->max_redirects(3);

 for my $subreddit (@subreddits) {
  my $top_result = $ua->get(
    "https://www.reddit.com/r/$subreddit/top.json",
    form => {
      sort  => 'top',
      t     => 'day',   # or 'week', 'month', 'year', 'all'
      limit => 1,       # how many top results in given time period
    })->result;

  warn "Error in response for '$subreddit'\n" . dumper($top_result) and next unless
    my $submissions = $top_result->json('/data/children');

  for my $submission (@$submissions) {
    my $submission_data = $submission->{data};

    warn "No URL found for:\n" . dumper($submission_data) and next unless
      my $url = Mojo::URL->new($submission_data->{url});

    # special cases for URLs -- no doubt there are more than this
    if ($url->host =~ /imgur/) {
      if ($url->path =~ /\.gifv/) {
        $url->path($url->path =~ s/gifv/mp4/r);
      }
      elsif ($url->path !~ /\..+/) {
        $url->path($url->path . '.jpg');
      }
    }

    warn "No title found for:\n" . dumper($submission_data) and next unless
      my $title = $submission_data->{title};

    my ($day, $month, $year) = (localtime)[3..5];

    #JustPerlThings
    $year  += 1900;
    $month += 1;

    my $date_string = sprintf '%d-%02d-%02d', $year, $month, $day;

    my $dir = Mojo::File->new('reddit_images', $subreddit)->make_path;
    my $full_path = Mojo::File->new(
      $dir,
      join(
        '__',
        $date_string,
        $title,
        join('_', $url->host, split '/', ($url->path =~ s{^/}{}r))
      ),
    );

    warn "Unable to retrieve: $url" and next unless
      my $image = $ua->get($url)->result;

    warn "Already exists: $full_path" and next if
      -f $full_path;

    $image->content->asset->move_to($full_path);
    say "Archived: $full_path";
  }
 }
	#!/usr/bin/env perl

	use 5.024;
	use Mojo::Base -strict;
	use Mojo::UserAgent;
	use Mojo::Util 'dumper';

	binmode STDOUT, ':encoding(UTF-8)';

	say '[[ ', scalar localtime, ' ]]';

	die 'Subreddit name(s) required as arguments' unless
	my @subreddits = map { lc } @ARGV;

	my $ua = Mojo::UserAgent->new->max_redirects(3);

	for my $subreddit (@subreddits) {
	my $top_result = $ua->get(
	"https://www.reddit.com/r/$subreddit/top.json",
	form => {
	sort => 'top',
	t => 'day', # or 'week', 'month', 'year', 'all'
	limit => 1, # how many top results in given time period
	})->result;

	warn "Error in response for '$subreddit'\n" . dumper($top_result) and next unless
	my $submissions = $top_result->json('/data/children');

	for my $submission (@$submissions) {
	my $submission_data = $submission->{data};

	warn "No URL found for:\n" . dumper($submission_data) and next unless
	my $url = Mojo::URL->new($submission_data->{url});

	# special cases for URLs -- no doubt there are more than this
	if ($url->host =~ /imgur/) {
	if ($url->path =~ /\.gifv/) {
	$url->path($url->path =~ s/gifv/mp4/r);
	}
	elsif ($url->path !~ /\..+/) {
	$url->path($url->path . '.jpg');
	}
	}

	warn "No title found for:\n" . dumper($submission_data) and next unless
	my $title = $submission_data->{title};

	my ($day, $month, $year) = (localtime)[3..5];

	#JustPerlThings
	$year += 1900;
	$month += 1;

	my $date_string = sprintf '%d-%02d-%02d', $year, $month, $day;

	my $dir = Mojo::File->new('reddit_images', $subreddit)->make_path;
	my $full_path = Mojo::File->new(
	$dir,
	join(
	'__',
	$date_string,
	$title,
	join('_', $url->host, split '/', ($url->path =~ s{^/}{}r))
	),
	);

	warn "Unable to retrieve: $url" and next unless
	my $image = $ua->get($url)->result;

	warn "Already exists: $full_path" and next if
	-f $full_path;

	$image->content->asset->move_to($full_path);
	say "Archived: $full_path";
	}
	}