Created
April 12, 2017 09:46
-
-
Save robhammond/73f2de2aab5b963f46660cd09bfb0e43 to your computer and use it in GitHub Desktop.
Bulk AMP URL validation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
#
# Bulk AMP URL validation.
#
# Reads one URL per line from $in_file, submits each URL to the AMPBench
# API ($amp_api) and writes one spreadsheet row per successfully decoded
# API response to $output. URLs whose request fails, or whose response is
# not a JSON object, are reported on STDOUT and get no row.
use strict;
use Modern::Perl;
use Mojo::UserAgent;
use Mojo::JSON qw(decode_json);
use Excel::Writer::XLSX;
use Data::Dumper;

my $in_file = 'bhm.txt';
my $output  = 'bhm.xlsx';

# Spreadsheet layout after the leading "URL" column: each entry is a header
# label followed by the key path to read from the decoded API response.
# Keeping label and path together stops the header row and the data rows
# from drifting apart.
my @columns = (
    [ "AMP Status"                               => 'status' ],
    [ "Canonical URL"                            => 'amp_links', 'canonical_url' ],
    [ "AMP URL"                                  => 'amp_links', 'amphtml_url' ],
    # No key path: this cell is deliberately written empty. The original
    # script had the amp_required_markup.status lookup commented out.
    [ "AMP Required Markup - Status" ],
    [ "AMP Required Markup - Warning Count"      => 'amp_required_markup', 'warning_count' ],
    [ "AMP Required Markup - Warning Status"     => 'amp_required_markup', 'warning_status' ],
    [ "AMP Validation - Status"                  => 'amp_validation', 'status' ],
    [ "Google AMP Cache - Status"                => 'google_amp_cache', 'status' ],
    [ "Google AMP Cache - Result"                => 'google_amp_cache', 'result' ],
    [ "Google AMP Cache - URL"                   => 'google_amp_cache', 'google_amp_cache_url' ],
    [ "Google AMP Cache - Viewer URL"            => 'google_amp_cache', 'google_amp_viewer_url' ],
    [ "Robots.txt - Status"                      => 'robots', 'robots_txt_status' ],
    [ "Robots.txt Googlebot - Status"            => 'robots', 'robots_txt_googlebot_status' ],
    [ "Robots.txt Googlebot-Smartphone - Status" => 'robots', 'robots_txt_googlebot_smartphone_status' ],
    [ "Robots Meta - Status"                     => 'robots', 'robots_meta_status' ],
    [ "X-Robots Tag Header - Status"             => 'robots', 'x_robots_tag_header_status' ],
    [ "Structured Data - Status"                 => 'sd_validation', 'status' ],
    [ "Structured Data - Result"                 => 'sd_validation', 'result' ],
    [ "Structured Data - Kind"                   => 'sd_validation', 'sd_kind' ],
    [ "Structured Data - Type"                   => 'sd_validation', 'sd_type' ],
    [ "Structured Data Is AMP?"                  => 'sd_validation', 'sd_type_is_amp' ],
    [ "Structured Data Logo - Status"            => 'sd_validation', 'sd_logo_image', 'status' ],
    [ "Structured Data Logo - Result"            => 'sd_validation', 'sd_logo_image', 'result' ],
    [ "Structured Data Article - Status"         => 'sd_validation', 'sd_article', 'status' ],
    [ "Structured Data Article - Result"         => 'sd_validation', 'sd_article', 'result' ],
);

# Follow @keys down through nested hashes starting at $node.
# Returns the value found, or undef as soon as a level is not a hash
# (instead of dying under "strict refs" on an unexpected response shape).
sub dig {
    my ($node, @keys) = @_;
    for my $key (@keys) {
        # Explicit undef (not a bare return): callers build fixed-width rows
        # and need exactly one element back even in list context.
        return undef unless ref $node eq 'HASH';
        $node = $node->{$key};
    }
    return $node;
}

# Load the URL list, one URL per line.
open(my $fh, "<", $in_file)
    or die "Failed to open file: $!\n";
my @urls;
while (my $line = <$fh>) {
    # Trim surrounding whitespace, which also removes LF and CRLF endings.
    $line =~ s/\A\s+|\s+\z//g;
    # Skip blank lines so we never query the API with an empty URL.
    next unless length $line;
    push @urls, $line;
}
close $fh;

my $workbook = Excel::Writer::XLSX->new( $output )
    or die "Failed to create $output: $!\n";
my $worksheet = $workbook->add_worksheet();

# Header row.
$worksheet->write_row( 0, 0, [ "URL", map { $_->[0] } @columns ] );

my $ua = Mojo::UserAgent->new;
$ua->transactor->name('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36');

my $ampbench_url = "https://ampbench.appspot.com"; # Replace URL if running locally
my $amp_api      = $ampbench_url . "/api2";        # Replace with desired API version

# Next free spreadsheet row; row 0 holds the headers. Only advanced when a
# row is actually written, so failed URLs leave no gaps.
my $row = 1;

for my $url (@urls) {
    say "fetching $url";

    # Send the target as a form parameter: for GET requests the "form"
    # generator puts it in the query string percent-encoded, so characters
    # such as "&" or "#" inside $url no longer corrupt the API request.
    my $tx = $ua->get($amp_api => form => { url => $url });

    # error() is set for connection failures as well as 4xx/5xx responses.
    if (my $err = $tx->error) {
        my $reason = $err->{message} // 'unknown error';
        $reason = "$err->{code} $reason" if $err->{code};
        say "error: $url: $reason";
        next;
    }

    # decode_json dies on malformed input; do not let one bad response
    # abort the whole run.
    my $json = eval { decode_json $tx->res->body };
    if (ref $json ne 'HASH') {
        say "error: $url: response is not a JSON object";
        next;
    }

    say "response: " . ($json->{status} // '');
    # say Dumper($json);

    my @cells = map {
        my (undef, @path) = @$_;
        @path ? scalar dig($json, @path) : '';
    } @columns;

    $worksheet->write_row( $row, 0, [ $url, @cells ] );
    $row++;
}

# Close explicitly so that a failure to write the file is reported rather
# than being left to implicit cleanup at program exit.
$workbook->close
    or die "Failed to write $output: $!\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment