Created
February 21, 2017 21:39
-
-
Save pshapiro/bca29598a38b09a332b1af2f979a6cf2 to your computer and use it in GitHub Desktop.
AMPBench API Bulk Validator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import csv | |
import os | |
# AMPBench API bulk validator.
#
# Reads a text file containing one URL per line, queries the AMPBench API for
# each URL and writes one CSV row per URL with the fields listed in COLUMNS.
#
# For details about AMPBench and the API:
# https://github.com/ampproject/ampbench

ampbench_url = "https://ampbench.appspot.com"  # Replace URL if running locally
amp_api = ampbench_url + "/api2"  # Replace with desired API version

user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
headers = {'User-Agent': user_agent}

# Seconds to wait for the API before giving up on a single URL; without a
# timeout one stalled connection would hang the whole batch.
REQUEST_TIMEOUT = 60

# (CSV column title, path of keys into the API's JSON response).
# The header row and every data row are both derived from this single table,
# so the two can never drift out of alignment.
COLUMNS = (
    ("AMP Status", ("status",)),
    ("Canonical URL", ("amp_links", "canonical_url")),
    ("AMP URL", ("amp_links", "amphtml_url")),
    ("AMP Required Markup - Status", ("amp_required_markup", "status")),
    ("AMP Required Markup - Warning Count", ("amp_required_markup", "warning_count")),
    ("AMP Required Markup - Warning Status", ("amp_required_markup", "warning_status")),
    ("AMP Validation - Status", ("amp_validation", "status")),
    ("Google AMP Cache - Status", ("google_amp_cache", "status")),
    ("Google AMP Cache - Result", ("google_amp_cache", "result")),
    ("Google AMP Cache - URL", ("google_amp_cache", "google_amp_cache_url")),
    ("Google AMP Cache - Viewer URL", ("google_amp_cache", "google_amp_viewer_url")),
    ("Robots.txt - Status", ("robots", "robots_txt_status")),
    ("Robots.txt Googlebot - Status", ("robots", "robots_txt_googlebot_status")),
    ("Robots.txt Googlebot-Smartphone - Status", ("robots", "robots_txt_googlebot_smartphone_status")),
    ("Robots Meta - Status", ("robots", "robots_meta_status")),
    ("X-Robots Tag Header - Status", ("robots", "x_robots_tag_header_status")),
    ("Structured Data - Status", ("sd_validation", "status")),
    ("Structured Data - Result", ("sd_validation", "result")),
    ("Structured Data - Kind", ("sd_validation", "sd_kind")),
    ("Structured Data - Type", ("sd_validation", "sd_type")),
    ("Structured Data Is AMP?", ("sd_validation", "sd_type_is_amp")),
    ("Structured Data Logo - Status", ("sd_validation", "sd_logo_image", "status")),
    ("Structured Data Logo - Result", ("sd_validation", "sd_logo_image", "result")),
    ("Structured Data Article - Status", ("sd_validation", "sd_article", "status")),
    ("Structured Data Article - Result", ("sd_validation", "sd_article", "result")),
)

CSV_HEADER = ["URL"] + [title for title, _ in COLUMNS]


def lookup(result, path):
    """Return ``str()`` of the value found at *path* in *result*.

    *path* is a sequence of keys followed through nested dicts. If any key
    along the way is missing (or an intermediate value is not a dict), an
    empty string is returned so that one incomplete response does not abort
    the whole batch.
    """
    node = result
    for key in path:
        if not isinstance(node, dict) or key not in node:
            return ""
        node = node[key]
    return str(node)


def build_row(url, result):
    """Return the CSV row for *url* built from the decoded API *result*.

    The row has exactly one cell per entry of ``CSV_HEADER``: the URL itself
    followed by the stringified value for each entry of ``COLUMNS``.
    """
    return [url] + [lookup(result, path) for _, path in COLUMNS]


def fetch_result(url):
    """Query the AMPBench API for *url* and return the decoded JSON body.

    Raises ``requests.RequestException`` on network failure or timeout and
    ``ValueError`` when the response body is not valid JSON.
    """
    response = requests.request(
        "GET",
        amp_api,
        params={"url": url},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    return json.loads(response.text)


def main():
    """Prompt for the input and output file names and run the bulk validation.

    Both file names are resolved relative to the directory of this script.
    """
    base_dir = os.path.dirname(__file__)
    urlinput = os.path.join(base_dir, input('Enter input text file: '))
    # Context managers guarantee both files are closed (and the CSV flushed)
    # even when a request fails halfway through the list.
    with open(urlinput, "r") as urls:
        outputcsv = os.path.join(
            base_dir,
            input('Enter a filename (minus file extension): ') + '.csv',
        )
        # newline="" is what the csv module expects; it emits its own line
        # terminators.
        with open(outputcsv, "w", newline="", encoding="utf-8") as out:
            writer = csv.writer(out)
            writer.writerow(CSV_HEADER)
            for line in urls:
                # Strip the trailing newline so it does not end up embedded
                # in the URL cell, and skip blank lines entirely.
                url = line.strip()
                if not url:
                    continue
                try:
                    result = fetch_result(url)
                except (requests.RequestException, ValueError) as err:
                    # Record the failure in the status column and carry on
                    # with the remaining URLs.
                    blanks = [""] * (len(COLUMNS) - 1)
                    writer.writerow([url, "ERROR: " + str(err)] + blanks)
                    continue
                writer.writerow(build_row(url, result))
    print("Writing to " + outputcsv + " complete.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment