Skip to content

Instantly share code, notes, and snippets.

@barrucadu
Created August 13, 2019 10:42
Show Gist options
  • Save barrucadu/308365624f515aa125b01a4dbeb27bd5 to your computer and use it in GitHub Desktop.
Save barrucadu/308365624f515aa125b01a4dbeb27bd5 to your computer and use it in GitHub Desktop.
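Python 2 scripts, but they do the job. Variable names refer to ES2 and ES5 (here they hold the results of the A and B variants of the `search_cluster_query` A/B test, respectively) because this is a small tweak on the original script (https://gist.github.com/brucebolt/ec211868299b8efbdde7877e9a2d3c10).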
import csv
import re
def summarise_results(rows):
    """Summarise, per keyword, how many ES2 links also appear in the ES5 links.

    ``rows`` is an iterable of already-split rows.  Each row is read as
    ``[keyword, position, <unused>, es2_link, es5_link]``.  Trailing
    whitespace is stripped from every field before it is used.  Rows with
    fewer than five fields (for example a blank trailing line) are skipped
    instead of aborting the whole run with ``IndexError``.

    Returns a list of tab-separated lines, one per keyword, holding:
    the keyword, the number of its ES2 links that occur anywhere among its
    ES5 links, ``TRUE`` when every ES2 link was found (``FALSE`` otherwise),
    and the ``;``-joined missing links, each followed by its position in
    parentheses.
    """
    # keyword -> {position: link}; the "es2" column is index 3, "es5" is 4.
    results_es2 = {}
    results_es5 = {}
    for row in rows:
        row = [value.rstrip() for value in row]
        if len(row) < 5:
            continue
        keyword, position = row[0], row[1]
        results_es2.setdefault(keyword, {})[position] = row[3]
        results_es5.setdefault(keyword, {})[position] = row[4]

    lines = []
    for key in results_es2:
        # Build the lookup once per keyword rather than scanning the ES5
        # values again for every ES2 link.
        es5_links = set(results_es5[key].values())
        missing = [
            "{} ({})".format(link, position)
            for position, link in results_es2[key].items()
            if link not in es5_links
        ]
        found = len(results_es2[key]) - len(missing)
        result = 'TRUE' if not missing else 'FALSE'
        lines.append(
            "{}\t{}\t{}\t{}".format(key, found, result, ";".join(missing))
        )
    return lines


def compare_results_file(path='results.txt'):
    """Print one summary line per keyword for the tab-separated file *path*.

    The first line of the file is treated as a header and skipped.
    """
    # Text mode is accepted by the csv module on both Python 2 and Python 3
    # (binary mode only works on Python 2).
    with open(path, 'r') as f:
        rows = csv.reader(f, delimiter='\t')
        next(rows, None)  # skip header
        lines = summarise_results(rows)
    for line in lines:
        # Single-argument print() behaves the same on Python 2 and 3.
        print(line)


if __name__ == '__main__':
    compare_results_file()
import requests
import sys
import urllib
import csv
# The A/B variant letter is substituted into the ab_tests parameter.
SEARCH_URL = "https://www.gov.uk/api/search.json?ab_tests=search_cluster_query:{}"
NO_RESULT = '(no result at this position)'
MAX_POSITIONS = 10
# requests waits indefinitely unless a timeout is given.
REQUEST_TIMEOUT = 30  # seconds


def fetch_links(keyword, variant):
    """Return the links of the first ``MAX_POSITIONS`` results for *keyword*.

    *variant* is the A/B test variant letter (``'A'`` or ``'B'``).

    Raises ``requests.RequestException`` on network or HTTP errors,
    ``ValueError`` when the body is not JSON, and ``KeyError``/``TypeError``
    when the JSON does not have the ``results``/``link`` structure.
    """
    resp = requests.get(
        SEARCH_URL.format(variant),
        params={'q': keyword},  # requests URL-encodes and appends the query
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    return [result['link'] for result in resp.json()['results'][:MAX_POSITIONS]]


def compare_links(links_es2, links_es5, limit=MAX_POSITIONS):
    """Compare two ranked link lists position by position.

    Returns a list of ``(position, match, es2_link, es5_link)`` tuples for
    positions ``1..min(limit, longest list)``.  ``match`` is ``'MATCH'`` when
    both lists hold the same link at that position and ``''`` otherwise; a
    list that is too short contributes ``NO_RESULT`` for that position.
    """
    rows = []
    for i in range(min(limit, max(len(links_es2), len(links_es5)))):
        es2_value = links_es2[i] if i < len(links_es2) else NO_RESULT
        es5_value = links_es5[i] if i < len(links_es5) else NO_RESULT
        match = 'MATCH' if es2_value == es5_value else ''
        rows.append((i + 1, match, es2_value, es5_value))
    return rows


def compare_search_clusters(path='ga_export.csv'):
    """Query both A/B variants for every keyword in *path* and print a report.

    *path* is a comma-separated file whose first line is a header and whose
    first column is the keyword.  One tab-separated line is printed to stdout
    per keyword and position; the current row number is written to stderr as
    a progress indicator.  Keywords whose lookup fails are reported on stderr
    and skipped.
    """
    print("Keyword\tPosition\tMatch\tA Result\tB Result")
    with open(path, 'r') as f:
        rows = csv.reader(f, delimiter=',')
        next(rows, None)  # skip header
        # enumerate keeps the progress number in step with the input even
        # when a keyword is skipped.
        for counter, row in enumerate(rows, 1):
            sys.stderr.write("{}\n".format(counter))
            if not row:
                continue  # blank line in the export
            keyword = row[0].strip()
            try:
                # "es2" holds variant A, "es5" holds variant B.
                links_es2 = fetch_links(keyword, 'A')
                links_es5 = fetch_links(keyword, 'B')
            except (requests.RequestException, ValueError,
                    KeyError, TypeError) as err:
                # Best effort: report and move on, but let KeyboardInterrupt
                # and SystemExit through.
                sys.stderr.write("skipping {!r}: {}\n".format(keyword, err))
                continue
            for position, match, es2_value, es5_value in compare_links(
                    links_es2, links_es5):
                # Plain tab separators, matching the header line.
                print("{}\t{}\t{}\t{}\t{}".format(
                    keyword, position, match, es2_value, es5_value))


if __name__ == '__main__':
    compare_search_clusters()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment