Last active
February 14, 2022 15:06
-
-
Save teticio/88f760869ba7e7f2570e3474da0eafab to your computer and use it in GitHub Desktop.
Rank competitors by leveraging Google's autocomplete function while searching for "<target> vs"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import requests | |
from tqdm import tqdm | |
from pprint import pprint | |
from itertools import islice | |
from urllib.parse import quote | |
from functools import lru_cache | |
from collections import Counter, OrderedDict | |
@lru_cache
def autocomplete(request):
    """Return Google autocomplete completions of *request* with their scores.

    Args:
        request: The search prefix to complete, e.g. ``"mlflow vs"``.

    Returns:
        A ``(keywords, scores)`` tuple of two equally long lists.
        ``keywords`` holds the text that follows ``request`` in each
        suggestion and ``scores`` holds the matching entries of the
        ``google:suggestrelevance`` field of the response.

    Raises:
        requests.RequestException: On network errors, timeouts or a
            non-success HTTP status.
        ValueError, LookupError, TypeError: If the response body is not JSON
            of the expected shape.

    Results are memoised per ``request`` by ``lru_cache``, so callers share
    the returned lists and should not mutate them.
    """
    response = requests.get(
        f"https://google.com/complete/search?output=toolbar&client=chrome&q={quote(request)}",
        # Without a timeout a stalled connection would block forever.
        timeout=10,
    )
    response.raise_for_status()

    # Decode the body once instead of re-parsing it for every field.
    payload = response.json()
    suggestions = payload[1]
    relevances = payload[4]['google:suggestrelevance']

    prefix = f"{request} "
    folded_prefix = prefix.casefold()
    keywords = []
    scores = []
    for suggestion, relevance in zip(suggestions, relevances):
        # Only suggestions that really extend the request can be split into
        # "<request> <keyword>"; slicing anything else blindly would produce
        # a meaningless fragment. The comparison ignores case because the
        # service may return suggestions in a different case than the query.
        if suggestion[:len(prefix)].casefold() != folded_prefix:
            continue
        keyword = suggestion[len(prefix):].strip()
        if not keyword:
            # The suggestion was just the request itself.
            continue
        keywords.append(keyword)
        scores.append(relevance)
    return keywords, scores
if __name__ == '__main__':
    # Script entry point: starting from the target, repeatedly ask the
    # autocomplete service for "<keyword> vs" and keep the best-scoring
    # completions as the current set of competitors.
    import sys

    parser = argparse.ArgumentParser(
        description='Rank competitors of a target using search autocomplete '
        'suggestions for "<target> vs".')
    parser.add_argument('target', type=str, help='name to find competitors for')
    parser.add_argument('--top_k',
                        type=int,
                        default=10,
                        help='number of competitors to keep (default: 10)')
    parser.add_argument('--iterations',
                        type=int,
                        default=10,
                        help='number of expansion rounds (default: 10)')
    args = parser.parse_args()

    items = Counter({args.target: 0})
    for _ in tqdm(range(args.iterations)):
        # Iterate over a snapshot because ``items`` is rebound inside the loop.
        for keyword in list(items):
            try:
                keywords, scores = autocomplete(f"{keyword} vs")
            except (requests.RequestException, ValueError, LookupError,
                    TypeError) as error:
                # Best effort: a failed or malformed lookup skips this keyword
                # but must not hide Ctrl-C or unrelated programming errors.
                tqdm.write(f"skipping {keyword!r}: {error}", file=sys.stderr)
                continue
            new_items = Counter(dict(zip(keywords, scores)))
            # Counter addition sums the scores and drops non-positive totals;
            # only the top_k highest totals survive to the next round.
            items = Counter(dict((items + new_items).most_common(args.top_k)))
    pprint(items)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage:
python competitive-landscape.py mlflow