-
-
Save stephenLee/d373360ed531865d97fa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Downloads and cleans up a CSV file from a Google Trends query.

Usage:
    trends.py user@example.com google.password /path/to/filename query1 [query2 ...]

Requires mechanize:
    pip install mechanize
""" | |
import cookielib | |
import csv | |
import mechanize | |
import re | |
from StringIO import StringIO | |
import sys | |
def main(argv):
    """Download a Google Trends CSV export and save a cleaned-up copy.

    Logs in to a Google account through mechanize, requests the Trends
    report for the given queries with ``export=1``, keeps only the
    two-column rows whose first column contains a ``YYYY-MM-DD`` date, and
    writes them to ``pathname`` beneath a ``date,debt`` header row.

    Args:
        argv: Command-line style argument list, laid out as
            ``[program, username, password, pathname, query1, ...]``.

    Returns:
        ``None`` on success, or ``2`` (after printing a usage message to
        stderr) when ``argv`` does not contain the credentials, the output
        path and at least one query.
    """
    # Validate up front: indexing argv blindly would raise IndexError and
    # give the user a traceback instead of a usage hint.
    if len(argv) < 5:
        program = argv[0] if argv else 'trends.py'
        sys.stderr.write(
            'Usage: %s username password /path/to/filename '
            'query1 [query2 ...]\n' % program)
        return 2

    # Imported locally so the file's top-level import block is untouched;
    # the fallback covers interpreters where quote_plus lives in
    # urllib.parse.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    # Google login credentials and the destination of the CSV file.
    username, password, pathname = argv[1:4]

    # Percent-encode every term so spaces, '&', '#', '+', etc. cannot
    # corrupt the query string.
    query_params = '&'.join('q=' + quote_plus(term) for term in argv[4:])

    br = mechanize.Browser()

    # Create cookie jar
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)

    # Act like we're a real browser
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    # Log in to Google: fill in the first form on the login page and
    # submit it.
    response = br.open('https://accounts.google.com/ServiceLogin?hl=en&continue=https://www.google.com/')
    forms = mechanize.ParseResponse(response)
    form = forms[0]
    form['Email'] = username
    form['Passwd'] = password
    response = br.open(form.click())

    # Get CSV from Google Trends
    trends_url = 'http://www.google.com/trends/trendsReport?'
    response = br.open(trends_url + query_params + '&export=1')

    # Remove headers and footers from Google's CSV: only two-column rows
    # whose first column contains a date are data rows.
    date_pattern = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}')
    reader = csv.reader(StringIO(response.read()))
    rows = []
    for row in reader:
        if len(row) != 2:
            continue
        date, value = row
        if date_pattern.search(date):
            # Keep the last 10 characters, i.e. the last date when the
            # cell holds a date range.
            rows.append((date[-10:], value))

    with open(pathname, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['date', 'debt'])
        writer.writerows(rows)
if __name__ == '__main__':
    # Run the script and hand main()'s return value to the interpreter as
    # the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment