Last active
July 15, 2019 19:31
-
-
Save gregroberts/11001277 to your computer and use it in GitHub Desktop.
dlgt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import getpass
import httplib
import logging
import re
import urllib
import urllib2
from cookielib import CookieJar
class pyGTrends(object):
    """
    Small client that logs in to a Google account and downloads
    Google Trends report data.

    Recommended usage:

        r = pyGTrends(username, password)
        trends_data = r.download_report(('pants', 'skirt'))
    """

    # The hidden GALX login token has been served with its attributes in
    # either order, so both layouts are tried (current layout first).
    _GALX_PATTERNS = (
        re.compile(r'<input name="GALX"[\s]+type="hidden"[\s]+value="(?P<galx>[a-zA-Z0-9_-]+)">'),
        re.compile(r'<input type="hidden"[\s]+name="GALX"[\s]+value="(?P<galx>[a-zA-Z0-9_-]+)">'),
    )

    def __init__(self, username, password):
        """
        Store the login form fields, request headers and endpoint URLs,
        then log in immediately by calling _connect().

        username -- sent as the "Email" field of the login form
        password -- sent as the "Passwd" field of the login form

        Raises whatever _connect() raises (network errors, or Exception
        when the login page cannot be parsed).
        """
        self.login_params = {
            "continue": 'http://www.google.com/trends',
            "PersistentCookie": "yes",
            "Email": username,
            "Passwd": password,
        }
        # "Referer" (single "r" in the middle) is the actual HTTP header
        # name; the previous spelling "Referrer" is not a recognised header.
        self.headers = [
            ("Referer", "https://www.google.com/accounts/ServiceLoginBoxAuth"),
            ("Content-type", "application/x-www-form-urlencoded"),
            ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21'),
            ("Accept", "text/plain"),
        ]
        self.url_ServiceLoginBoxAuth = 'https://accounts.google.com/ServiceLoginBoxAuth'
        # NOTE(review): url_Export and header_dictionary are not read by any
        # method in this class; kept because they are public attributes.
        self.url_Export = 'http://www.google.com/accounts/ServiceLoginBoxAuth'
        self.url_CookieCheck = 'https://www.google.com/accounts/CheckCookie?chtml=LoginDoneHtml'
        self.url_PrefCookie = 'http://www.google.com'
        self.header_dictionary = {}
        self._connect()

    def _connect(self):
        """
        Log in to Google and keep the session cookies on self.opener.

        Fetches the login page, extracts the hidden GALX token, posts the
        login form, then requests the cookie-check and google.com URLs so
        their cookies land in self.cj.

        Raises Exception if the GALX token cannot be found in the login page.
        """
        self.cj = CookieJar()
        cookie_handler = urllib2.HTTPCookieProcessor(self.cj)
        self.opener = urllib2.build_opener(cookie_handler)
        self.opener.addheaders = self.headers

        login_page = self.opener.open(self.url_ServiceLoginBoxAuth).read()
        # Collapse whitespace runs so the patterns can match across line breaks.
        login_page = re.sub(r'\s\s+', ' ', login_page)

        match = None
        for pattern in self._GALX_PATTERNS:
            match = pattern.search(login_page)
            if match:
                break
        if not match:
            # Fail with a clear message instead of an AttributeError on None.
            raise Exception("Cannot parse GALX out of login page")

        self.login_params['GALX'] = match.group('galx')
        params = urllib.urlencode(self.login_params)
        self.opener.open(self.url_ServiceLoginBoxAuth, params)
        self.opener.open(self.url_CookieCheck)
        self.opener.open(self.url_PrefCookie)

    def download_report(self, keywords, date='all', geo='all', geor='all', graph = 'all_csv', sort=0, scale=0, sa='N'):
        """
        Download a report and return the raw response body.

        keywords -- a single keyword string, or a list/tuple of keyword
                    strings; they are comma-joined into the "q" parameter
        date, geo, geor, graph, sort, scale, sa -- passed through as query
                    parameters of the same names (sort and scale are
                    converted with str())

        The response body is also stored on self.raw_data. Nothing is
        written to disk; the caller decides what to do with the text.

        Raises Exception if the response is the "must be signed in" message.
        """
        if not isinstance(keywords, (list, tuple)):
            keywords = [keywords]
        params = urllib.urlencode({
            'q': ",".join(keywords),
            'date': date,
            'graph': graph,
            'geo': geo,
            'geor': geor,
            'sort': str(sort),
            'scale': str(scale),
            'sa': sa,
        })
        self.raw_data = self.opener.open('http://www.google.com/trends/viz?' + params).read()
        if self.raw_data == 'You must be signed in to export data from Google Trends':
            logging.error('You must be signed in to export data from Google Trends')
            raise Exception(self.raw_data)
        return self.raw_data
if __name__ == '__main__':
    # Interactive demo: log in, fetch one report and print it to stdout.
    # The report is only printed; no CSV file is written to disk.
    username = raw_input('Enter your Google username: \n')
    # getpass keeps the password from being echoed back to the terminal.
    password = getpass.getpass('Enter your Google password: \n')
    a = pyGTrends(username, password)
    keyword = 'justin beiber'
    thedata = a.download_report(keyword)
    print(thedata)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I ran the code. It asks for the email and the password, but then it keeps running forever and never stops. Where does the CSV file go? I am not able to extract the CSV file. Where is it stored, or how do I retrieve it?