Created
April 14, 2013 17:51
-
-
Save mcsquaredjr/5383587 to your computer and use it in GitHub Desktop.
gstest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
username = 'your email' | |
passwd = 'your pass' | |
doc_name = 'PV data' | |
import sys | |
sys.path.append('../gdata') | |
import gdata.docs | |
import gdata.docs.service | |
import gdata.spreadsheet.service | |
import re, os | |
class GoogleSpreadsheet(object): | |
'''Wrap up google gdata methods to access a Google spreadsheet''' | |
def __init__(self, email, passwd): | |
# Connect to Google account | |
self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() | |
self.gd_client.email = username | |
self.gd_client.password = passwd | |
self.gd_client.source = 'GoogleSpreadsheet wrapper class' | |
self.gd_client.ProgrammaticLogin() | |
def get_spreadsheet_id(self, spreadsheet_title): | |
'''Get spreadsheet id by spreadsheet title''' | |
doc_query = gdata.spreadsheet.service.DocumentQuery() | |
doc_query['title'] = spreadsheet_title | |
doc_query['title-exact'] = 'true' | |
ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query) | |
spreadsheet_id = ss_feed.entry[0].id.text.rsplit('/',1)[1] | |
return spreadsheet_id | |
def get_worksheet_id(self, spreadsheet_id, workbook_name): | |
'''Get worksheet id by spreadsheet id and workbook name''' | |
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id) | |
d = self.worksheet_dict(spreadsheet_id) | |
try: | |
id = d[workbook_name] | |
except KeyError, e: | |
print e | |
id = None | |
return id | |
def worksheet_dict(self, spreadsheet_id): | |
'''Create dictionary containing worsheet's ids with keys equal to | |
their names if spreadsheet_id is given. | |
''' | |
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id) | |
d = dict() | |
for i, entry in enumerate(ws_feed.entry): | |
d[entry.title.text] = entry.id.text.split('/')[-1] | |
return d | |
def _get_num_cols(self, range): | |
'''Compute the number of rows in a range''' | |
# It's ugly, but I did not find a method in the API that does it | |
pattern = re.compile('[\d]*$') | |
first, second = range.split(':') | |
m1 = re.search(pattern, first) | |
m2 = re.search(pattern, second) | |
ind1 = int(first[m1.start():]) | |
ind2 = int(second[m2.start():]) | |
num_cols = ind2 - ind1 + 1 | |
return num_cols | |
def get_range(self, spreadsheet_id, worksheet_id, rng): | |
'''Return cell range as a list of tuples, so that each element of | |
the tuple represents a row of data in the spreadsheet. The range | |
should be provided following standard R1C1 notation. This method | |
always returns a rectangular array. Empty cells are returned as | |
None. | |
Parameters: | |
spreadhseet_id -- spreadhseet id | |
worksheet_id -- worksheet | |
rng -- range of cells, e.g. 'A2:R23' | |
''' | |
# If range is given | |
if rng is not None: | |
cell_query = gdata.spreadsheet.service.CellQuery() | |
cell_query['range'] = rng | |
cell_query['return-empty'] = 'true' # oh, my! | |
else: | |
cell_query = None | |
cell_feed = self.gd_client.GetCellsFeed(spreadsheet_id, worksheet_id, query=cell_query) | |
entry = cell_feed.entry | |
num_cols = self._get_num_cols(rng) | |
num_rows = len(entry) / num_cols | |
cells = [] | |
for ii in range(len(entry)): | |
cells.append(entry[ii].content.text) | |
# Now reshape it to create num_rowsXnum_cols lists of tuples | |
cells = zip(*[iter(cells)]*num_rows) | |
return cells | |
def get_cols(self, spreadsheet_id, worksheet_id, query=None): | |
'''Reads entire workbook and returns dictionary, that contains | |
column names as keys and list of column elements as dictionary | |
values. | |
''' | |
if query is None: | |
rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id) | |
else: | |
rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id, query) | |
d = dict() | |
# Build dictionary | |
for row in rows.entry: | |
for key in row.custom: | |
if key in d.keys(): | |
d[key].append(row.custom[key].text) | |
else: | |
d[key] = [row.custom[key].text] | |
return d | |
def get_list_query(self, query_str): | |
'''Retun an instance of ListQuery suitable to pass to the get_cols | |
method. You may set query parameters by setting up query keys. | |
Query keys: | |
sq: for general query such as 'name=john&last=smith' | |
orderby: for example: 'column:first' | |
reverse: values are 'true' and 'false', note these are string. | |
''' | |
list_query = gdata.spreadsheet.service.ListQuery() | |
return list_query | |
if __name__ == '__main__': | |
# Let's roll | |
gs = GoogleSpreadsheet(username, passwd) | |
sid = gs.get_spreadsheet_id(doc_name) | |
print sid | |
wid = gs.get_worksheet_id(sid, 'Sheet1') | |
print wid | |
cls = gs.get_range(sid, wid, 'A1:C5') | |
print cls | |
d = gs.get_cols(sid, wid) | |
print d | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment