Skip to content

Instantly share code, notes, and snippets.

@Lazhari
Created September 2, 2015 14:12
Show Gist options
  • Save Lazhari/37cbe6e7e78eddea7a52 to your computer and use it in GitHub Desktop.
Save Lazhari/37cbe6e7e78eddea7a52 to your computer and use it in GitHub Desktop.
Reading Excel Files with Python
#!/usr/bin/env python
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples
Please see the test function for the expected return format
"""
import xlrd
from zipfile import ZipFile
datafile = "2013_ERCOT_Hourly_Load_Data.xls"
def open_zip(datafile):
with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
myzip.extractall()
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
### example on how you can get the data
#sheet_data = [[sheet.cell_value(r, col) for col in range(sheet.ncols)] for r in range(sheet.nrows)]
### other useful methods:
# print "\nROWS, COLUMNS, and CELLS:"
# print "Number of rows in the sheet:",
# print sheet.nrows
# print "Type of data in cell (row 3, col 2):",
# print sheet.cell_type(3, 2)
# print "Value in cell (row 3, col 2):",
# print sheet.cell_value(3, 2)
# print "Get a slice of values in column 3, from rows 1-3:"
# print sheet.col_values(3, start_rowx=1, end_rowx=4)
# print "\nDATES:"
# print "Type of data in cell (row 1, col 0):",
# print sheet.cell_type(1, 0)
# exceltime = sheet.cell_value(1, 0)
# print "Time in Excel format:",
# print exceltime
# print "Convert time to a Python datetime tuple, from the Excel float:",
# print xlrd.xldate_as_tuple(exceltime, 0)
column_coast = sheet.col_values(1, start_rowx=1)
maxvalue = round(max(column_coast),10)
minvalue = round(min(column_coast),10)
avgcoast = round(reduce(lambda x, y: x + y, column_coast) / len(column_coast),10)
column_time = sheet.col_values(0, start_rowx=1)
maxtime = xlrd.xldate_as_tuple(column_time[column_coast.index(max(column_coast))],0)
mintime = xlrd.xldate_as_tuple(column_time[column_coast.index(min(column_coast))],0)
data = {
'maxtime': maxtime,
'maxvalue': maxvalue,
'mintime': mintime,
'minvalue': minvalue,
'avgcoast': avgcoast
}
return data
def test():
open_zip(datafile)
data = parse_file(datafile)
#print data
assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
assert round(data['maxvalue'], 10) == round(18779.02551, 10)
test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment