Created
September 2, 2015 14:12
-
-
Save Lazhari/37cbe6e7e78eddea7a52 to your computer and use it in GitHub Desktop.
Reading Excel Files with Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Your task is as follows: | |
- read the provided Excel file | |
- find and return the min, max and average values for the COAST region | |
- find and return the time value for the min and max entries | |
- the time values should be returned as Python tuples | |
Please see the test function for the expected return format | |
""" | |
import xlrd | |
from zipfile import ZipFile | |
datafile = "2013_ERCOT_Hourly_Load_Data.xls" | |
def open_zip(datafile): | |
with ZipFile('{0}.zip'.format(datafile), 'r') as myzip: | |
myzip.extractall() | |
def parse_file(datafile): | |
workbook = xlrd.open_workbook(datafile) | |
sheet = workbook.sheet_by_index(0) | |
### example on how you can get the data | |
#sheet_data = [[sheet.cell_value(r, col) for col in range(sheet.ncols)] for r in range(sheet.nrows)] | |
### other useful methods: | |
# print "\nROWS, COLUMNS, and CELLS:" | |
# print "Number of rows in the sheet:", | |
# print sheet.nrows | |
# print "Type of data in cell (row 3, col 2):", | |
# print sheet.cell_type(3, 2) | |
# print "Value in cell (row 3, col 2):", | |
# print sheet.cell_value(3, 2) | |
# print "Get a slice of values in column 3, from rows 1-3:" | |
# print sheet.col_values(3, start_rowx=1, end_rowx=4) | |
# print "\nDATES:" | |
# print "Type of data in cell (row 1, col 0):", | |
# print sheet.cell_type(1, 0) | |
# exceltime = sheet.cell_value(1, 0) | |
# print "Time in Excel format:", | |
# print exceltime | |
# print "Convert time to a Python datetime tuple, from the Excel float:", | |
# print xlrd.xldate_as_tuple(exceltime, 0) | |
column_coast = sheet.col_values(1, start_rowx=1) | |
maxvalue = round(max(column_coast),10) | |
minvalue = round(min(column_coast),10) | |
avgcoast = round(reduce(lambda x, y: x + y, column_coast) / len(column_coast),10) | |
column_time = sheet.col_values(0, start_rowx=1) | |
maxtime = xlrd.xldate_as_tuple(column_time[column_coast.index(max(column_coast))],0) | |
mintime = xlrd.xldate_as_tuple(column_time[column_coast.index(min(column_coast))],0) | |
data = { | |
'maxtime': maxtime, | |
'maxvalue': maxvalue, | |
'mintime': mintime, | |
'minvalue': minvalue, | |
'avgcoast': avgcoast | |
} | |
return data | |
def test(): | |
open_zip(datafile) | |
data = parse_file(datafile) | |
#print data | |
assert data['maxtime'] == (2013, 8, 13, 17, 0, 0) | |
assert round(data['maxvalue'], 10) == round(18779.02551, 10) | |
test() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment