Created
February 17, 2014 09:40
-
-
Save public/9047618 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import time | |
import hotshot | |
import hotshot.stats | |
import ipdb | |
import openpyxl | |
def get_process_rss():
    """Return this process's memory size in bytes, or 0 if it is unknown.

    The figure comes from the ``VmSize:`` entry of ``/proc/self/status``;
    the number on that line is multiplied by 1024 before being returned.

    NOTE(review): despite the function's name, the field read here is
    ``VmSize`` rather than ``VmRSS`` -- confirm which measurement is
    actually wanted before relying on the "bytes per cell" figures.

    Returns:
        int: the ``VmSize`` value times 1024, or 0 when the status file
        cannot be read or contains no ``VmSize:`` line.
    """
    try:
        # The context manager closes the file promptly instead of leaving
        # the handle open until it is garbage collected.
        with open("/proc/self/status") as status_file:
            for line in status_file:
                if line.startswith("VmSize:"):
                    return int(line.split()[1]) * 1024
    except (IOError, OSError):
        # The status file is not readable here; report "unknown" the same
        # way as a missing field rather than aborting the benchmark.
        return 0
    return 0
def highest_row(sheet):
    """Return whatever ``sheet.get_highest_row()`` reports for *sheet*."""
    row_count = sheet.get_highest_row()
    return row_count
def highest_column(sheet):
    """Return one less than the largest column index used in *sheet*.

    Every cell stored in ``sheet._cells`` has its ``column`` converted with
    ``openpyxl.cell.column_index_from_string``; the maximum of those
    indices, minus one, is returned.

    Returns:
        The maximum converted column index minus one, or 1 when a
        ``ValueError`` is raised (for example ``max()`` of an empty
        sequence when the sheet holds no cells).
    """
    try:
        indices = [
            openpyxl.cell.column_index_from_string(entry.column)
            for entry in sheet._cells.itervalues()
        ]
        return max(indices) - 1
    except ValueError:
        return 1
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or NaN if denominator is 0."""
    if not denominator:
        return float("nan")
    return numerator / float(denominator)


def main(args):
    """Load a workbook, touch its cells, and print timing and memory figures.

    Args:
        args: argument list; ``args[1]`` is passed to
            ``openpyxl.load_workbook``.

    Prints four kinds of lines to stdout:
        * ``start``: the start timestamp and the starting memory figure,
        * ``open``: elapsed time and memory growth after loading,
        * ``read``: total elapsed time, the cell/value/access counters and
          the memory growth after walking every sheet,
        * three derived ratios (ms per cell access, bytes per cell, bytes
          per value).  A ratio whose counter is zero is printed as ``nan``
          instead of raising ``ZeroDivisionError``.
    """
    start = time.time()
    start_mem = get_process_rss()
    print("start %s %s" % (start, start_mem))

    wb = openpyxl.load_workbook(args[1])
    opened = time.time()
    opened_mem = get_process_rss()
    print("open %s %s" % (opened - start, opened_mem - start_mem))

    # We are going to go and count the number of cells with values,
    # the number of cells within our bounding box, and the number of
    # cell.value accesses we do.
    values = 0
    cells = 0
    accesses = 0
    for sheet in wb.worksheets:
        rows = highest_row(sheet)
        columns = highest_column(sheet)
        for r in xrange(rows):
            blanks = 0
            for c in xrange(columns):
                cell = sheet.cell(row=r, column=c)
                if cell.value is not None:
                    values += 1
                else:
                    blanks += 1
                # Second .value access per cell; the result is discarded,
                # only the cost of the lookup is being measured.
                cell.offset(row=1, column=1).value
                accesses += 2
                cells += 1
            # Stop scanning this sheet at the first row whose scanned cells
            # were all empty.  Compare against `columns` rather than the
            # loop variable, which is unbound when `columns` is 0.
            if blanks == columns:
                break

    end = time.time()
    total = end - start
    done_mem = get_process_rss()
    print("read %s %s %s %s %s" % (
        total, cells, values, accesses, done_mem - start_mem))

    grown = done_mem - start_mem
    print("%s ms per cell access" % (_safe_ratio(total, accesses) * 1000))
    print("%s bytes per cell" % _safe_ratio(grown, cells))
    print("%s bytes per value" % _safe_ratio(grown, values))
# Script entry point.  The constant condition selects the plain run; change
# it to take the other branch, which runs main() under the hotshot profiler,
# writes "speed.prof", loads and sorts the statistics, and then drops into
# an ipdb session with `stats` in scope.
if not 0:
    main(sys.argv)
else:
    prof = hotshot.Profile("speed.prof")
    prof.runcall(main, sys.argv)
    prof.close()
    stats = hotshot.stats.load("speed.prof")
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    ipdb.set_trace()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment