-
-
Save h0rn3t/2a80792d0c9f07ab8961fcb6e8e3a742 to your computer and use it in GitHub Desktop.
Benchmark of several Python Excel writing modules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################## | |
# | |
# Simple Python program to benchmark several Python Excel writing modules. | |
# | |
# python bench_excel_writers.py [num_rows] [num_cols] | |
# | |
# | |
import sys | |
import resource | |
import random | |
from multiprocessing import Process | |
from time import clock | |
import openpyxl | |
import pyexcelerate | |
import xlsxwriter | |
import xlwt | |
from openpyxl.cell import get_column_letter | |
# Default to 1000 rows x 50 cols. | |
if len(sys.argv) > 1: | |
row_max = int(sys.argv[1]) | |
col_max = 50 | |
else: | |
row_max = 1000 | |
col_max = 50 | |
if len(sys.argv) > 2: | |
col_max = int(sys.argv[2]) | |
def print_elapsed_time(module_name, elapsed, elapsed_rss): | |
""" Print module run times in a consistent format. """ | |
print(" %-22s: %6.2f %6.2f" % (module_name, elapsed, elapsed_rss)) | |
def time_xlsxwriter(): | |
""" Run XlsxWriter in default mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = xlsxwriter.Workbook('xlsxwriter.xlsx') | |
worksheet = workbook.add_worksheet() | |
for row in range(row_max // 2): | |
for col in range(col_max): | |
worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col)) | |
for col in range(col_max): | |
worksheet.write_number(row * 2 + 1, col, row + col) | |
workbook.close() | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('xlsxwriter', elapsed, elapsed_rss) | |
def time_xlsxwriter_optimised(): | |
""" Run XlsxWriter in optimised/constant memory mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = xlsxwriter.Workbook('xlsxwriter_opt.xlsx', | |
{'constant_memory': True}) | |
worksheet = workbook.add_worksheet() | |
for row in range(row_max // 2): | |
for col in range(col_max): | |
worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col)) | |
for col in range(col_max): | |
worksheet.write_number(row * 2 + 1, col, row + col) | |
workbook.close() | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('xlsxwriter (optimised)', elapsed, elapsed_rss) | |
def time_openpyxl(): | |
""" Run OpenPyXL in default mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = openpyxl.workbook.Workbook() | |
worksheet = workbook.active | |
for row in range(row_max // 2): | |
for col in range(col_max): | |
colletter = get_column_letter(col + 1) | |
worksheet.cell('%s%s' % (colletter, row * 2 + 1)).value = "Row: %d Col: %d" % (row, col) | |
for col in range(col_max): | |
colletter = get_column_letter(col + 1) | |
worksheet.cell('%s%s' % (colletter, row * 2 + 2)).value = row + col | |
workbook.save('openpyxl.xlsx') | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('openpyxl', elapsed, elapsed_rss) | |
def time_openpyxl_optimised(): | |
""" Run OpenPyXL in optimised mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = openpyxl.workbook.Workbook(optimized_write=True) | |
worksheet = workbook.create_sheet() | |
for row in range(row_max // 2): | |
string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)] | |
worksheet.append(string_data) | |
num_data = [row + col for col in range(col_max)] | |
worksheet.append(num_data) | |
workbook.save('openpyxl_opt.xlsx') | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('openpyxl (optimised)', elapsed, elapsed_rss) | |
def time_pyexcelerate(): | |
""" Run pyexcelerate in "faster" mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = pyexcelerate.Workbook() | |
worksheet = workbook.new_sheet('Sheet1') | |
for row in range(row_max // 2): | |
for col in range(col_max): | |
worksheet.set_cell_value(row * 2 + 1, col + 1, "Row: %d Col: %d" % (row, col)) | |
for col in range(col_max): | |
worksheet.set_cell_value(row * 2 + 2, col + 1, row + col) | |
workbook.save('pyexcelerate.xlsx') | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('pyexcelerate', elapsed, elapsed_rss) | |
def time_xlwt(): | |
""" Run xlwt in default mode. """ | |
start_time = clock() | |
start_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss | |
workbook = xlwt.Workbook() | |
worksheet = workbook.add_sheet('Sheet1') | |
for row in range(row_max // 2): | |
for col in range(col_max): | |
worksheet.write(row * 2, col, "Row: %d Col: %d" % (row, col)) | |
for col in range(col_max): | |
worksheet.write(row * 2 + 1, col, row + col) | |
workbook.save('xlwt.xls') | |
elapsed = clock() - start_time | |
elapsed_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - start_rss | |
print_elapsed_time('xlwt', elapsed, elapsed_rss) | |
print("") | |
print("Versions:") | |
print(" %-12s: %s" % ('python', sys.version[:5])) | |
print(" %-12s: %s" % ('openpyxl', openpyxl.__version__)) | |
print(" %-12s: %s" % ('pyexcelerate', pyexcelerate.__version__)) | |
print(" %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__)) | |
print(" %-12s: %s" % ('xlwt', xlwt.__VERSION__)) | |
print("") | |
print("Dimensions:") | |
print(" Rows = %d" % row_max) | |
print(" Cols = %d" % col_max) | |
print("") | |
print("Times:") | |
targets = [ | |
time_pyexcelerate, | |
time_xlwt, | |
time_xlsxwriter_optimised, | |
time_xlsxwriter, | |
time_openpyxl_optimised, | |
time_openpyxl, | |
] | |
random.shuffle(targets) | |
for target in targets: | |
p = Process(target=target) | |
p.start() | |
p.join() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Versions: | |
python : 3.5.0 | |
openpyxl : 2.3.3 | |
pyexcelerate: 0.6.6 | |
xlsxwriter : 0.8.4 | |
xlwt : 1.0.0 | |
Dimensions: | |
Rows = 1000 | |
Cols = 50 | |
Times: | |
pyexcelerate : 0.28 10488.00 | |
xlwt : 0.47 16932.00 | |
xlsxwriter (optimised): 0.57 1092.00 | |
xlsxwriter : 0.67 12124.00 | |
openpyxl (optimised): 1.17 40508.00 | |
openpyxl : 1.57 58584.00 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment