Last active
March 19, 2022 15:54
-
-
Save jjjake/c8d7b81e5d009c12c1850c7fae0c3c65 to your computer and use it in GitHub Desktop.
This script converts a Microsoft Excel spreadsheet to a UTF-8 CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Convert a Microsoft Excel spreadsheet to a UTF-8 csv. | |
Usage: | |
# Make sure requirements are installed. | |
$ sudo pip install xlrd backports.csv | |
# Run script. | |
$ python convert_xls_to_utf8_csv.py <spreadsheet> | |
The CSV will be saved to the same path as the original file, but with | |
'.csv' appended. For example, '~/spreadsheets/foo.xlsx' would be saved | |
to '~/spreadsheets/foo.xlsx.csv'. | |
""" | |
import sys | |
import csv | |
import json | |
import io | |
import xlrd | |
import backports.csv as csv | |
def convert_xls_to_dict(filepath, sheet_index=0):
    """Convert one sheet of a Microsoft Excel spreadsheet to a list of dicts.

    The first row of the sheet is treated as the header. Each header cell
    is turned into a dict key by dropping everything up to and including
    the first ':' (so 'dc:Title' becomes 'title') and lower-casing what
    remains.

    Args:
        filepath: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_index: Zero-based index of the sheet to read. Defaults to
            the first sheet.

    Returns:
        A list with one dict per row below the header, mapping each
        normalized header to that row's cell value. The list is empty
        when the sheet has no rows below the header. If two headers
        normalize to the same key, the right-most column's value wins.
    """
    workbook = xlrd.open_workbook(filepath)
    worksheet = workbook.sheet_by_index(sheet_index)
    columns = range(worksheet.ncols)

    # Normalize the header row once instead of once per data cell.
    keys = []
    for col in columns:
        header = worksheet.cell_value(0, col)
        # Header cells are not always text (xlrd returns numeric cells as
        # floats); coerce them so a numeric column title does not raise
        # AttributeError on .split().
        if not hasattr(header, 'split'):
            header = u'{}'.format(header)
        keys.append(header.split(':', 1)[-1].lower())

    return [
        {key: worksheet.cell_value(row, col) for col, key in zip(columns, keys)}
        for row in range(1, worksheet.nrows)
    ]
if __name__ == '__main__':
    # Without an argument sys.argv[-1] is this script itself, which would
    # then be parsed as a spreadsheet; fail early with a usage message.
    if len(sys.argv) < 2:
        sys.exit('Usage: python {} <spreadsheet>'.format(sys.argv[0]))
    spreadsheet = sys.argv[-1]

    # Read the spreadsheet before creating the output file so that an
    # unreadable or missing input does not leave an empty CSV behind.
    rows = convert_xls_to_dict(spreadsheet)

    # The CSV is written next to the input, with '.csv' appended to its name.
    with io.open('{}.csv'.format(spreadsheet), 'w', newline='', encoding='utf-8') as fh:
        writer = csv.writer(fh)
        # The header comes from the first row's keys; a sheet with no data
        # rows therefore produces an empty file.
        if rows:
            writer.writerow(rows[0].keys())
        writer.writerows(row.values() for row in rows)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment