Last active
January 27, 2025 16:16
-
-
Save mh0w/8365e880fe640b830860354da33f914e to your computer and use it in GitHub Desktop.
Extract table schemas from an EPIDD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Extract table schemas from an EPIDD. | |
Requirements: pip install --upgrade pandas xlsxwriter python-docx | |
""" | |
import pandas as pd | |
from docx.api import Document | |
doc_path = "C:/path/to/my.docx" | |
out_path = "C:/path/to/my.xlsx" | |
document = Document(doc_path) | |
my_dict = {} | |
j = 0 | |
with pd.ExcelWriter(out_path) as my_workbook: | |
for i in ( | |
list(range(23, 67, 2)) | |
+ list(range(68, 72, 2)) | |
+ list(range(73, 83, 2)) | |
+ list(range(84, 108, 2)) | |
): | |
j += 1 | |
print(f"5.{j}") | |
my_dict[f"5.{j}"] = document.tables[i] | |
data = [] | |
keys = None | |
for i, row in enumerate(my_dict[f"5.{j}"].rows): | |
text = (cell.text for cell in row.cells) | |
if i == 0: | |
keys = tuple(text) | |
continue | |
row_data = dict(zip(keys, text)) | |
data.append(row_data) | |
my_dict[f"5.{j}"] = pd.DataFrame(data) | |
# export to excel file | |
my_dict[f"5.{j}"].to_excel(my_workbook, sheet_name=f"5.{j}", index=False) | |
# Resize the worksheet columns | |
my_workbook.sheets[f"5.{j}"].set_column(1, 1, 30) | |
my_workbook.sheets[f"5.{j}"].set_column(2, 2, 20) | |
my_workbook.sheets[f"5.{j}"].set_column(3, 3, 115) | |
my_workbook.sheets[f"5.{j}"].set_column(4, 4, 30) | |
print(f"See '{out_path}'") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment