mh0w · January 27, 2025 16:16
diff --git a/Extract table schemas from an EPIDD.py b/Extract table schemas from an EPIDD.py
 """
 Extract table schemas from an EPIDD.

 Requirements: pip install --upgrade pandas xlsxwriter python-docx
 """

 import pandas as pd
 from docx.api import Document

 doc_path = "C:/path/to/my.docx"
 out_path = "C:/path/to/my.xlsx"

 document = Document(doc_path)

 my_dict = {}
 j = 0

 with pd.ExcelWriter(out_path) as my_workbook:
    for i in (
        list(range(23, 67, 2))
        + list(range(68, 72, 2))
        + list(range(73, 83, 2))
        + list(range(84, 108, 2))
    ):
        j += 1
        print(f"5.{j}")
        my_dict[f"5.{j}"] = document.tables[i]

        data = []
        keys = None

        for i, row in enumerate(my_dict[f"5.{j}"].rows):
            text = (cell.text for cell in row.cells)

            if i == 0:
                keys = tuple(text)
                continue
            row_data = dict(zip(keys, text))
            data.append(row_data)

        my_dict[f"5.{j}"] = pd.DataFrame(data)

        # export to excel file
        my_dict[f"5.{j}"].to_excel(my_workbook, sheet_name=f"5.{j}", index=False)

        # Resize the worksheet columns
        my_workbook.sheets[f"5.{j}"].set_column(1, 1, 30)
        my_workbook.sheets[f"5.{j}"].set_column(2, 2, 20)
        my_workbook.sheets[f"5.{j}"].set_column(3, 3, 115)
        my_workbook.sheets[f"5.{j}"].set_column(4, 4, 30)

 print(f"See '{out_path}'")
	"""
	Extract table schemas from an EPIDD.

	Requirements: pip install --upgrade pandas xlsxwriter python-docx
	"""

	import pandas as pd
	from docx.api import Document

	doc_path = "C:/path/to/my.docx"
	out_path = "C:/path/to/my.xlsx"

	document = Document(doc_path)

	my_dict = {}
	j = 0

	with pd.ExcelWriter(out_path) as my_workbook:
	for i in (
	list(range(23, 67, 2))
	+ list(range(68, 72, 2))
	+ list(range(73, 83, 2))
	+ list(range(84, 108, 2))
	):
	j += 1
	print(f"5.{j}")
	my_dict[f"5.{j}"] = document.tables[i]

	data = []
	keys = None

	for i, row in enumerate(my_dict[f"5.{j}"].rows):
	text = (cell.text for cell in row.cells)

	if i == 0:
	keys = tuple(text)
	continue
	row_data = dict(zip(keys, text))
	data.append(row_data)

	my_dict[f"5.{j}"] = pd.DataFrame(data)

	# export to excel file
	my_dict[f"5.{j}"].to_excel(my_workbook, sheet_name=f"5.{j}", index=False)

	# Resize the worksheet columns
	my_workbook.sheets[f"5.{j}"].set_column(1, 1, 30)
	my_workbook.sheets[f"5.{j}"].set_column(2, 2, 20)
	my_workbook.sheets[f"5.{j}"].set_column(3, 3, 115)
	my_workbook.sheets[f"5.{j}"].set_column(4, 4, 30)

	print(f"See '{out_path}'")