Created
December 17, 2019 18:02
-
-
Save allenanie/66d92e5bf6bf1da89bb50f21f970f4e9 to your computer and use it in GitHub Desktop.
Parsing latex table in CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Convert LaTeX tables to CSV files.
""" | |
import csv | |
import re | |
def _clean_cell(cell):
    """Return one table cell with the LaTeX markup known to to_csv removed."""
    # Dropping every backslash also takes care of row breaks and escaped
    # percent signs, so no separate "\%" replacement is needed afterwards.
    cell = cell.strip().replace("\\", "").replace("~~", " ")
    # The opener of a nested one-column tabular shows up in two spellings:
    # "\x08egin..." when the table came from a non-raw Python literal (where
    # "\b" is a backspace character), and "begin..." when it came from a raw
    # string or a file and only lost its backslash on the line above.
    for opener in ("\x08egin{tabular}{@{}l@{}}", "begin{tabular}{@{}l@{}}"):
        cell = cell.replace(opener, "")
    cell = cell.replace("end{tabular}", "")
    # Drop the " ~ " spacer (a tie used as an indent after a line break).
    cell = cell.replace(" ~ ", "")
    return cell.strip()


def to_csv(latex_text, file_name):
    """Write the rows of a LaTeX table to a CSV file.

    Every line of the input that contains "&" is treated as one table row and
    split into cells on "&"; all other lines (rules, begin/end of the outer
    environment, captions) are skipped. Each cell is stripped of backslashes,
    "~~" indents, and the begin/end markers of a nested one-column tabular.

    Known limitations: anything that follows the row terminator on the same
    line (a rule command or a "%" comment) stays in the last cell, and a "&"
    placed right before the row terminator yields an empty trailing cell.

    Arguments:
        latex_text {str} -- LaTeX source of the table, one row per line
        file_name {str} -- path of the CSV file to create or overwrite
    """
    rows = [
        [_clean_cell(cell) for cell in line.strip().split("&")]
        for line in latex_text.split("\n")
        if "&" in line
    ]
    # newline="" leaves line endings to the csv module; without it every row
    # ends in "\r\r\n" on platforms that translate "\n" on write.
    with open(file_name, "w", newline="") as f:
        csv.writer(f).writerows(rows)
# Script entry point: converts the two LaTeX tables hard-coded below into
# "Table1Demographics.csv" and "Table2SES.csv" by calling to_csv on each.
if __name__ == "__main__": | |
# NOTE: the table literals below are NOT raw strings, so Python processes
# their escapes before to_csv sees the text: "\b" (in \begin, \botrule)
# becomes a backspace character (\x08), "\t" (in \toprule, \tbl) becomes a
# tab, and "\\" becomes a single backslash. to_csv's cleanup replaces the
# "\x08egin{tabular}{@{}l@{}}" form, which is what these literals produce;
# converting them to raw strings changes what to_csv receives.
# Table 1: demographics. Only the lines containing "&" become CSV rows.
latex_text = """{\begin{tabular}{@{}llll@{}} | |
\toprule | |
& ACSC-related Visits & Non-ACSC-related Visits \\ | |
Predictor Variables & (n=823,759) & (n=1,926,289) \\ \colrule % (n=2,711,839) | |
Age, mean (SD) & 59.91 (17.99) & 39.66 (23.03) \\ | |
Race, num (\%) & & \\ | |
~~ White & 615237 (74.69\%) & 1319754 (68.51\%) \\ | |
~~ \begin{tabular}{@{}l@{}}Black or \\ ~ African American \end{tabular} & 61785 (7.5\%) & 110755 (5.75\%) & \\ | |
~~ Vietnamese & 2063 (0.25\%) & 2860 (0.15\%) & \\ | |
~~ \begin{tabular}{@{}l@{}}American Indian or \\ ~ Alaska Native \end{tabular} & 2054 (0.25\%) & 6148 (0.32\%) & \\ | |
~~ Filipino & 1001 (0.12\%) & 1249 (0.06\%) & \\ | |
~~ \begin{tabular}{@{}l@{}}Native Hawaiian or \\ ~ Other Pacific Islander \end{tabular} & 949 (0.12\%) & 2804 (0.15\%) & \\ | |
~~ Japanese & 678 (0.08\%) & 719 (0.04\%) & \\ | |
~~ Other Pacific Islander & 543 (0.07\%) & 1537 (0.08\%) & \\ | |
~~ Chinese & 198 (0.02\%) & 780 (0.04\%) & \\ | |
~~ Native Hawaiian & 147 (0.02\%) & 592 (0.03\%) & \\ | |
~~ Korean & 31 (0.0\%) & 80 (0.0\%) & \\ | |
~~ Asian Indian & 24 (0.0\%) & 25 (0.0\%) & \\ | |
~~ Other & $<11$ (0.0\%) & $<11$ (0.0\%) & \\ | |
~~ Missing & 139,045 (16.9\%) & 478,976 (24.9\%) \\ | |
Ethnicity, num (\%) & & \\ | |
~~ Non-Hispanic or Latino & 580099 (70.42\%) & 1218670 (63.27\%) & \\ | |
~~ Hispanic or Latino & 63875 (7.75\%) & 195740 (10.16\%) & \\ | |
~~ Mexican & 2447 (0.3\%) & 3838 (0.2\%) & \\ | |
~~ Central American & 166 (0.02\%) & 242 (0.01\%) & \\ | |
~~ Puerto Rican & 71 (0.01\%) & 159 (0.01\%) & \\ | |
~~ Cuban & 23 (0.0\%) & 52 (0.0\%) & \\ | |
~~ Other & 14 (0.0\%) & 42 (0.0\%) & \\ | |
~~ Missing & 177,064 (21.5\%) & 507,546 (26.3\%) \\ | |
Tobacco, num (\%) & & \\ | |
~~ Never Smoked & 311,761 (70.46\%) & 826,680 (76.36\%) & \\ | |
~~ Prior History & 78,799 (17.81\%) & 146,505 (13.53\%) & \\ | |
~~ Active Smoker & 51,876 (11.73\%) & 109,393 (10.1\%) & \\ | |
~~ Missing & 289,938 (35.2\%) & 925,961 (48.1\%) \\ | |
\botrule | |
\end{tabular}} | |
""" | |
# Write Table 1 to a CSV file in the current working directory.
to_csv(latex_text, "Table1Demographics.csv") | |
# Table 2: socioeconomic status. Every row label is wrapped in a nested
# one-column tabular, which to_csv strips from the first cell.
table2 = """\begin{table}[h] | |
\tbl{Socioeconomics Status (SES) Characteristics of the Patients. We report mean (standard deviation) in the table. SD computed over each individual.} | |
{\begin{tabular}{@{}lcc@{}} | |
\toprule | |
& ACSC-related Visits & Non-ACSC-related Visits \\ | |
Predictor Variables & (n=648,041) & (n=2,063,798) \\ \colrule | |
\begin{tabular}{@{}l@{}} The median household income \end{tabular} & 54,548.71 (20175.04) & 56,810.59 (21913.58) \\ | |
\begin{tabular}{@{}l@{}} Frac of high needs population \end{tabular} & 0.38 (0.11) & 0.37 (0.12) \\ | |
\begin{tabular}{@{}l@{}} Frac of population living in \\ ~~ renter occupied units \end{tabular} & 0.3 (0.15) & 0.3 (0.16) \\ | |
\begin{tabular}{@{}l@{}} Frac of households with children \\ ~~ and a single parent \end{tabular} & 0.16 (0.08) & 0.16 (0.08) \\ | |
\begin{tabular}{@{}l@{}} Frac of families with incomes $<$ 100\% \\ ~~ of the Federal Poverty Level (FPL) \end{tabular} & 0.15 (0.09) & 0.14 (0.09) \\ | |
\begin{tabular}{@{}l@{}} Frac of African American population \end{tabular} & 0.1 (0.15) & 0.08 (0.14) \\ | |
\begin{tabular}{@{}l@{}} Frac of Hispanic population \end{tabular} & 0.15 (0.21) & 0.16 (0.22) \\ | |
\begin{tabular}{@{}l@{}} Frac of households receiving public assistance \end{tabular} & 0.14 (0.08) & 0.13 (0.08) \\ | |
\begin{tabular}{@{}l@{}} Frac of population with no health \\ ~~ insurance coverage \end{tabular} & 0.14 (0.06) & 0.13 (0.06) \\ | |
\begin{tabular}{@{}l@{}} Frac of people age 25 or older \\ ~~ who have no high school degree \end{tabular} & 0.14 (0.1) & 0.13 (0.1) \\ | |
\begin{tabular}{@{}l@{}} Frac of houses that are vacant \end{tabular} & 0.13 (0.08) & 0.12 (0.08) \\ | |
\begin{tabular}{@{}l@{}} Frac of adults who are unemployed \end{tabular} & 0.08 (0.04) & 0.07 (0.04) \\ | |
\begin{tabular}{@{}l@{}} Frac of population that are foreign born \end{tabular} & 0.08 (0.1) & 0.09 (0.1) \\ | |
\begin{tabular}{@{}l@{}} Frac of household with no car \end{tabular} & 0.06 (0.05) & 0.05 (0.04) \\ | |
\begin{tabular}{@{}l@{}} Frac of population living in \\ ~~ crowded housing units \end{tabular} & 0.03 (0.04) & 0.03 (0.05) \\ | |
\botrule | |
\end{tabular}}\label{tab:ses-demo} | |
\end{table} | |
""" | |
# Write Table 2 to a CSV file in the current working directory.
to_csv(table2, "Table2SES.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment