Created
January 9, 2025 15:20
-
-
Save kschlottmann/b7ad13883ec3ac79660b306b1809cdbc to your computer and use it in GitHub Desktop.
Loads Excel file in AS import template format, uses Pandas to crudely split dates from titles
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
#load csv file - NB first remove extra rows at top, need to replace these before import | |
#df = pd.read_csv('clean_input.csv', encoding = 'ISO-8859-1', low_memory=False) | |
#load Excel directly, - NB first remove extra rows at top, need to replace these before import | |
df = pd.read_excel(open('clean_input.xlsx','rb')) | |
#print column headings | |
#print(list(df)) | |
#define x as the number of rows | |
for x in range(0, 10893): | |
title = df.at[x, "title"] | |
date = df.at[x, "expression"] | |
#Check to see if date expression already exists | |
if isinstance(date, str): | |
print("nothing") | |
else: | |
newdate = title.rsplit(', ', 1) | |
if len(newdate) < 2: | |
print("no len date") | |
else: | |
df.at[x, "title"] = newdate[0] | |
df.at[x, "expression"] = newdate[1] | |
print(newdate) | |
#output csv | |
#df.to_csv('out.csv') | |
#output xslx | |
df.to_excel('out.xlsx') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment