Skip to content

Instantly share code, notes, and snippets.

@gwangjinkim
Last active November 6, 2019 06:26
Show Gist options
  • Save gwangjinkim/d02f1a6a9c4e29b0529c3983a308d826 to your computer and use it in GitHub Desktop.
Save gwangjinkim/d02f1a6a9c4e29b0529c3983a308d826 to your computer and use it in GitHub Desktop.
Partial solution for a CSV parsing problem
import pandas as pd
# Both files are semicolon-separated and are read without a header row,
# so pandas labels their columns by position (0, 1, 2, ...).
df = pd.read_csv(
    "XXXXX_20180101_20171231_start.csv",
    header=None,
    sep=";",
)
df_target = pd.read_csv(
    "XXXX_20180101_20171231_test2_target.csv",
    header=None,
    sep=";",
)
"""
inspect by:
df.head()
0 1 2 3 4
0 1 XXXXX_CPTTEL_EAG01 11610.50 31/12/2017 00:00 0
1 2 XXXXX_CPTTEL_EAG04 143960.25 31/12/2017 00:00 0
2 3 XXXXX_CPTTEL_EAG07 86.57 31/12/2017 00:00 0
3 4 XXXXX_CPTTEL_EAS05 8263.12 31/12/2017 00:00 0
4 5 XXXXX_CPTTEL_EDV01 1455.90 31/12/2017 00:00 0
df_target.head()
0 1 2 3 4
0 Europe/Paris 2017/12/31 00:00:00 XXXXX_CPTTEL_EAG04 901 143960.250
1 Europe/Paris 2017/12/31 00:00:00 XXXXX_CPTTEL_EAG07 901 86.570
2 Europe/Paris 2017/12/31 00:00:00 XXXXX_CPTTEL_EAS05 901 8263.120
3 Europe/Paris 2017/12/31 00:00:00 XXXXX_CPTTEL_EDV01 901 1455.900
4 Europe/Paris 2017/12/31 00:00:00 XXXXX_CPTTEL_EL010 402 47.178
"""
def reverse_date(s):
    """Reverse the order of the '/'-separated fields of a date string.

    Turns a day-first date such as ``'31/12/2017'`` into the year-first
    form ``'2017/12/31'`` (and vice versa). The fields themselves are
    left untouched; a string without ``'/'`` is returned unchanged.

    Args:
        s: Date string whose fields are separated by ``'/'``.

    Returns:
        The same fields joined by ``'/'`` in reverse order.
    """
    return '/'.join(reversed(s.split('/')))
def h_min_to_h_min_sec(s):
    """Append a zero seconds field to an hours:minutes time string.

    Args:
        s: Time string without seconds, e.g. ``'00:00'``.

    Returns:
        ``s`` followed by ``':00'``, e.g. ``'00:00:00'``.
    """
    return s + ':00'
def time_transform(s):
    """Convert a ``'31/12/2017 00:00'`` style timestamp to ``'2017/12/31 00:00:00'``.

    The input is split on whitespace into a date and a time. The date
    fields are reversed with ``reverse_date`` and the time gets a seconds
    field from ``h_min_to_h_min_sec``.

    Args:
        s: Timestamp consisting of a date and a time separated by
            whitespace.

    Returns:
        The converted date and time joined by a single space.

    Raises:
        ValueError: If ``s`` does not split into exactly two
            whitespace-separated parts.
    """
    date, h_min = s.split()
    return ' '.join([reverse_date(date), h_min_to_h_min_sec(h_min)])
# This is only an example lookup table: it maps each value in column 2 of
# df_target to the value in column 3 of the same row (for repeated keys the
# last row wins). The definition of transform() below could just as easily
# be changed to use a function instead of a dictionary.
# Zipping the two raw column arrays avoids one scalar `iloc` lookup per cell.
d = dict(zip(df_target.iloc[:, 2].values, df_target.iloc[:, 3].values))
def transform(df, dictionary):
    """Rebuild the rows of ``df`` in a five-column layout.

    Columns of ``df`` are addressed by position. The output columns are:

    0. the constant string ``'Europe/Paris'``,
    1. column 3 of ``df`` passed through ``time_transform``,
    2. column 1 of ``df`` unchanged,
    3. the value that ``dictionary`` yields for each column-1 entry,
    4. column 2 of ``df`` unchanged.

    Args:
        df: Input ``pandas.DataFrame``; columns 1, 2 and 3 are read.
        dictionary: Mapping from a column-1 value to the value placed in
            output column 3. A callable taking the column-1 value is
            accepted as well.

    Returns:
        A new ``pandas.DataFrame`` with one row per row of ``df``.

    Raises:
        KeyError: If ``dictionary`` is a mapping that lacks one of the
            column-1 values.
    """
    # Accept either a mapping (indexed) or a plain function (called).
    lookup = dictionary if callable(dictionary) else dictionary.__getitem__
    output_columns = [
        ['Europe/Paris'] * df.shape[0],
        [time_transform(x) for x in df.iloc[:, 3]],
        list(df.iloc[:, 1]),
        [lookup(x) for x in df.iloc[:, 1]],
        list(df.iloc[:, 2]),
    ]
    # Each inner list is one output column, so the frame is built with the
    # lists as rows and then transposed.
    return pd.DataFrame(output_columns).transpose()
# Convert the frame read from the start file, using the example lookup table.
my_df = transform(df=df, dictionary=d)
"""
import zipfile
def read_zip_file(filepath):
zfile = zipfile.ZipFile(filepath)
for finfo in zfile.infolist():
ifile = zfile.open(finfo)
line_list = ifile.readlines()
print line_list
"""
@gwangjinkim
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment