Last active
November 6, 2019 06:26
-
-
Save gwangjinkim/d02f1a6a9c4e29b0529c3983a308d826 to your computer and use it in GitHub Desktop.
Partial solution for a CSV parsing problem
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Both inputs are semicolon-separated and read without a header row, so
# pandas labels the columns 0..N-1; the rest of the script addresses the
# columns by position.
df = pd.read_csv("XXXXX_20180101_20171231_start.csv", sep=";", header=None)
df_target = pd.read_csv("XXXX_20180101_20171231_test2_target.csv", sep=";", header=None)

# Inert notes string: sample output recorded while inspecting both frames.
"""
inspect by:

df.head()
   0                   1          2                 3  4
0  1  XXXXX_CPTTEL_EAG01   11610.50  31/12/2017 00:00  0
1  2  XXXXX_CPTTEL_EAG04  143960.25  31/12/2017 00:00  0
2  3  XXXXX_CPTTEL_EAG07      86.57  31/12/2017 00:00  0
3  4  XXXXX_CPTTEL_EAS05    8263.12  31/12/2017 00:00  0
4  5  XXXXX_CPTTEL_EDV01    1455.90  31/12/2017 00:00  0

df_target.head()
              0                    1                   2    3           4
0  Europe/Paris  2017/12/31 00:00:00  XXXXX_CPTTEL_EAG04  901  143960.250
1  Europe/Paris  2017/12/31 00:00:00  XXXXX_CPTTEL_EAG07  901      86.570
2  Europe/Paris  2017/12/31 00:00:00  XXXXX_CPTTEL_EAS05  901    8263.120
3  Europe/Paris  2017/12/31 00:00:00  XXXXX_CPTTEL_EDV01  901    1455.900
4  Europe/Paris  2017/12/31 00:00:00  XXXXX_CPTTEL_EL010  402      47.178
"""
def reverse_date(s):
    """Return *s* with its "/"-separated fields in reverse order.

    Example: "31/12/2017" becomes "2017/12/31". The fields are only
    reordered; they are neither validated nor zero-padded.
    """
    return '/'.join(reversed(s.split("/")))
def h_min_to_h_min_sec(s):
    """Append a zero seconds field to *s*, e.g. "00:00" -> "00:00:00"."""
    return s + ':00'
def time_transform(s):
    """Convert a "DD/MM/YYYY HH:MM" stamp into "YYYY/MM/DD HH:MM:SS".

    The string is split on whitespace into a date part and a time part;
    the date fields are reversed and ":00" is appended to the time.

    Raises:
        ValueError: if *s* does not split into exactly two
            whitespace-separated parts.
    """
    date, h_min = s.split()
    return ' '.join([reverse_date(date), h_min_to_h_min_sec(h_min)])
# Lookup table taken from the target file: value in column 2 -> value in
# column 3 of the same row. If a column-2 value repeats, the last row wins.
d = dict(zip(df_target.iloc[:, 2], df_target.iloc[:, 3]))
# This dictionary is only an example; the lookup used by the definition
# that follows could just as easily be a function.
def transform(df, dictionary):
    """Rearrange *df* into a five-column frame.

    Output columns, in order:
      0. the constant 'Europe/Paris'
      1. column 3 of *df* passed through ``time_transform``
      2. column 1 of *df*
      3. the lookup result for each value of column 1
      4. column 2 of *df*

    Args:
        df: DataFrame addressed by position; columns 1, 2 and 3 are read.
        dictionary: either a mapping indexed with the values of column 1,
            or a callable that takes such a value and returns the entry.

    Returns:
        A new DataFrame with one row per row of *df*.

    Raises:
        KeyError: if *dictionary* is a dict that lacks a column-1 value.
    """
    # A plain dict is not callable, so mappings keep the ``dictionary[x]``
    # behaviour (including KeyError for unknown keys); callables are used
    # directly.
    lookup = dictionary if callable(dictionary) else dictionary.__getitem__
    names = list(df.iloc[:, 1])
    # Each inner list holds one output column; transposing turns those
    # five lists into five columns.
    return pd.DataFrame([['Europe/Paris'] * df.shape[0],
                         [time_transform(x) for x in df.iloc[:, 3]],
                         names,
                         [lookup(x) for x in names],
                         list(df.iloc[:, 2])]).transpose()
# Convert the source frame using the example lookup table built above.
my_df = transform(df, d)

# Disabled draft, kept as an inert string: dump the lines of every member
# of a zip archive.
# NOTE(review): the draft uses the Python 2 ``print`` statement and its
# indentation was reconstructed; port and verify before enabling.
"""
import zipfile
def read_zip_file(filepath):
    zfile = zipfile.ZipFile(filepath)
    for finfo in zfile.infolist():
        ifile = zfile.open(finfo)
        line_list = ifile.readlines()
        print line_list
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
First part of a solution to the CSV parsing problem for https://www.freelancer.com/projects/data-entry/Parser-adapt-the-format-zipped/details