Created
June 2, 2022 18:23
-
-
Save kusal1990/430af2f9ea77decea0a3022653a03aaa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def data_preparation(start, end, praq_train):
    """Build feature and target arrays for the signal columns in [start, end).

    The columns named ``str(start)`` .. ``str(end - 1)`` are read from
    '/content/train.parquet'.  For every measurement id in the matching
    slice of the module-level ``df_metadata_train`` index, the three phases
    are transformed with ``transform_signal`` and joined along axis 1.

    Parameters:
        start: first signal id (inclusive) to load; also selects, divided
            by 3, where the slice of measurement ids begins.
        end: last signal id (exclusive) to load; divided by 3 it marks the
            end of the slice of measurement ids.
        praq_train: accepted for call compatibility only.  Its value is
            never read; the data is always loaded from the parquet file.

    Returns:
        A tuple ``(X, y)`` of numpy arrays: ``X`` stacks one concatenated
        feature matrix per measurement, ``y`` holds one target per
        measurement (taken from phase 0).
    """
    # Load only the requested slice of signal columns from disk.
    wanted_columns = [str(i) for i in range(start, end)]
    signals = pq.read_pandas('/content/train.parquet', columns=wanted_columns).to_pandas()

    # start/end are signal ids and each measurement owns 3 signals, so the
    # bounds are divided by 3 to index into the measurement-id level.
    first_measurement = int(start / 3)
    last_measurement = int(end / 3)
    measurement_ids = df_metadata_train.index.levels[0].unique()[first_measurement:last_measurement]

    features = []
    targets = []
    # tqdm reports progress while iterating over the measurements.
    for id_measurement in tqdm(measurement_ids):
        measurement_rows = df_metadata_train.loc[id_measurement]
        phase_features = []
        for phase in (0, 1, 2):
            # Each metadata row unpacks into the signal id and its target.
            signal_id, target = measurement_rows.loc[phase]
            # Record the target once per measurement, not once per phase.
            if phase == 0:
                targets.append(target)
            phase_features.append(transform_signal(signals[str(signal_id)]))
        # Put the three phases side by side in a single matrix.
        features.append(np.concatenate(phase_features, axis=1))

    return np.asarray(features), np.asarray(targets)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ok