This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the list of picture files to drop from the data set.
# Both sheets live in the same labeling workbook, so it is opened only once.
with pd.ExcelFile("./roman-numerals-labeling-plb-20210830.xlsx") as labeling_workbook:
    # We extract the list of duplicate files to remove
    duplicates = pd.read_excel(labeling_workbook, sheet_name="duplicates")
    # as well as the per-file analysis (only spreadsheet columns B to L are read)
    files_analysis = pd.read_excel(labeling_workbook, sheet_name="analysis", usecols="B:L")

duplicates_list = duplicates["file"].tolist()
# Files whose "to_be_removed" flag equals 1 (the unreadable files, per the original note).
erroneous_list = files_analysis.loc[files_analysis["to_be_removed"] == 1, "file"].tolist()
removal_list = duplicates_list + erroneous_list
print(len(duplicates_list), "duplicates +", len(erroneous_list), "errouneous =", len(removal_list), "pictures to remove.")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Location of the data on disk (presumably the root that holds the sub-folders
# below — TODO confirm against the FOLDER class).
DATA_FOLDER = "./data/"
# Sub-folders passed to the initial summary; "label_book" appears only in this list.
INITIAL_FOLDERS = ["train", "val", "label_book"]
# The same list without "label_book".
FOLDERS = ["train", "val"]
# Class labels: the Roman numerals one to ten, in lower case.
LABELS = ["i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]

# Describe the data as initially laid out and call its summary() method.
initial_folder = FOLDER(DATA_FOLDER, INITIAL_FOLDERS, LABELS)
initial_folder.summary()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class FOLDER(): | |
def __init__(self, DATA_FOLDER, FOLDERS, LABELS): | |
self.DATA_FOLDER = DATA_FOLDER | |
self.FOLDERS = FOLDERS | |
self.LABELS = LABELS | |
def summary(self, display_ratio=4): | |
for folder in self.FOLDERS: |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report classification results on the training set.
print("TRAIN PERFORMANCE:\n")

# Wrap the raw matrix in a labeled DataFrame: rows are labeled as actual
# classes, columns as predicted classes.
# NOTE(review): the two-by-two labels assume a binary target — confirm.
confusion_matrix_train = pd.DataFrame(
    confusion_matrix(y_train, model.predict(X_train)),
    index=["Actual_No", "Actual_Yes"],
    columns=["Predicted_No", "Predicted_Yes"],
)
display(confusion_matrix_train)

# Recall of the "Yes" class: the (Actual_Yes, Predicted_Yes) cell divided by
# the total of the Actual_Yes row.
recall_resignation_train = confusion_matrix_train.iloc[1, 1] / confusion_matrix_train.iloc[1, :].sum()
print("Train Score: {}".format(round(model.score(X_train, y_train), 3)))
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fdasrsf import fPCA, time_warping, fdawarp, fdahpca

# Functional Alignment
# Align time-series. `f` and `time` are defined outside this snippet
# (presumably the sampled curves and their common time grid — TODO confirm).
warp_f = time_warping.fdawarp(f, time)
warp_f.srsf_align()
warp_f.plot()

# Functional Principal Components Analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Import the CSV file with only useful columns
# source: https://www.data.gouv.fr/fr/datasets/temperature-quotidienne-departementale-depuis-janvier-2018/
# The file is semicolon-separated; only the columns at positions 0, 1 and 4 are read.
df = pd.read_csv("temperature-quotidienne-departementale.csv", sep=";", usecols=[0, 1, 4])

# Rename columns to simplify syntax
df = df.rename(columns={"Code INSEE département": "Region", "TMax (°C)": "Temp"})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# We start with the import of standard ML libraries
import math

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
Newer Older