This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import clone | |
def drop_col_feat_imp(model, X_train, y_train, random_state = 42): | |
# clone the model to have the exact same specification as the one initially trained | |
model_clone = clone(model) | |
# set random_state for comparability | |
model_clone.random_state = random_state | |
# training and scoring the benchmark model | |
model_clone.fit(X_train, y_train) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import matplotlib.pyplot as plt | |
# Missing value summary | |
nan_columns = [] | |
nan_values = [] | |
for column in df.columns: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
from pyspark.sql.functions import year, month, dayofmonth | |
# Sample data: five rows sharing one timestamp and one value, with the
# "hour" column running from 1 through 5. The rows are handed to
# sc.parallelize and the result is converted with toDF using the column
# names "date", "hour" and "value".
# NOTE(review): `sc` is not defined in this snippet -- presumably an
# already-running SparkContext; confirm against the surrounding session.
elevDF = sc.parallelize(
    [(datetime.datetime(1994, 1, 1, 0, 0), hour, 638.55) for hour in range(1, 6)]
).toDF(["date", "hour", "value"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_feature_means(audio_file_path: str) -> pd.DataFrame: | |
# config settings | |
number_of_mfcc = c.NUMBER_OF_MFCC | |
# 1. Importing 1 file | |
y, sr = librosa.load(audio_file_path) | |
# Trim leading and trailing silence from an audio signal (silence before and after the actual audio) | |
signal, _ = librosa.effects.trim(y) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for ebird in final_data: | |
print("Starting to process a new species: ", ebird) | |
ebird_data = train_csv[train_csv['species'] == ebird] | |
short_file_name = ebird_data['ebird_code'].unique()[0] | |
print("Short file name: ", short_file_name) | |
pool = mp.Pool(c.NUMBER_OF_CPU_IN_POOL) # use the number of parallel processes as per the configured | |
funclist = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ray.remote | |
def extract_feautres(trial_audio_file_path): | |
# process data frame | |
function_start_time = dt.datetime.now() | |
print("Started a file processing at ", function_start_time) | |
df0 = u.extract_feature_means(trial_audio_file_path) | |
function_finish_time = dt.datetime.now() | |
print("Fininished the file processing at ", function_finish_time) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for ebird in final_data: | |
print("Starting to process a new species: ", ebird) | |
ebird_data = train_csv[train_csv['species'] == ebird] | |
short_file_name = ebird_data['ebird_code'].unique()[0] | |
print("Short file name: ", short_file_name) | |
result = [] | |
for index, row in ebird_data.iterrows(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np # linear algebra | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
import datetime as dt | |
from typing import Tuple | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import plotly.express as px |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np # linear algebra | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
import datetime as dt | |
from typing import Tuple | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import plotly.express as px |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def visualize_features_vs_target_label(df_data, label, feature_list, n_cols=3): | |
if len(feature_list) % n_cols == 0: | |
number_of_rows = int(len(feature_list)/n_cols) | |
else: | |
number_of_rows = int(len(feature_list)/n_cols) +1 | |
fig = make_subplots(rows=number_of_rows, cols=n_cols) | |