This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for ebird in final_data: | |
print("Starting to process a new species: ", ebird) | |
ebird_data = train_csv[train_csv['species'] == ebird] | |
short_file_name = ebird_data['ebird_code'].unique()[0] | |
print("Short file name: ", short_file_name) | |
result = [] | |
for index, row in ebird_data.iterrows(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@ray.remote | |
def extract_feautres(trial_audio_file_path): | |
# process data frame | |
function_start_time = dt.datetime.now() | |
print("Started a file processing at ", function_start_time) | |
df0 = u.extract_feature_means(trial_audio_file_path) | |
function_finish_time = dt.datetime.now() | |
print("Fininished the file processing at ", function_finish_time) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for ebird in final_data: | |
print("Starting to process a new species: ", ebird) | |
ebird_data = train_csv[train_csv['species'] == ebird] | |
short_file_name = ebird_data['ebird_code'].unique()[0] | |
print("Short file name: ", short_file_name) | |
pool = mp.Pool(c.NUMBER_OF_CPU_IN_POOL) # use the number of parallel processes as per the configured | |
funclist = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_feature_means(audio_file_path: str) -> pd.DataFrame: | |
# config settings | |
number_of_mfcc = c.NUMBER_OF_MFCC | |
# 1. Importing 1 file | |
y, sr = librosa.load(audio_file_path) | |
# Trim leading and trailing silence from an audio signal (silence before and after the actual audio) | |
signal, _ = librosa.effects.trim(y) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
from pyspark.sql.functions import year, month, dayofmonth | |
# Sample DataFrame: five rows for 1994-01-01, with "hour" running 1..5 and
# the same "value" on every row. The rows differ only in the hour, so they
# are generated instead of being written out one by one.
elevDF = sc.parallelize(
    [
        (datetime.datetime(1994, 1, 1, 0, 0), hour, 638.55)
        for hour in range(1, 6)
    ]
).toDF(["date", "hour", "value"])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import matplotlib.pyplot as plt | |
# Missing value summary | |
nan_columns = [] | |
nan_values = [] | |
for column in df.columns: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import clone | |
def drop_col_feat_imp(model, X_train, y_train, random_state = 42): | |
# clone the model to have the exact same specification as the one initially trained | |
model_clone = clone(model) | |
# set random_state for comparability | |
model_clone.random_state = random_state | |
# training and scoring the benchmark model | |
model_clone.fit(X_train, y_train) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.inspection import permutation_importance | |
# Here's how you use permutation importance | |
def get_permutation_importance(X, y, model) -> pd.DataFrame: | |
result = permutation_importance(model, X, y, n_repeats=1, | |
random_state=0) | |
# permutational importance results | |
result_df = pd.DataFrame(colnames, columns=['Feature']) | |
result_df['permutation_importance'] = result.get('importances') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import SelectFromModel | |
# Embedded feature selection: let the estimator `modeller` decide which
# columns of X to keep, capped at 200 features.
embeded_rf_selector = SelectFromModel(modeller, max_features=200)
embeded_rf_selector.fit(X, y)

# Boolean mask over the columns of X (True = feature kept by the selector).
embeded_rf_support = embeded_rf_selector.get_support()

# Names of the kept columns. Indexing the column Index with the mask gives
# the same list as X.loc[:, mask].columns without copying the data.
embeded_rf_feature = X.columns[embeded_rf_support].tolist()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import RFE | |
# Define dictionary to store our rankings
ranks = {}


# Create our function which stores the feature rankings to the ranks dictionary
def ranking(ranks, names, order=1):
    """Min-max scale raw feature scores and map them onto feature names.

    Args:
        ranks: Sequence with one raw score per feature. Note that this
            parameter shadows the module-level ``ranks`` dictionary inside
            the function.
        names: Feature names, in the same order as ``ranks``.
        order: Multiplier applied to the scores before scaling. With
            ``order=-1`` the smallest raw score ends up with the largest
            scaled value.

    Returns:
        dict: ``{name: scaled_score}`` with every score rounded to two
        decimals.
    """
    # MinMaxScaler scales column-wise, so the scores are presented as a
    # single (n_features, 1) column and flattened back afterwards.
    score_column = order * np.array([ranks]).T
    scaled_scores = MinMaxScaler().fit_transform(score_column).T[0]
    return {
        name: round(score, 2)
        for name, score in zip(names, scaled_scores)
    }