Skip to content

Instantly share code, notes, and snippets.

View gvyshnya's full-sized avatar

George Vyshnya gvyshnya

View GitHub Profile
from sklearn.base import clone
def drop_col_feat_imp(model, X_train, y_train, random_state = 42):
# clone the model to have the exact same specification as the one initially trained
model_clone = clone(model)
# set random_state for comparability
model_clone.random_state = random_state
# training and scoring the benchmark model
model_clone.fit(X_train, y_train)
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Missing value summary
nan_columns = []
nan_values = []
for column in df.columns:
import datetime
from pyspark.sql.functions import year, month, dayofmonth
elevDF = sc.parallelize([
(datetime.datetime(1994, 1, 1, 0, 0), 1, 638.55),
(datetime.datetime(1994, 1, 1, 0, 0), 2, 638.55),
(datetime.datetime(1994, 1, 1, 0, 0), 3, 638.55),
(datetime.datetime(1994, 1, 1, 0, 0), 4, 638.55),
(datetime.datetime(1994, 1, 1, 0, 0), 5, 638.55)
]).toDF(["date", "hour", "value"])
@gvyshnya
gvyshnya / Audio Feature Extraction.py
Created September 2, 2020 19:16
Audio Feature Extraction from Audio Files using Librosa
def extract_feature_means(audio_file_path: str) -> pd.DataFrame:
# config settings
number_of_mfcc = c.NUMBER_OF_MFCC
# 1. Load a single audio file (waveform y and its sampling rate sr)
y, sr = librosa.load(audio_file_path)
# Trim leading and trailing silence from an audio signal (silence before and after the actual audio)
signal, _ = librosa.effects.trim(y)
for ebird in final_data:
print("Starting to process a new species: ", ebird)
ebird_data = train_csv[train_csv['species'] == ebird]
short_file_name = ebird_data['ebird_code'].unique()[0]
print("Short file name: ", short_file_name)
pool = mp.Pool(c.NUMBER_OF_CPU_IN_POOL) # use the configured number of parallel processes
funclist = []
@gvyshnya
gvyshnya / Parallel Audio Feature Extraction with Ray.py
Created September 2, 2020 19:25
Parallel Audio Feature Extraction with Ray
@ray.remote
def extract_feautres(trial_audio_file_path):
# process data frame
function_start_time = dt.datetime.now()
print("Started a file processing at ", function_start_time)
df0 = u.extract_feature_means(trial_audio_file_path)
function_finish_time = dt.datetime.now()
print("Fininished the file processing at ", function_finish_time)
@gvyshnya
gvyshnya / Parallel Audio Feature Extraction with Dask.py
Created September 2, 2020 19:29
Parallel Audio Feature Extraction with Dask
for ebird in final_data:
print("Starting to process a new species: ", ebird)
ebird_data = train_csv[train_csv['species'] == ebird]
short_file_name = ebird_data['ebird_code'].unique()[0]
print("Short file name: ", short_file_name)
result = []
for index, row in ebird_data.iterrows():
@gvyshnya
gvyshnya / AutoViz_Issues
Created October 3, 2020 20:21
AutoViz Issues
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
from typing import Tuple
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
@gvyshnya
gvyshnya / AutoViz_Minor_Issue
Last active October 7, 2020 21:36
AutoViz Minor Issue: crash on a dataset without any significant/important variable
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
from typing import Tuple
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
@gvyshnya
gvyshnya / plotly_facet_plot_with_violin_subplots.py
Created October 18, 2020 18:53
Plotly Facet Plot with Violin Subplots
def visualize_features_vs_target_label(df_data, label, feature_list, n_cols=3):
if len(feature_list) % n_cols == 0:
number_of_rows = int(len(feature_list)/n_cols)
else:
number_of_rows = int(len(feature_list)/n_cols) +1
fig = make_subplots(rows=number_of_rows, cols=n_cols)