Skip to content

Instantly share code, notes, and snippets.

View Steboss's full-sized avatar

Steboss Steboss

View GitHub Profile
@Steboss
Steboss / subsamples.py
Created February 22, 2021 11:07
Create subsamples for a signal
# smallest exponent: the first scale is 2**scale_low
scale_low = 5
# upper exponent bound (exclusive, np.arange leaves the stop value out)
scale_high = 9
# step between two consecutive exponents
scale_dense = 0.25
# scales are 2 raised to the evenly spaced exponents; the cast to a
# C int drops the fractional part
scales = np.power(
    2,
    np.arange(scale_low, scale_high, scale_dense),
).astype(np.intc)
# ...
# here there will be a cycle and define what is the current scale
# mean of the signal, subtracted from every sample below
avg_x = np.mean(X)
# cumulative profile: Xt[i] is the running sum of (X[k] - avg_x) for
# k = 0..i, accumulated along the first axis. np.cumsum performs the
# accumulation without an explicit index test for the first element;
# list() keeps Xt a plain Python list with one entry per element of X.
Xt = list(np.cumsum(np.asarray(X) - avg_x, axis=0))
June-July Biden June-July Trump September Biden September Trump
# pro comments 34'096 61'525 18'459 45'670
# contra comments 27'817 57'820 29'844 61'035
@Steboss
Steboss / classified_dataset.csv
Created October 21, 2020 14:33
Final classified comments
We can make this file beautiful and searchable if this error is corrected: It looks like row 3 should actually have 5 columns, instead of 6 in line 2.
, June-July Biden, June-July Trump, September Biden, September Trump
# pro comments, 34'096, 61'525, 18'459, 45'670
# contra comments, 27'817, 57'820, 29'844, 61'035
@Steboss
Steboss / LSTM.py
Created October 21, 2020 14:12
LSTM to perform classification on unclassified text
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional
from sklearn.model_selection import train_test_split
# Convert text to array with Glove
def text_to_array(text):
r"""function to convert the text to vector through Glove"""
@Steboss
Steboss / TF_IDF_LogReg.py
Created October 21, 2020 11:28
First Iteration for classification
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import KFold
word_vectorizer = TfidfVectorizer(
ngram_range=(1,4),
min_df=3,
max_df=0.9,
use_idf=True,
@Steboss
Steboss / TF_IDF_LogReg.py
Created October 21, 2020 11:25
First Classification
# TF-IDF vectorizer working on word n-grams
word_vectorizer = TfidfVectorizer(
    # tokenisation: word-level analysis, a token is one or more
    # word characters, n-grams of 1 up to 4 tokens
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 4),
    # vocabulary pruning: document-frequency bounds (absolute count
    # for min_df, proportion for max_df) and a cap on vocabulary size
    min_df=3,
    max_df=0.9,
    max_features=50000,
    # weighting: smoothed inverse document frequency and
    # sublinear term-frequency scaling
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
)
@Steboss
Steboss / data_summary.csv
Created October 21, 2020 09:14
Comments, users and posts scraped from Trump's and Biden's Instagram
June-July Biden June-July Trump September Biden September Trump
# comments 83'106 136'886 61'859 110'204
# users 46'070 87'274 33'756 67'449
# posts 36 30 30 24
# followers 2'605'558 20'718'259 3'767'723 21'671'532
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
# library names collected for the build
libraries = ["fftw3", "m"]
# the compile and link flag lists hold the same flags; the link list
# is a separate copy so the two can be changed independently
extra_compile_args = ['-O3', '-std=c99']
extra_link_args = list(extra_compile_args)
cython_directives = {
'embedsignature': True,
#translate the pointer to a numpy array
magnitude_array = np.asarray(magnitude)
#reshape the magnitude_array so we'll have a matrix
cols = int( (length/(windowSize/2)) -1)
rows = int(windowSize/2)+1
new_array = np.zeros([cols,rows])
counter = 0
for i in range(0,cols):