Skip to content

Instantly share code, notes, and snippets.

View ortsed's full-sized avatar

Llewellyn Jones ortsed

View GitHub Profile
@ortsed
ortsed / t-sne.py
Created January 21, 2020 16:31
t-Distributed Stochastic Neighbor Embedding (t-SNE) Example
Python
from sklearn import datasets
import seaborn as sn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# Load the digits dataset from sklearn.datasets
digits = datasets.load_digits()
@ortsed
ortsed / strs_in_floats.py
Created January 14, 2020 04:35
Strings in Floats
def strs_in_float(series):
"""
Extracts the strings in what would otherwise be a Pandas Series of floats
For data cleaning.
"""
def is_float(x):
try:
float(x)
return False
except:
@ortsed
ortsed / merge_similar.py
Created January 12, 2020 01:51
Merge Similar Datasets
import pandas as pd
def merge_similar(files=[], encoding=None):
"""
Concats datasets with similar but not necessarily the same columns
by creating empty columns for each dataframe missing a column found in the others
"""
merged = []
for file in files:
df = pd.read_csv(file, encoding=encoding)
from string import punctuation

from nltk import word_tokenize
from nltk.corpus import stopwords

# Tokens to discard: NLTK's English stopwords plus every punctuation character.
stopwords_list = stopwords.words('english')
stopwords_list += list(punctuation)

tokens = word_tokenize(some_text_data)

# Lowercase each token *before* testing it against the stopword list.
# NLTK's English stopwords are all lowercase, so testing the raw token
# would let capitalised stopwords ("The", "And") slip through the filter.
stopped_tokens = [w.lower() for w in tokens if w.lower() not in stopwords_list]
# keras https://keras.io/
from keras import layers, models, optimizers
from keras.models import Sequential

# Fully connected network: a 50-unit ReLU layer fed by 2000 input
# features, followed by a single-unit ReLU output layer.
model = Sequential([
    layers.Dense(50, activation='relu', input_shape=(2000,)),
    layers.Dense(1, activation='relu'),
])
import numpy as np
def find_term_derivative(term):
    """
    Differentiates a single term using the power rule.

    `term` is read as (constant, exponent) via term[0] and term[1];
    the result is the tuple (constant * exponent, exponent - 1).
    """
    coefficient, power = term[0], term[1]
    return (coefficient * power, power - 1)
def find_derivative(function_terms):
    """
    Differentiates a function given as an iterable of terms.

    Each term is passed to find_term_derivative. Derivative terms whose
    constant equals 0 (for example, those produced by constant terms)
    are dropped. Returns a list of (constant, exponent) tuples in the
    same order as the input terms.
    """
    derivative_terms = (find_term_derivative(term) for term in function_terms)
    return [derived for derived in derivative_terms if derived[0] != 0]
@ortsed
ortsed / sklearn_model_summary.py
Last active March 21, 2022 03:52
Sklearn Model Summary
def model_summary(model, X, y, columns=[]):
"""
Takes a sklearn model and outputs basic stats,
based on input features (X) and target (y)
"""
import pandas as pd
from scipy import stats
import numpy as np
lm = model
params = np.append(lm.intercept_,lm.coef_)

Keybase proof

I hereby claim:

  • I am ortsed on github.
  • I am llewhinkes (https://keybase.io/llewhinkes) on keybase.
  • I have a public key ASBcU6XcOnBo98h_BCPJ4a1w8HK6U3TXGKqLkle9emjSywo

To claim this, I am signing this object:

@ortsed
ortsed / pystats_cheat.py
Created May 9, 2019 17:47
Python Stats Cheat Sheet
# Cheat Sheet
import warnings
warnings.filterwarnings('ignore')  # suppress all warnings from here on

# Pandas
# to_numeric has no `coalesce` parameter; errors="coerce" is the option
# that turns unparseable values into NaN instead of raising.
# NOTE(review): to_numeric expects a scalar, list or Series - if `df` is a
# whole DataFrame, pass a single column instead. Confirm against usage.
pd.to_numeric(df, errors="coerce")
# NOTE(review): the calls below are placeholders listing the conversion
# helpers; to_datetime and to_timedelta each need the data to convert as
# their first argument.
pd.to_datetime()
# NOTE(review): pandas exposes to_timestamp as a Series/DataFrame/Period
# method rather than a top-level pd function - confirm the intended call.
pd.to_timestamp()
pd.to_timedelta()
These agencies received fewer than 50 FOIA requests in 2018. We can do better.
Administrative Conference of the United States
Advisory Council on Historic Preservation
American Battle Monuments Commission
Appraisal Subcommittee