Tia Plagata (tiaplagata)

@tiaplagata
tiaplagata / custom_pipeline_2.py
Last active November 17, 2020 03:00
Custom Pipeline with Grid Search
from sklearn.metrics import recall_score, make_scorer
from sklearn.ensemble import GradientBoostingClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline  # imblearn's Pipeline accepts SMOTE as a step; sklearn's does not

# Define the pipeline
# SelectColumnsTransformer and Transform_Categorical are custom transformers defined elsewhere
pipeline = Pipeline(steps=[
    ("ColumnTransformer", SelectColumnsTransformer(columns=features_to_use)),
    ("TransformCategorical", Transform_Categorical()),
    ("SMOTE", SMOTE()),
    ("GradientBooster", GradientBoostingClassifier())
])
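The gist's title and the make_scorer import point at a grid search the preview doesn't show; below is a minimal sketch of that step, with hypothetical parameter names and grid values (the grid itself is not from the gist).

from sklearn.model_selection import GridSearchCV

# Score on recall, matching the recall_score import above
recall_scorer = make_scorer(recall_score)

# Hypothetical grid over the GradientBooster step's hyperparameters
param_grid = {
    "GradientBooster__n_estimators": [100, 200],
    "GradientBooster__learning_rate": [0.05, 0.1]
}

grid_search = GridSearchCV(pipeline, param_grid, scoring=recall_scorer, cv=5)
grid_search.fit(X_train, y_train)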
@tiaplagata
tiaplagata / function_transformer.py
Created November 18, 2020 00:31
FunctionTransformer in Pipeline
from sklearn.preprocessing import FunctionTransformer
# The function we want to wrap as a transformer for our pipeline
def transform_yes_no(X):
    X['international plan'] = X['international plan'].apply(lambda x: 1 if x.lower() == 'yes' else 0)
    X['voice mail plan'] = X['voice mail plan'].apply(lambda x: 1 if x.lower() == 'yes' else 0)
    return X

# Wrap the function in a transformer object so it can be used as a pipeline step
YesNoTransformer = FunctionTransformer(transform_yes_no)
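A hedged usage sketch (the step names and the surrounding pipeline are assumptions, not shown in the gist): the wrapped transformer drops into a Pipeline like any other step.

pipeline = Pipeline(steps=[
    ("YesNo", YesNoTransformer),
    ("GradientBooster", GradientBoostingClassifier())
])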
@tiaplagata
tiaplagata / colab_setup.py
Last active December 15, 2020 13:31
Code to set up data in Google Colab
from google.colab import drive
import os
# Mount Google Drive
drive.mount('/gdrive', force_remount=True)
# Location of Zip File
drive_path = '/gdrive/MyDrive/Data/pneumonia_data.zip'
local_path = '/content'
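The gist preview stops after the paths; here is a hedged sketch of the extraction step they set up, using the standard-library zipfile module (an assumed continuation, not shown in the preview).

import zipfile

# Extract the archive from Drive into the local Colab filesystem
with zipfile.ZipFile(drive_path, 'r') as zip_ref:
    zip_ref.extractall(local_path)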
@tiaplagata
tiaplagata / image_generators_colab.py
Last active December 14, 2020 21:12
Data Preprocessing for CNNs
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Create data generators
# File path = path to train/test/val folders respectively
# Use a target size of 224x224 px for each image (or whatever size you choose)
# Batch size = total number of images in the train set, test set, val set respectively
# Ensure class_mode is binary
train_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/train',
    target_size=(224, 224),
    batch_size=5216,       # assumed total count of images in the train folder; set to your own train-set size
    class_mode='binary')
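The preview cuts off here; a hedged sketch of the matching test and validation generators the comments above describe (folder names and image counts are assumptions).

test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/test',
    target_size=(224, 224),
    batch_size=624,        # assumed test-set size
    class_mode='binary')

val_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/val',
    target_size=(224, 224),
    batch_size=16,         # assumed validation-set size
    class_mode='binary')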
@tiaplagata
tiaplagata / wordcloud_imports.py
Last active November 7, 2021 23:56
imports needed for word clouds
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
@tiaplagata
tiaplagata / basic_wordcloud.py
Last active November 7, 2021 23:57
creating a basic word cloud from text corpus
# Give our Rome corpus a variable name
rome_corpus = df.lemmatized[10]

# Instantiate a wordcloud object and feed it our corpus
wc = WordCloud().generate_from_text(rome_corpus)

# Use matplotlib.pyplot to display the fitted wordcloud
# Turn the axis off to get rid of axis numbers
plt.imshow(wc)
plt.axis('off')
plt.show()

# Store our document term matrix data for Rome
data = dtm.transpose()['Rome, Italy'].sort_values(ascending=False)

# Generate the word cloud from frequencies
wc = WordCloud().generate_from_frequencies(data)
plt.imshow(wc)
plt.axis('off')
plt.show()
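The dtm used above is not defined in this gist; below is a minimal sketch of one way such a document-term matrix could be built with CountVectorizer, assuming df holds one lemmatized document per city and a hypothetical city_name column carries labels like 'Rome, Italy'.

from sklearn.feature_extraction.text import CountVectorizer

# Build term counts from the lemmatized documents
vectorizer = CountVectorizer(stop_words='english')
counts = vectorizer.fit_transform(df.lemmatized)

# One row per city, one column per term, so dtm.transpose()['Rome, Italy'] yields term frequencies
dtm = pd.DataFrame(counts.toarray(),
                   index=df.city_name,   # hypothetical column of city labels
                   columns=vectorizer.get_feature_names_out())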
@tiaplagata
tiaplagata / basic_wc_func.py
Last active November 7, 2021 23:57
function to generate a word cloud from text
# Define a list of stop words
stopwords = ['private', 'tour', 'transfer', 'guide', 'skip', 'line',
             'skiptheline', 'vice', 'versa']

# A function to generate the word cloud from text
# (the gist preview is truncated here; the closing arguments and plotting calls are assumed)
def generate_basic_wordcloud(data, title):
    cloud = WordCloud(width=400,
                      height=330,
                      max_words=150,
                      colormap='tab20c',
                      stopwords=stopwords,
                      background_color='white').generate_from_text(data)
    plt.imshow(cloud)
    plt.axis('off')
    plt.title(title)
    plt.show()
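A hedged usage example (the corpus variable comes from the earlier gist; the title string is an assumption):

generate_basic_wordcloud(rome_corpus, 'Rome, Italy')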
@tiaplagata
tiaplagata / create_wc_mask.py
Created January 27, 2021 20:51
get a word cloud mask into the correct format
# Create an array from the image you want to use as a mask
## Your file path will look different
rome_mask = np.array(Image.open('/Users/tiaplagata/Downloads/italy.jpg'))
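A hedged check worth running on the mask (not in the gist): the wordcloud library treats pure-white (255) pixels as masked out, so the silhouette should be dark on a white background.

print(rome_mask.shape)        # e.g. (height, width, 3) for an RGB image
print(np.unique(rome_mask))   # background pixels should be 255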
@tiaplagata
tiaplagata / better_wordcloud_func.py
Created January 27, 2021 20:55
create a prettier word cloud with a mask
# A similar function, but using the mask
def generate_better_wordcloud(data, title, mask=None):
    cloud = WordCloud(scale=3,
                      max_words=150,
                      colormap='RdYlGn',
                      mask=mask,
                      background_color='white',
                      stopwords=stopwords,
                      collocations=True).generate_from_text(data)
    plt.figure(figsize=(10, 8))
    # (the gist preview is truncated here; the plotting calls below are assumed)
    plt.imshow(cloud)
    plt.axis('off')
    plt.title(title)
    plt.show()
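A hedged usage example tying the pieces together (the arguments mirror the earlier gists; the title string is an assumption):

generate_better_wordcloud(rome_corpus, 'Rome, Italy', mask=rome_mask)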