Tia Plagata (tiaplagata)

@tiaplagata
tiaplagata / custom_pipeline_2.py
Last active November 17, 2020 03:00
Custom Pipeline with Grid Search
from sklearn.metrics import recall_score, make_scorer
from sklearn.ensemble import GradientBoostingClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline  # imblearn's Pipeline accepts SMOTE as a step; sklearn's does not

# Define the pipeline
# SelectColumnsTransformer and Transform_Categorical are custom transformers defined elsewhere
pipeline = Pipeline(steps=[
    ("ColumnTransformer", SelectColumnsTransformer(columns=features_to_use)),
    ("TransformCategorical", Transform_Categorical()),
    ("SMOTE", SMOTE()),
    ("GradientBooster", GradientBoostingClassifier())
])
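The gist's title and the make_scorer import point at a grid search the preview doesn't show; below is a minimal sketch of that step, with hypothetical parameter names and grid values (the grid itself is not from the gist).

from sklearn.model_selection import GridSearchCV

# Score on recall, matching the recall_score import above
recall_scorer = make_scorer(recall_score)

# Hypothetical grid over the GradientBooster step's hyperparameters
param_grid = {
    "GradientBooster__n_estimators": [100, 200],
    "GradientBooster__learning_rate": [0.05, 0.1]
}

grid_search = GridSearchCV(pipeline, param_grid, scoring=recall_scorer, cv=5)
grid_search.fit(X_train, y_train)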
@tiaplagata
tiaplagata / function_transformer.py
Created November 18, 2020 00:31
FunctionTransformer in Pipeline
from sklearn.preprocessing import FunctionTransformer
# The function we want to wrap as a transformer for our pipeline
def transform_yes_no(X):
    X['international plan'] = X['international plan'].apply(lambda x: 1 if x.lower() == 'yes' else 0)
    X['voice mail plan'] = X['voice mail plan'].apply(lambda x: 1 if x.lower() == 'yes' else 0)
    return X

# Wrap the function in a transformer object so it can be used as a pipeline step
YesNoTransformer = FunctionTransformer(transform_yes_no)
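A hedged usage sketch (the step names and the surrounding pipeline are assumptions, not shown in the gist): the wrapped transformer drops into a Pipeline like any other step.

pipeline = Pipeline(steps=[
    ("YesNo", YesNoTransformer),
    ("GradientBooster", GradientBoostingClassifier())
])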
@tiaplagata
tiaplagata / colab_setup.py
Last active December 15, 2020 13:31
Code to set up data in Google Colab
from google.colab import drive
import os
# Mount Google Drive
drive.mount('/gdrive', force_remount=True)
# Location of Zip File
drive_path = '/gdrive/MyDrive/Data/pneumonia_data.zip'
local_path = '/content'
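The gist preview stops after the paths; here is a hedged sketch of the extraction step they set up, using the standard-library zipfile module (an assumed continuation, not shown in the preview).

import zipfile

# Extract the archive from Drive into the local Colab filesystem
with zipfile.ZipFile(drive_path, 'r') as zip_ref:
    zip_ref.extractall(local_path)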
@tiaplagata
tiaplagata / image_generators_colab.py
Last active December 14, 2020 21:12
Data Preprocessing for CNNs
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Create data generators
# File path = path to train/test/val folders respectively
# Use a target size of 224x224 px for each image (or whatever size you choose)
# Batch size = total number of images in the train set, test set, val set respectively
# Ensure class_mode is binary
train_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/train',
    target_size=(224, 224),
    batch_size=5216,       # assumed total count of images in the train folder; set to your own train-set size
    class_mode='binary')
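The preview cuts off here; a hedged sketch of the matching test and validation generators the comments above describe (folder names and image counts are assumptions).

test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/test',
    target_size=(224, 224),
    batch_size=624,        # assumed test-set size
    class_mode='binary')

val_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    '/content/chest_xray/val',
    target_size=(224, 224),
    batch_size=16,         # assumed validation-set size
    class_mode='binary')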
@tiaplagata
tiaplagata / wordcloud_imports.py
Last active November 7, 2021 23:56
imports needed for word clouds
from wordcloud import WordCloud, ImageColorGenerator
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
@tiaplagata
tiaplagata / basic_wordcloud.py
Last active November 7, 2021 23:57
creating a basic word cloud from text corpus
# Give our Rome corpus a variable name
rome_corpus = df.lemmatized[10]

# Instantiate a wordcloud object and feed it our corpus
wc = WordCloud().generate_from_text(rome_corpus)

# Use matplotlib.pyplot to display the fitted wordcloud
# Turn the axis off to get rid of axis numbers
plt.imshow(wc)
plt.axis('off')
plt.show()

# Store our document term matrix data for Rome
data = dtm.transpose()['Rome, Italy'].sort_values(ascending=False)

# Generate the word cloud from frequencies
wc = WordCloud().generate_from_frequencies(data)
plt.imshow(wc)
plt.axis('off')
plt.show()
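The dtm used above is not defined in this gist; below is a minimal sketch of one way such a document-term matrix could be built with CountVectorizer, assuming df holds one lemmatized document per city and a hypothetical city_name column carries labels like 'Rome, Italy'.

from sklearn.feature_extraction.text import CountVectorizer

# Build term counts from the lemmatized documents
vectorizer = CountVectorizer(stop_words='english')
counts = vectorizer.fit_transform(df.lemmatized)

# One row per city, one column per term, so dtm.transpose()['Rome, Italy'] yields term frequencies
dtm = pd.DataFrame(counts.toarray(),
                   index=df.city_name,   # hypothetical column of city labels
                   columns=vectorizer.get_feature_names_out())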
@tiaplagata
tiaplagata / basic_wc_func.py
Last active November 7, 2021 23:57
function to generate a word cloud from text
# Define a list of stop words
stopwords = ['private', 'tour', 'transfer', 'guide', 'skip', 'line',
             'skiptheline', 'vice', 'versa']

# A function to generate the word cloud from text
# (the gist preview is truncated here; the closing arguments and plotting calls are assumed)
def generate_basic_wordcloud(data, title):
    cloud = WordCloud(width=400,
                      height=330,
                      max_words=150,
                      colormap='tab20c',
                      stopwords=stopwords,
                      background_color='white').generate_from_text(data)
    plt.imshow(cloud)
    plt.axis('off')
    plt.title(title)
    plt.show()
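A hedged usage example (the corpus variable comes from the earlier gist; the title string is an assumption):

generate_basic_wordcloud(rome_corpus, 'Rome, Italy')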
@tiaplagata
tiaplagata / create_wc_mask.py
Created January 27, 2021 20:51
get a word cloud mask into the correct format
# Create an array from the image you want to use as a mask
## Your file path will look different
rome_mask = np.array(Image.open('/Users/tiaplagata/Downloads/italy.jpg'))
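A hedged check worth running on the mask (not in the gist): the wordcloud library treats pure-white (255) pixels as masked out, so the silhouette should be dark on a white background.

print(rome_mask.shape)        # e.g. (height, width, 3) for an RGB image
print(np.unique(rome_mask))   # background pixels should be 255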
@tiaplagata
tiaplagata / better_wordcloud_func.py
Created January 27, 2021 20:55
create a prettier word cloud with a mask
# A similar function, but using the mask
def generate_better_wordcloud(data, title, mask=None):
    cloud = WordCloud(scale=3,
                      max_words=150,
                      colormap='RdYlGn',
                      mask=mask,
                      background_color='white',
                      stopwords=stopwords,
                      collocations=True).generate_from_text(data)
    plt.figure(figsize=(10, 8))
    # (the gist preview is truncated here; the plotting calls below are assumed)
    plt.imshow(cloud)
    plt.axis('off')
    plt.title(title)
    plt.show()
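A hedged usage example tying the pieces together (the arguments mirror the earlier gists; the title string is an assumption):

generate_better_wordcloud(rome_corpus, 'Rome, Italy', mask=rome_mask)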