Aleksandra Deis Lexie88rus

🏡

WFH

Data Scientist

Lexie88rus / extract_words.py

Created August 30, 2019 16:00

Extract words from sentences

	# Split cleaned kernel titles into words
	end_of_sentence = '.'  # symbol used to denote the end of a sentence
	def extract_words(title):
	    '''
	    Clean a kernel title with clean_title and split it into a list of words.

	    The cleaned title is split on single space characters and the resulting
	    list is returned unchanged; the 'end_of_sentence' symbol is not appended
	    by this function.
	    '''
	    return clean_title(title).split(' ')

Lexie88rus / title_cleaning_function.py

Created August 29, 2019 18:32

Title cleaning function

	import re

	# Lowercase, remove punctuation and numbers from kernel titles
	def clean_title(title):
	'''
	Function to lowercase, remove punctuation and numbers from kernel titles
	'''
	# lowercase
	title = str(title).lower()
	# replace punctuation into spaces

Lexie88rus / popular_titles.py

Created August 29, 2019 18:03

Create a list of popular kernel titles

	# Pair every kernel's total votes with the title of its current version.
	# The version id column is renamed to 'Id' so both frames share that column.
	kernels_trc = kernels[['CurrentKernelVersionId', 'TotalVotes']].rename(
	    columns={'CurrentKernelVersionId': 'Id'}
	)
	kernel_version_trc = kernel_versions[['Id', 'Title']]

	# Merge on the shared column(s), then sort the rows by number of votes
	kernels_titles_votes = kernels_trc.merge(kernel_version_trc).sort_values(by=['TotalVotes'])

	# Retrieve the unique titles of popular kernels (at least 1 vote)
	popular_kernel_titles = kernels_titles_votes.loc[
	    kernels_titles_votes['TotalVotes'] > 0, 'Title'
	].unique().tolist()

Lexie88rus / load_data.py

Created August 29, 2019 17:57

Load Kaggle kernels data

	import pandas as pd

	# Load data
	# Load kernel versions from the KernelVersions.csv file in ../input
	kernel_versions = pd.read_csv('../input/KernelVersions.csv')
	# Load kernels from Kernels.csv (to retrieve TotalVotes)
	kernels = pd.read_csv('../input/Kernels.csv')

Lexie88rus / augmenter_pytorch_demo.py

Created August 8, 2019 18:57

Example of using the Augmentor package with PyTorch

	# Define the demo dataset
	class DogDataset3(Dataset):
	'''
	Sample dataset for Augmentor demonstration.
	The dataset will consist of just one sample image.
	'''

	def __init__(self, image):
	self.image = image

Lexie88rus / augmenter_demo.py

Created August 8, 2019 18:51

Simple Augmentor example

	# Import package
	import Augmentor

	# Initialize pipeline from a single [image, mask] pair converted to NumPy arrays
	# (np, image and mask are not defined in this snippet and must already be in scope)
	p = Augmentor.DataPipeline([[np.array(image), np.array(mask)]])

	# Apply augmentations
	# NOTE(review): the leading 1 in each call is presumably the probability of
	# applying the operation — confirm against the Augmentor documentation
	p.rotate(1, max_left_rotation=3, max_right_rotation=3)
	p.shear(1, max_shear_left = 3, max_shear_right = 3)
	p.zoom_random(1, percentage_area=0.9)

Lexie88rus / pytorch_integration_albumentations.py

Created August 8, 2019 18:42

Albumentations: PyTorch integration

	# Import pytorch utilities from albumentations
	from albumentations.pytorch import ToTensor

	# Define the augmentation pipeline
	augmentation_pipeline = A.Compose(
	[
	A.HorizontalFlip(p = 0.5), # apply horizontal flip to 50% of images
	A.OneOf(
	[
	# apply one of transforms to 50% of images

Lexie88rus / albumetations_pipelining.py

Created August 8, 2019 18:34

Pipelining augmentations with Albumentations

	# Compose a complex augmentation pipeline
	augmentation_pipeline = A.Compose(
	[
	A.HorizontalFlip(p = 0.5), # apply horizontal flip to 50% of images
	A.OneOf(
	[
	# apply one of transforms to 50% of images
	A.RandomContrast(), # apply random contrast
	A.RandomGamma(), # apply random gamma
	A.RandomBrightness(), # apply random brightness

Lexie88rus / visualize_bbox_albumentations.py

Last active August 10, 2019 12:05

Visualize augmented image with bounding box with Albumentations

	# Import patches from matplotlib to draw rectangle for bounding box
	import matplotlib.patches as patches

	# Visualize augmented image and bbox
	# Create a figure with one row of two axes
	# (plt, image and bboxes are not defined in this snippet and must already be in scope)
	fig, ax = plt.subplots(1,2, figsize = (15, 10))

	# Plot the original image and bounding box
	ax[0].axis('off')
	ax[0].imshow(image)
	# Red, unfilled rectangle anchored at (bboxes[0], bboxes[1]) with width bboxes[2] and height bboxes[3]
	rect = patches.Rectangle((bboxes[0],bboxes[1]), bboxes[2], bboxes[3],linewidth=1,edgecolor='r',facecolor='none')

Lexie88rus / bounding_box_albumentations.py

Created August 8, 2019 18:14

Transform an image with a bounding box using Albumentations

	# Create bounding boxes from mask with cv2
	import cv2
	# Convert the mask to grayscale (the input is interpreted as a BGR image)
	mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
	# Bounding rectangle of the non-zero pixels of the mask
	bboxes = cv2.boundingRect(cv2.findNonZero(mask))

	# Augment image and bounding box
	# (aug_pipeline and image are not defined in this snippet and must already be in scope)
	augmented_boxes = aug_pipeline(image = image, bboxes = [bboxes])

	# Access augmented image and bounding box
	# NOTE(review): ['image'][0] takes only the first element of the 'image' entry,
	# and no bounding box is read in the visible lines — confirm this is intended
	image_aug = augmented_boxes['image'][0]