This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split kernel titles into their individual words
end_of_sentence = '.'  # symbol to denote the end of the sentence


def extract_words(title):
    '''
    Clean a kernel title with clean_title and break it into a list of words.

    The cleaned title is split on single space characters and the resulting
    list is returned as-is. This function does not itself append the
    'end_of_sentence' symbol to the list.
    '''
    return clean_title(title).split(' ')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Lowercase, remove punctuation and numbers from kernel titles | |
def clean_title(title): | |
''' | |
Function to lowercase, remove punctuation and numbers from kernel titles | |
''' | |
# lowercase | |
title = str(title).lower() | |
# replace punctuation with spaces
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join kernels with their current version to get each kernel's title and
# total number of votes
kernels_trc = kernels[['CurrentKernelVersionId', 'TotalVotes']].rename(
    columns={'CurrentKernelVersionId': 'Id'}
)
kernel_version_trc = kernel_versions[['Id', 'Title']]
# 'Id' is the only column the two frames share, so it is the merge key
kernels_titles_votes = kernels_trc.merge(kernel_version_trc, on='Id')

# Order the rows by vote count
kernels_titles_votes = kernels_titles_votes.sort_values(by=['TotalVotes'])

# Collect the unique titles of popular kernels (at least 1 vote)
popular_kernel_titles = (
    kernels_titles_votes
    .loc[kernels_titles_votes['TotalVotes'] > 0, 'Title']
    .unique()
    .tolist()
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Read the input tables

# Kernel versions
kernel_versions = pd.read_csv('../input/KernelVersions.csv')

# Kernels (needed to retrieve TotalVotes)
kernels = pd.read_csv('../input/Kernels.csv')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define the demo dataset | |
class DogDataset3(Dataset): | |
''' | |
Sample dataset for Augmentor demonstration. | |
The dataset will consist of just one sample image. | |
''' | |
def __init__(self, image): | |
self.image = image |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import package | |
import Augmentor | |
# Build a pipeline over a single [image, mask] pair
p = Augmentor.DataPipeline([[np.array(image), np.array(mask)]])

# Register the augmentations, each with probability 1
p.rotate(probability=1, max_left_rotation=3, max_right_rotation=3)
p.shear(probability=1, max_shear_left=3, max_shear_right=3)
p.zoom_random(probability=1, percentage_area=0.9)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import pytorch utilities from albumentations | |
from albumentations.pytorch import ToTensor | |
# Define the augmentation pipeline | |
augmentation_pipeline = A.Compose( | |
[ | |
A.HorizontalFlip(p = 0.5), # apply horizontal flip to 50% of images | |
A.OneOf( | |
[ | |
# apply one of transforms to 50% of images |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose a complex augmentation pipeline | |
augmentation_pipeline = A.Compose( | |
[ | |
A.HorizontalFlip(p = 0.5), # apply horizontal flip to 50% of images | |
A.OneOf( | |
[ | |
# apply one of transforms to 50% of images | |
A.RandomContrast(), # apply random contrast | |
A.RandomGamma(), # apply random gamma | |
A.RandomBrightness(), # apply random brightness |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import patches from matplotlib to draw rectangle for bounding box | |
import matplotlib.patches as patches | |
# Visualize augmented image and bbox on a 1x2 grid of axes
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 10))

# Left panel: the original image with the axis hidden
ax[0].axis('off')
ax[0].imshow(image)

# Red, unfilled rectangle built from the four bboxes values:
# anchor (bboxes[0], bboxes[1]), width bboxes[2], height bboxes[3]
rect = patches.Rectangle(
    xy=(bboxes[0], bboxes[1]),
    width=bboxes[2],
    height=bboxes[3],
    linewidth=1,
    edgecolor='r',
    facecolor='none',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create bounding boxes from mask with cv2 | |
import cv2 | |
# Convert the mask to a single grayscale channel
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

# Bounding rectangle enclosing all non-zero mask pixels
bboxes = cv2.boundingRect(cv2.findNonZero(mask))

# Augment image and bounding box together
augmented_boxes = aug_pipeline(image=image, bboxes=[bboxes])

# Access the augmented image
# NOTE(review): this takes element 0 of the 'image' entry rather than the
# whole entry - confirm that the extra [0] index is intended
image_aug = augmented_boxes['image'][0]