Skip to content

Instantly share code, notes, and snippets.

@alanmbarr
Last active November 8, 2018 04:23
Show Gist options
  • Save alanmbarr/432677eef102030ede5e67d604248aed to your computer and use it in GitHub Desktop.
Save alanmbarr/432677eef102030ede5e67d604248aed to your computer and use it in GitHub Desktop.
Take PDFs and distort them with imgaug.
import imageio, glob
import PyPDF2
from wand.image import Image
from wand.color import Color
import numpy
import imgaug as ia
from imgaug import augmenters as iaa
import numpy as np
import cv2
def get_pdfs():
    """Convert every PDF in the current directory into an augmented TIFF.

    For each path matched by ``glob.glob("*.pdf")`` the document is opened
    with Wand (``resolution=150``), every page is rendered to a PNG blob and
    decoded into a NumPy array with OpenCV. The list of page arrays is passed
    through ``augment_images`` and the result is handed to ``write_image``
    together with the PDF path minus its extension.
    """
    import os  # function-scope import: only needed to split off the extension

    for path in glob.glob("*.pdf"):
        pages = []
        # Open the document in a ``with`` block so the Wand image is closed
        # once all of its pages have been converted.
        with Image(filename=path, resolution=150) as all_pages:
            for page in all_pages.sequence:
                with Image(page) as img:
                    img.format = 'png'
                    img_buffer = np.asarray(
                        bytearray(img.make_blob()), dtype=np.uint8
                    )
                    # NOTE(review): OpenCV decodes colour data in BGR(A)
                    # order; confirm whether the TIFF writer expects RGB(A)
                    # and a channel swap is needed here.
                    retval = cv2.imdecode(img_buffer, cv2.IMREAD_UNCHANGED)
                    # Presumably a no-op, since the decoded PNG should already
                    # have the page's dimensions; kept to preserve behaviour.
                    retval = cv2.resize(retval, (img.width, img.height))
                    pages.append(retval)
        augmented = augment_images(pages)
        # ``str.strip(".pdf")`` would remove any of the characters '.', 'p',
        # 'd', 'f' from BOTH ends of the name (e.g. "pdf_report.pdf" ->
        # "_report"); ``splitext`` removes only the trailing extension.
        write_image(os.path.splitext(path)[0], augmented)
def augment_images(images):
    """Run a batch of images through a randomised imgaug pipeline.

    imgaug is re-seeded from NumPy's global random generator on every call,
    then a ``Sequential`` of crop, optional blur, contrast, noise, brightness
    and affine augmenters is applied in random order.

    Args:
        images: The images to augment; forwarded unchanged to
            ``Sequential.augment_images``.

    Returns:
        The value returned by ``Sequential.augment_images`` for ``images``.
    """
    ia.seed(numpy.random.randint(100000, size=1)[0])

    # The images are forwarded as-is. An earlier per-image loop that tried to
    # add an axis to 2-D images only rebound its loop variable and therefore
    # never changed ``images``; it has been dropped.
    # NOTE(review): imgaug documents support for (H, W) inputs -- confirm for
    # the installed version if grayscale pages are expected.
    seq = iaa.Sequential([
        iaa.Crop(percent=(0, 0.1)),  # random crops
        # Small gaussian blur with random sigma between 0 and 0.5.
        # But we only blur about 50% of all images.
        iaa.Sometimes(
            0.5,
            iaa.GaussianBlur(sigma=(0, 0.5)),
        ),
        # Strengthen or weaken the contrast in each image.
        iaa.ContrastNormalization((0.75, 1.5)),
        # Add gaussian noise.
        # For 50% of all images, we sample the noise once per pixel.
        # For the other 50% of all images, we sample the noise per pixel AND
        # channel. This can change the color (not only brightness) of the
        # pixels.
        iaa.AdditiveGaussianNoise(
            loc=0, scale=(0.0, 0.05*255), per_channel=0.5
        ),
        # Make some images brighter and some darker.
        # In 20% of all cases, we sample the multiplier once per channel,
        # which can end up changing the color of the images.
        iaa.Multiply((0.8, 1.2), per_channel=0.2),
        # Apply affine transformations to each image.
        # Scale/zoom them, translate/move them, rotate them and shear them.
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-25, 25),
            shear=(-8, 8),
        ),
    ], random_order=True)  # apply augmenters in random order

    return seq.augment_images(images)
def write_image(path, image):
    """Write ``image`` to the file ``path`` + ".tiff" via ``imageio.mimwrite``.

    Args:
        path: Output path without extension; ".tiff" is appended.
        image: The data to save; passed to ``imageio.mimwrite`` unchanged.
    """
    destination = path + ".tiff"
    imageio.mimwrite(destination, image, format="TIFF")
# Script entry point: process every "*.pdf" found in the current directory.
# NOTE(review): this call also runs when the module is imported; consider
# wrapping it in an `if __name__ == "__main__":` guard.
get_pdfs()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment