Skip to content

Instantly share code, notes, and snippets.

@WillKoehrsen
Last active October 4, 2018 15:38
Show Gist options
  • Save WillKoehrsen/f55562d451b1cf6f948868ec5f859b4a to your computer and use it in GitHub Desktop.
Save WillKoehrsen/f55562d451b1cf6f948868ec5f859b4a to your computer and use it in GitHub Desktop.
import numpy as np
import random
# Seed Python's built-in `random` module so the random.sample / random.randrange
# draws below are reproducible from run to run.
# NOTE: this does not seed NumPy's RNG, so the np.random.shuffle call in
# generate_batch is not made reproducible by this line.
random.seed(100)
def generate_batch(pairs, n_positive=50, negative_ratio=1.0, *,
                   n_books=None, n_links=None,
                   negative_label=None, positive_pairs=None):
    """Endlessly yield training batches of positive and negative samples.

    Each batch contains ``n_positive`` (book, link) pairs sampled without
    replacement from ``pairs`` (label ``1``) plus randomly drawn
    (book, link) index pairs that are not known positives (negative label).
    The rows are shuffled before the batch is yielded.

    Args:
        pairs: Sequence of ``(book_id, link_id)`` positive examples. Must
            support ``random.sample`` and hold at least ``n_positive`` items.
        n_positive: Number of positive examples per batch.
        negative_ratio: Negatives per positive. The number of negatives is
            ``n_positive * negative_ratio`` rounded to the nearest integer.
        n_books: Exclusive upper bound for random book indices. When omitted,
            falls back to ``len(books)`` using the module-level ``books``.
        n_links: Exclusive upper bound for random link indices. When omitted,
            falls back to ``len(links)`` using the module-level ``links``.
        negative_label: Label written for negative rows. When omitted, falls
            back to the module-level ``neg_label``.
        positive_pairs: Iterable of pairs that must never be emitted as a
            negative. When omitted, falls back to the module-level
            ``pairs_set``.

    Yields:
        A tuple ``(features, labels)`` where ``features`` is
        ``{'book': <array>, 'link': <array>}`` and ``labels`` is an array,
        all of length ``n_positive + n_negatives``. Every yielded batch is
        backed by its own freshly allocated array.

    Raises:
        NameError: On first iteration, if an override is omitted and the
            corresponding module-level name does not exist.
        ValueError: From ``random.sample`` if ``n_positive`` exceeds
            ``len(pairs)``.
    """
    # The array shape must be an integer: the original float product
    # (e.g. 50 * 2.0 == 100.0) is rejected by np.zeros on current NumPy.
    n_negative = int(round(n_positive * negative_ratio))
    batch_size = n_positive + n_negative

    # Resolve the sampling context once. Explicit arguments win; otherwise
    # use the module-level names this function has always relied on.
    book_count = len(books) if n_books is None else n_books
    link_count = len(links) if n_links is None else n_links
    label = neg_label if negative_label is None else negative_label
    known_pairs = pairs_set if positive_pairs is None else set(positive_pairs)

    # Continue to yield samples
    while True:
        # Allocate a fresh array per batch so arrays already handed to the
        # consumer are not overwritten by the next iteration.
        batch = np.zeros((batch_size, 3))

        # Randomly choose positive examples
        for idx, (book_id, link_id) in enumerate(random.sample(pairs, n_positive)):
            batch[idx, :] = (book_id, link_id, 1)

        # Negatives start right after the positives. Set explicitly so idx is
        # defined even when n_positive == 0.
        idx = n_positive

        # Add negative examples until reach batch size
        while idx < batch_size:
            # Random selection
            random_book = random.randrange(book_count)
            random_link = random.randrange(link_count)

            # Check to make sure this is not a positive example
            if (random_book, random_link) not in known_pairs:
                # Add to batch and increment index
                batch[idx, :] = (random_book, random_link, label)
                idx += 1

        # Make sure to shuffle order (rows are permuted in place)
        np.random.shuffle(batch)
        yield {'book': batch[:, 0], 'link': batch[:, 1]}, batch[:, 2]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment