Skip to content

Instantly share code, notes, and snippets.

@gaphex
Last active February 26, 2020 13:48
Show Gist options
  • Select an option

  • Save gaphex/baf40725791eabe4ec58cbbf4583c57f to your computer and use it in GitHub Desktop.

Select an option

Save gaphex/baf40725791eabe4ec58cbbf4583c57f to your computer and use it in GitHub Desktop.
def prepare_snli(sa, sb, lb):
    """Group sentence pairs by anchor and keep anchors with both target labels.

    The three inputs are walked in lockstep (``zip``), so iteration stops at
    the shortest one. Presumably these are SNLI premises, hypotheses and
    gold labels -- confirm against the caller.

    Args:
        sa: Iterable of anchor sentences (must be hashable; used as dict keys).
        sb: Iterable of sentences paired with the corresponding anchor.
        lb: Iterable of labels for each ``(sa, sb)`` pair (hashable).

    Returns:
        A dict mapping consecutive integer ids (starting at 0) to a
        ``defaultdict(list)`` with an ``"anchor"`` key holding the anchor
        sentence and one key per label seen for that anchor, each holding
        the list of paired sentences. Only anchors that have at least one
        ``"entailment"`` and at least one ``"contradiction"`` pair are
        included; pairs with any other label are kept for those anchors.

    Side effects:
        Prints the number of kept and skipped anchors.
    """
    required = {"entailment", "contradiction"}

    # Collect every (sentence, label) pair under its anchor sentence.
    anc_to_pairs = defaultdict(list)
    for xa, xb, y in zip(sa, sb, lb):
        anc_to_pairs[xa].append((xb, y))

    filtered = {}
    skipped = 0
    anchor_id = 0

    for anchor, payload in anc_to_pairs.items():
        labels = {label for _, label in payload}

        if not required <= labels:
            skipped += 1
            continue

        # Create the entry only once the anchor is known to qualify, so a
        # skipped anchor never leaves a stub behind in the result.
        entry = defaultdict(list)
        entry["anchor"].append(anchor)
        for text, label in payload:
            entry[label].append(text)

        filtered[anchor_id] = entry
        anchor_id += 1

    print("Loaded: {} \nSkipped: {}".format(anchor_id, skipped))
    return filtered
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment