Skip to content

Instantly share code, notes, and snippets.

View mayurbhangale's full-sized avatar

Mayur Bhangale mayurbhangale

View GitHub Profile
@mayurbhangale
mayurbhangale / birthday_corpus.py
Created April 25, 2018 10:32 — forked from wpm/birthday_corpus.py
Generate a corpus of texts mentioning birthdays that can be used to train a Prodigy named entity recognizer.
import json
import re
import time
from random import choice, random
from typing import TextIO, Callable, Sequence, Tuple, Optional
import click
# Readability aliases for annotations: both names refer to the builtin ``str``.
NAME = str
DATE = str
# A pair of integers; presumably the (start, end) offsets of a span within a
# text -- confirm against the code that uses it.
SPAN_OFFSET = Tuple[int, int]
SELECT ?PermID ?hasPermId ?rank ?prefLabel ?hasReportedTitle ?hasHolder ?hasPositionType ?hasPublicationStatus ?isPositionIn ?to ?familyName ?givenName ?holdsPosition ?hasTenureInOrganization ?additionalName ?hasGender ?bday ?honorificPrefix ?from ?hasQualification ?preferredName ?honorificSuffix ?withDegree ?fromInstitutionName ?inSubject ?isTenureIn ?dateOfDeath
WHERE { OPTIONAL { ?PermID <http://permid.org/ontology/common/hasPermId> ?hasPermId }
OPTIONAL { ?PermID <http://permid.org/ontology/person/rank> ?rank }
OPTIONAL { ?PermID <http://www.w3.org/2004/02/skos/core#prefLabel> ?prefLabel }
OPTIONAL { ?PermID <http://permid.org/ontology/person/hasReportedTitle> ?hasReportedTitle }
OPTIONAL { ?PermID <http://permid.org/ontology/person/hasHolder> ?hasHolder }
OPTIONAL { ?PermID <http://permid.org/ontology/person/hasPositionType> ?hasPositionType }
OPTIONAL { ?PermID <http://permid.org/ontology/common/hasPublicationStatus> ?hasPublicationStatus }
OPTIONA