Skip to content

Instantly share code, notes, and snippets.

View omri374's full-sized avatar

Omri Mendels omri374

  • Microsoft
View GitHub Profile
@omri374
omri374 / ner_biblical_names.ipynb
Last active May 8, 2021 23:27
Notebook for post on biblical names with NER models
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import spacy

# Load both pipelines referenced by these snippets. The examples below call
# `en_core_web_trf`, so it must be loaded as well; loading only
# `en_core_web_lg` left `en_core_web_trf` undefined (NameError).
en_core_web_lg = spacy.load("en_core_web_lg")
en_core_web_trf = spacy.load("en_core_web_trf")

# Run the pipeline on a short sentence for each name and show the detected
# entities.
name = "Simon"
doc = en_core_web_trf(f"My name is {name}")
print(f"Name = {name}. Detected entities: {doc.ents}")

name = "Katy"
doc = en_core_web_trf(f"My name is {name}")
print(f"Name = {name}. Detected entities: {doc.ents}")

name = "Moses"
doc = en_core_web_trf(f"This is what God said to {name}")
import itertools
import pprint
import spacy

# Evaluate the en_core_web_trf pipeline on every (template, name set) pair.
# `templates`, `name_sets` and `names_recall` are expected to be defined
# before this snippet runs.
nlp = spacy.load("en_core_web_trf")
detailed_results = {}
print("Model name: en_core_web_trf")
for template, (set_name, names) in itertools.product(templates, name_sets.items()):
    print(f"Name set: {set_name}, Template: \"{template}\"")
    results = names_recall(nlp, names, template)
    # Results are keyed by the (template, name-set label) pair.
    detailed_results[template, set_name] = results
import itertools
import pprint

# Evaluate the en_core_web_lg pipeline on every (template, name set) pair.
# `en_core_web_lg`, `templates`, `name_sets` and `names_recall` are expected
# to be defined before this snippet runs.
detailed_results = {}
nlp = en_core_web_lg
print("Model name: en_core_web_lg")
for template, (set_name, names) in itertools.product(templates, name_sets.items()):
    print(f"Name set: {set_name}, Template: \"{template}\"")
    results = names_recall(nlp, names, template)
    # Results are keyed by the (template, name-set label) pair.
    detailed_results[template, set_name] = results
def names_recall(nlp: spacy.lang.en.English, names: List[str], template: str):
"""
Run the spaCy NLP model on the template + name,
calculate recall for detecting the "PERSON" entity
and return a detailed list of detection
:param nlp: spaCy nlp model
:param names: list of names to run model on
:param template: sentence with placeholder for name (e.g. "He calls himself {}")
"""
results = {}
@omri374
omri374 / biblical_names1.py
Last active October 16, 2021 11:35
Setting up name lists and templates
# A random sample of biblical names.
biblical_names = [
    "David", "Moses", "Abraham", "Samuel", "Jacob", "Isaac",
    "Jesus", "Matthew", "John", "Judas", "Simon", "Mary",
]

# Comparison set of non-biblical names, grouped by where they were taken from.
other_names = [
    # Singers
    "Beyonce", "Ariana", "Katy",
    # NBA players
    "Lebron", "Coby",
    # Popular (non-biblical) names in the 1900s
    # (https://www.ssa.gov/oact/babynames/decades/names1900s.html)
    "William", "Charles", "Robert", "Margaret", "Frank", "Helen",
    # Presidents
    "Ronald", "George", "Bill", "Barack", "Donald", "Joe",
]
@omri374
omri374 / timetook.py
Created May 16, 2020 17:20
TimeTook context manager: a class that logs how long a chunk of code took to run when that code is wrapped in a `with` block.
from time import time
class TimeTook(object):
"""
Calculates the time a block took to run.
Example usage:
with TimeTook("sample"):
s = [x for x in range(10000000)]
Modified from: https://blog.usejournal.com/how-to-create-your-own-timing-context-manager-in-python-a0e944b48cf8
"""
@omri374
omri374 / spacy_preprocessor.py
Created May 13, 2020 10:39
Text preprocessing using spaCy
import re
from typing import List
import spacy
from spacy.tokens import Doc
from tqdm import tqdm
class SpacyPreprocessor:
def __init__(
@omri374
omri374 / sparklyr_example.R
Created June 3, 2018 07:16
An example of a data pipeline using Sparklyr, mostly based on this blog post: https://beta.rstudioconnect.com/content/1518/notebook-classification.html
# Databricks notebook source
# Install sparklyr (this is needed every time the cluster starts, because the package has to be installed on all workers).
install.packages("sparklyr")
# Install and attach the titanic package, which provides the Titanic data.
install.packages('titanic')
library(titanic)
# Load the sparklyr package.
library(sparklyr)