Skip to content

Instantly share code, notes, and snippets.

View wesslen's full-sized avatar

Ryan Wesslen wesslen

View GitHub Profile
@wesslen
wesslen / kappa.py
Last active August 8, 2022 17:26
Simple Cohen's Kappa with Prodigy on binary classification
from prodigy.components.db import connect
#from prodigy.util import split_string
import pandas as pd
from sklearn.metrics import cohen_kappa_score
from typing import List, Optional
import wasabi
import typer
app = typer.Typer()
msg = wasabi.Printer()
@wesslen
wesslen / pokemondict.tsv
Last active July 20, 2022 16:43
tagtog tsv synonym sub_type in spacy example
1 Bulbasaur Fushigidane
2 Ivysaur Fushigisou
3 Venusaur Fushigibana
4 Charmander Hitokage
5 Charmeleon Lizardo
@wesslen
wesslen / convert-labels.py
Last active July 15, 2022 16:35
convert binary Prodigy labels to be used in `train --textcat`
from prodigy.components.db import connect
# pull examples from dataset
db = connect()
examples = db.get_dataset("textcat-samp")
# modify change rejects to "not_" as accepts
new_examples = []
for eg in examples:
if eg["answer"] == "reject":
@wesslen
wesslen / shap_textcat.py
Last active May 19, 2022 02:34
Prodigy recipe for spaCy text categorization with custom HTML SHAP gradient
from prodigy.components.loaders import JSONL
import prodigy
import matplotlib as mpl
import spacy
import shap # shap requires numba, which requires < numpy 1.22; you may need to downgrade numpy to 1.21.6
def predict(texts):
"""Convert list of text to bare strings and use textcat to predict"""
texts = [str(text) for text in texts]
results = []
@wesslen
wesslen / owen_wow_recipe.py
Last active May 11, 2022 14:22
Prodigy mp4 api recipe
import prodigy
import requests
def get_stream():
    """Yield one task dict per clip returned by the Owen Wilson "wow" API.

    Requests ten random results and, for each, yields a dict holding the
    480p video URL under ``"video"`` and the movie title under ``"text"``.
    The HTTP request is made lazily, when the generator is first advanced.

    Raises:
        requests.Timeout: if the API does not respond within the timeout.
        requests.HTTPError: if the API answers with an error status code.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(
        "https://owen-wilson-wow-api.herokuapp.com/wows/random?results=10",
        timeout=10,
    )
    # Fail loudly on HTTP errors instead of parsing an error body as results.
    response.raise_for_status()
    for wow in response.json():
        movie = wow["movie"]
        url = wow["video"]["480p"]
        yield {"video": url, "text": movie}
@wesslen
wesslen / rethinking-globe-app.R
Last active January 27, 2022 17:37
Statistical Rethinking Globe Tossing Shiny App
library(rethinking)
library(shiny)
# Model specification as a list of formulas: W follows a binomial
# distribution with W+L trials and probability p, and p is given a
# uniform prior on [0, 1].
f <- alist(
W ~ dbinom( W+L ,p) , # binomial likelihood
p ~ dunif(0,1) # uniform prior
)
ui <- fluidPage(
titlePanel("Globe Tossing Problem"),
@wesslen
wesslen / gist:117705fdd4578a6076488f8c1e4d7e11
Created October 27, 2021 14:36
pandas columns to prodigy file formats
# Export selected email columns of `df_hits` as a JSON Lines file: one record
# per distinct message body ("text"), with the other fields nested under "meta".
# NOTE(review): `df_hits` is not defined in this snippet; presumably a pandas
# DataFrame created earlier -- confirm.
file_output = 'data/emails-hits.jsonl' # path of the JSON Lines file to write
(df_hits[['Message-ID','From','To','Subject','Body']]
.rename(columns={"Body": "text", "Message-ID": "id", "From": "from", "To": "to", "Subject":"subject"}) # "Body" becomes "text"; the other columns get lower-case names
.groupby(['text']) # one group per distinct text
.apply(lambda x: x[['id','from','to','subject']].to_dict(orient='list')) # per group: dict mapping each nested column to the list of its values
.reset_index() # turn the "text" group key back into a regular column
.rename(columns={0:'meta'}) # the apply result arrives as column 0; name it "meta"
.to_json(file_output, orient='records',lines=True)) # write one JSON object per line
library(httr)
library(tidyverse)
start_time <- Sys.time()
getSimulation <- function(i,numSim=1000){
url <- "http://rw-simulation.herokuapp.com/get_returns_array?stock_array="
array = paste0("[",paste(rep(i, numSim), collapse=","),"]")
f <- GET(paste0(url,array))
json = content(f, "text")
@wesslen
wesslen / generate-simulations.R
Last active February 15, 2021 18:13
get simulated returns
library(tidyverse)
set.seed(123)
# Read returns.csv and reshape it to long format: one row per Year/Asset
# pair, with the return value in `Returns` and a descriptive asset label.
returns <- read_csv("returns.csv") %>%
select(Year, equities_sp, treasury_10yr) %>% # keep the year and the two asset columns
gather(key = "Asset", value = "Returns", -Year) %>% # wide -> long; Year is kept as the identifier column
mutate(Asset = ifelse(Asset=="equities_sp", # equities_sp is labelled Asset A, every other value Asset B
"Asset A: High risk, high return",
"Asset B: Low risk, low return"))
@wesslen
wesslen / 01-get-data-kaggle.py
Last active March 2, 2023 18:53
stack-overflow-query
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
# Create helper objects for the "stackoverflow" dataset in the
# "bigquery-public-data" project.
# NOTE(review): `stackOverflow` and `bq_assistant` are built from the same
# project/dataset arguments; only `bq_assistant` is used in the lines visible
# here -- confirm whether both are needed.
stackOverflow = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
dataset_name="stackoverflow")
bq_assistant = BigQueryHelper("bigquery-public-data", "stackoverflow")
# The return value is not stored; the commented list that follows appears to
# be its notebook output.
bq_assistant.list_tables()
# ['badges',
# 'comments',