Skip to content

Instantly share code, notes, and snippets.

View pmbaumgartner's full-sized avatar

Peter Baumgartner pmbaumgartner

View GitHub Profile
@pmbaumgartner
pmbaumgartner / _run_api.sh
Last active January 6, 2024 21:17
Mistral w/ vLLM. Run w/ an RTX 3090
# Serve Mistral-7B-Instruct-v0.2 from the vLLM container image.
# Reference: https://docs.mistral.ai/self-deployment/vllm/
#
# Replace the placeholder below with a real Hugging Face access token before
# running. It is quoted so the shell treats it as a plain string instead of
# parsing `<Huggingface` as an input redirection.
export HF_TOKEN="<Huggingface Token>"

# --gpus all    : make all host GPUs available to the container
# -e HF_TOKEN   : forward the token into the container environment
# -p 8000:8000  : publish container port 8000 on host port 8000
docker run --gpus all \
    -e HF_TOKEN="$HF_TOKEN" -p 8000:8000 \
    ghcr.io/mistralai/mistral-src/vllm:latest \
    --host 0.0.0.0 \
    --model mistralai/Mistral-7B-Instruct-v0.2
@pmbaumgartner
pmbaumgartner / ndict.py
Created February 21, 2023 20:27
Create a dictionary from string variable names, modeled on Rust's struct shorthand initialization.
def ndict(*args):
    """Return a dictionary mapping each given name to its global value.

    Mirrors Rust's struct field-init shorthand: ``ndict("a", "b")`` builds
    ``{"a": a, "b": b}``.

    Names are resolved through ``globals()``, which is the namespace of the
    module in which ``ndict`` itself is defined. Local variables, and the
    globals of any other module that imports this function, are not
    consulted.

    Args:
        *args: Names of global variables to collect.

    Returns:
        A new dict with one entry per distinct name, in first-seen order.

    Raises:
        KeyError: If a name is not present in the module globals.
    """
    namespace = globals()
    result = {}
    for name in args:
        try:
            result[name] = namespace[name]
        except KeyError:
            # Re-raise with the descriptive message; the bare lookup error
            # adds nothing, so suppress the chained context.
            raise KeyError(f"Key '{name}' not found in globals") from None
    return result
from typing import Optional, TypedDict


class DisplacyDepsWords(TypedDict):
    """A single word entry for a displaCy dependency visualization.

    NOTE(review): field names follow displaCy's manual "dep" input format;
    confirm against the consumer of this type.
    """

    text: str
    tag: str
    # May be None when no lemma is available.
    lemma: Optional[str]


class DisplacyDepsArcs(TypedDict):
    """A single dependency arc for a displaCy dependency visualization."""

    start: int
    end: int
    label: str
    # Arc direction; presumably "left" or "right" -- confirm against displaCy.
    dir: str
DisplacyDepsData = TypedDict(
"DisplacyDepsData",
@pmbaumgartner
pmbaumgartner / dep-displacy.py
Created June 23, 2022 20:20
dependency matcher displacy example
from spacy import displacy
from spacy.tokens import Doc
from spacy.util import get_lang_class
from pathlib import Path
# Hand-annotated parse, so a Doc can be built without a trained pipeline.
words = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "fox"]
# heads[i] is the index of token i's syntactic head (spaCy v3 reads these as
# absolute token indices); the root "jumped" (index 4) points at itself.
heads = [3, 3, 3, 4, 4, 4, 8, 8, 5]
# deps[i] labels the arc between token i and heads[i]. The final three tokens
# ("the", "lazy", "fox") are det / amod / pobj so the labels agree with
# `heads`: "the" and "lazy" attach to the second "fox", which is the object
# of the preposition "over".
deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "det", "amod", "pobj"]
# The vocab comes from a blank English language object.
doc = Doc(get_lang_class("en")().vocab, words=words, heads=heads, deps=deps)
# Download Iosevka TTF release archives into a per-release scratch directory.
RELEASE="15.2.0"
DEST="/tmp/iosevka-font/v$RELEASE"
BASE_URL="https://github.com/be5invis/Iosevka/releases/download/v$RELEASE"

# -p creates the missing parent directory and makes re-runs harmless.
mkdir -p "$DEST"
# Stop if the directory is unusable instead of downloading into the wrong place.
cd "$DEST" || exit 1

# Each entry is the variant infix of the archive name; the empty string is
# the default family (ttf-iosevka-$RELEASE.zip).
for variant in "" aile- curly- curly-slab- etoile- fixed-; do
    wget "$BASE_URL/ttf-iosevka-$variant$RELEASE.zip"
done
@pmbaumgartner
pmbaumgartner / digit_ngram_suggester.py
Last active July 1, 2024 16:08
A span candidate suggester function for spaCy that suggests spans containing a digit.
from typing import Optional, Iterable, cast, List
from thinc.api import get_current_ops, Ops
from thinc.types import Ragged, Ints1d
from spacy.pipeline.spancat import Suggester
from spacy.tokens import Doc
from spacy.util import registry
@registry.misc("ngram_digits_suggester.v1")
@pmbaumgartner
pmbaumgartner / cleaning_tokenizer.py
Created January 10, 2022 15:49
Clean in spaCy Tokenizer
from spacy.tokenizer import Tokenizer
class CTLTokenizer(Tokenizer):
# https://stackoverflow.com/a/58718664
def __call__(self, string) -> "spacy.tokens.Doc":
    """Clean *string* with ``clean_string`` and tokenize the result.

    The return annotation is written as a string so that it is not evaluated
    when the class body runs: the visible imports bring in only
    ``Tokenizer``, so a bare ``spacy.tokens.Doc`` would raise ``NameError``
    at definition time.
    """
    cleaned = self.clean_string(string)
    return super().__call__(cleaned)
def clean_string(self, string: str) -> str:
@pmbaumgartner
pmbaumgartner / spacy_colab_gpu.py
Last active January 7, 2022 00:52
Install spaCy with correct cuda version on Colab with GPU
# SETUP
# IPython/Colab shell capture: take the "release X.Y" number printed by
# `nvcc --version` and strip the dot (e.g. "11.1" becomes "111").
cuda_version = !nvcc --version | grep -Po '^.*release \K([0-9]+\.[0-9])' | sed 's/\.//g'
# REQUIREMENTS
from pathlib import Path
# `cuda_version.s` is the captured shell output as a single string; it is
# interpolated to select the matching `cuda<version>` extra of spaCy.
reqs = f"""
spacy[cuda{cuda_version.s},transformers,lookups]==3.2.1
"""
# Write the requirement spec to disk, then install it quietly with pip.
Path("requirements.txt").write_text(reqs)
!pip install --quiet -r requirements.txt
import random
from typing import List, Union
import shapely.geometry as geo
from tqdm import tqdm
# Alias accepting either a plain list of LineStrings or a shapely MultiLineString.
MultiLineStringType = Union[List[geo.LineString], geo.MultiLineString]
def overlap_lines(
@pmbaumgartner
pmbaumgartner / dash.py
Created December 15, 2021 00:10
simple app for clicking on data to label
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import json
# Module-level list; presumably accumulates the points clicked for
# labelling -- confirm against the callbacks that use it.
clicked = []
# External CSS file(s) handed to the Dash app below.
external_stylesheets = ["https://codepen.io/chriddyp/pen/bWLwgP.css"]
# Create the Dash application with the external stylesheet(s) attached.
app = dash.Dash(external_stylesheets=external_stylesheets)