You'll need ripgrep and pandoc to get started. You can read more about ripgrep at https://github.com/BurntSushi/ripgrep and about pandoc at https://pandoc.org. I use both of these frequently and they're quite helpful.
You can install them both with Homebrew:
brew install pandoc ripgrep
| #cloud-config | |
| package_upgrade: true | |
| ssh_authorized_keys: | |
| - <your key> | |
| packages: | |
| - apt-transport-https | |
| - ca-certificates | |
| - curl |
| from sklearn.base import BaseEstimator, ClassifierMixin | |
| from scipy.special import expit, logit | |
| class SoftLabelClassifier(BaseEstimator, ClassifierMixin): | |
| def __init__(self, regressor, eps=0.001): | |
| self.regressor = regressor | |
| self.eps = eps | |
| def fit(self, X, y=None): |
| import dash | |
| from dash import dcc | |
| from dash import html | |
| from dash.dependencies import Input, Output | |
| import json | |
| clicked = [] | |
| external_stylesheets = ["https://codepen.io/chriddyp/pen/bWLwgP.css"] | |
| app = dash.Dash(external_stylesheets=external_stylesheets) |
| import random | |
| from typing import List, Union | |
| import shapely.geometry as geo | |
| from tqdm import tqdm | |
| MultiLineStringType = Union[List[geo.LineString], geo.MultiLineString] | |
| def overlap_lines( |
| # SETUP | |
| cuda_version = !nvcc --version | grep -Po '^.*release \K([0-9]+\.[0-9])' | sed 's/\.//g' | |
| # REQUIREMENTS | |
| from pathlib import Path | |
| reqs = f""" | |
| spacy[cuda{cuda_version.s},transformers,lookups]==3.2.1 | |
| """ | |
| Path("requirements.txt").write_text(reqs) | |
| !pip install --quiet -r requirements.txt |
| from spacy.tokenizer import Tokenizer | |
| class CTLTokenizer(Tokenizer): | |
| # https://stackoverflow.com/a/58718664 | |
| def __call__(self, string) -> spacy.tokens.Doc: | |
| string = self.clean_string(string) | |
| doc = super().__call__(string) | |
| return doc | |
| def clean_string(self, string: str) -> str: |
| from typing import Optional, Iterable, cast, List | |
| from thinc.api import get_current_ops, Ops | |
| from thinc.types import Ragged, Ints1d | |
| from spacy.pipeline.spancat import Suggester | |
| from spacy.tokens import Doc | |
| from spacy.util import registry | |
| @registry.misc("ngram_digits_suggester.v1") |
| RELEASE="15.2.0" | |
| mkdir /tmp/iosevka-font/v$RELEASE | |
| cd /tmp/iosevka-font/v$RELEASE | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-$RELEASE.zip | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-aile-$RELEASE.zip | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-curly-$RELEASE.zip | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-curly-slab-$RELEASE.zip | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-etoile-$RELEASE.zip | |
| wget https://github.com/be5invis/Iosevka/releases/download/v$RELEASE/ttf-iosevka-fixed-$RELEASE.zip |
| from spacy import displacy | |
| from spacy.tokens import Doc | |
| from spacy.util import get_lang_class | |
| from pathlib import Path | |
| words = ["The", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "fox"] | |
| heads = [3, 3, 3, 4, 4, 4, 8, 8, 5] | |
| deps = ["det", "amod", "amod", "nsubj", "ROOT", "prep", "pobj", "det", "amod"] | |
| doc = Doc(get_lang_class("en")().vocab, words=words, heads=heads, deps=deps) |