This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import contextlib | |
import subprocess | |
@contextlib.contextmanager | |
def gzip(path, mode='r'): | |
"""Like gzip.open(), but using external gzip process which for some reason | |
is a lot faster on macOS.""" | |
try: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import subprocess | |
import random | |
import os | |
import re | |
import sys | |
import numpy as np | |
from collections import defaultdict | |
from itertools import accumulate | |
from bisect import bisect_right |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async function hash(url) { | |
const r = await fetch(url, { | |
'credentials': 'omit', | |
'method': 'GET', | |
'mode': 'cors' | |
}); | |
const h = await crypto.subtle.digest('sha-256', await r.arrayBuffer()); | |
const a = Array.from(new Uint8Array(h)); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import boto3 | |
import gzip | |
import sys | |
import mimetypes | |
from pprint import pprint | |
from typing import Dict, Any | |
mimetypes.add_type('application/wasm', '.wasm') | |
mimetypes.add_type('text/markdown', '.md') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import os | |
import signal | |
from traceback import print_exc | |
from subprocess import Popen, PIPE | |
from threading import Thread | |
from queue import SimpleQueue | |
from typing import Optional, TypeVar | |
from functools import wraps |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
from math import exp, log, floor | |
def reservoir_sample(k, it, *, rand: random.Random = random._inst): | |
sample = [] | |
numbered_it = enumerate(it) | |
for i, (_, line) in zip(range(k), numbered_it): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import gzip | |
import os | |
from collections import defaultdict | |
from xxhash import xxh64 | |
from unicodedata import category as cat | |
from unidecode import unidecode | |
from functools import reduce | |
from tqdm.autonotebook import tqdm |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const {Blob} = require('buffer'); | |
const fs = require('fs'); | |
const https = require('https'); | |
const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm'); | |
global.Module = {wasmBinary, onRuntimeInitialized}; | |
// Execute bergamot-translation-worker.js in this scope | |
const js = fs.readFileSync('./bergamot-translator-worker.js', {encoding:'utf8'}); | |
eval.call(global, js); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import struct | |
from pprint import pprint | |
import argparse | |
import mmap | |
from typing import NamedTuple | |
class Reader: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Generated by the protocol buffer compiler. DO NOT EDIT! | |
# source: sentencepiece_model.proto | |
"""Generated protocol buffer code.""" | |
from google.protobuf import descriptor as _descriptor | |
from google.protobuf import message as _message | |
from google.protobuf import reflection as _reflection | |
from google.protobuf import symbol_database as _symbol_database | |
# @@protoc_insertion_point(imports) |