This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on a simplified version of a Python snippet: https://gist.github.com/vadimkantorov/b26eda3645edb13feaa62b874a3e7f6f | |
function yaml_loads(frontamtter_str) | |
{ | |
const procval = s => (s.length >= 2 && s[0] == '"' && s[s.length - 1] == '"') ? s.slice(1, s.length - 1) : (s.length >= 2 && s[0] == "'" && s[s.length - 1] == "'") ? s.slice(1, s.length - 1) : s; | |
for(const line of frontmatter_str.split('\n')) | |
{ | |
const line_strip = line.trim(); | |
const is_list_item = line_strip.startsWith('- '); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on https://github.com/tigt/mini-svg-data-uri/issues/24 | |
// Usage: cat myicon.svg | node svgdataurify.js | |
let svg = ""; | |
process.stdin.on("data", (chunk) => { svg += chunk; }); | |
process.stdin.on("end", async () => | |
{ | |
const reWhitespace = /\s+/g, reUrlHexPairs = /%[\dA-F]{2}/g, hexDecode = {'%20': ' ', '%3D': '=', '%3A': ':', '%2F': '/'}, specialHexDecode = match => hexDecode[match] || match.toLowerCase(); | |
if(svg.charCodeAt(0) === 0xfeff) svg = svg.slice(1); | |
svg = svg.trim().replace(reWhitespace, ' ').replaceAll('"', '\''); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from https://github.com/microsoft/WSL/issues/8151#issuecomment-2276363014 | |
curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - | |
sudo apt-get install -y nodejs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# prependfrontmatter ./index.html | |
alias prependfrontmatter="sed -i '1i---\n---'" | |
# https://unix.stackexchange.com/questions/99350/how-to-insert-text-before-the-first-line-of-a-file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python citygeocoder.py > '~citygeocoder.json' | |
# https://www.wikidata.org/wiki/Wikidata:SPARQL_tutorial/en | |
# https://github.com/OSMNames/OSMNames, http://github.com/OSMNames/OSMNames/issues/208 | |
# https://osmnames.org/download/ | |
# https://stackoverflow.com/questions/74261733/how-to-fetch-gps-coordinates-of-worlds-largest-cities-from-wikidata-via-sparql | |
# FIXME: for some reason misses Helsinki | |
import sys | |
import json | |
import urllib.parse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: extract all .eml files in the current directory into the current directory: python uneml.py *.eml | |
import os | |
import sys | |
import email | |
import email.policy | |
for input_path in sys.argv[1:]: | |
print('eml', repr(input_path)) | |
eml = email.message_from_file(open(input_path), policy = email.policy.default) | |
for part in eml.walk(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://github.com/wp-cli/entity-command/issues/512 | |
# https://developer.wordpress.org/cli/commands/option/list/ | |
# https://developer.wordpress.org/cli/commands/option/update/ | |
# export options to a pretty-formatted JSON file | |
wp option list --format=json | jq '.' > wpoptionimpex.json | |
# [ | |
# { | |
# "option_name": "name1", | |
# "option_value": "value1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://en.cppreference.com/w/c/string/byte/strrchr | |
#include <stdio.h> | |
#include <string.h> | |
int main(int argc, char* argv[]) | |
{ | |
if(argc < 2) | |
return -1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# works only with a single, non-batched tensor of token ids | |
import torch | |
def bpedetokenize_loop(token_ids, token_utf8bytes, token_lens):
    """Concatenate the byte spans of the given tokens, one token at a time.

    Args:
        token_ids: sequence of token indices (a single, non-batched tensor);
            each index selects one span of ``token_utf8bytes``.
        token_utf8bytes: flat tensor holding the bytes of all tokens back to
            back (assumed 1-D -- TODO confirm against the caller).
        token_lens: per-token span lengths; token ``i`` occupies
            ``token_utf8bytes[sum(token_lens[:i]) : sum(token_lens[:i + 1])]``.

    Returns:
        A tensor with the selected spans concatenated in ``token_ids`` order.
        For empty ``token_ids`` an empty slice of ``token_utf8bytes`` is
        returned (same dtype/device) instead of letting ``torch.cat`` raise on
        an empty list.
    """
    if len(token_ids) == 0:
        return token_utf8bytes[:0]
    # Span boundaries: bounds[i] is where token i starts, bounds[i + 1] where it ends.
    inds = torch.cat((torch.zeros_like(token_lens[:1]), token_lens.cumsum(-1)))
    # Plain Python ints avoid per-element tensor indexing inside the loop.
    bounds = inds.tolist()
    return torch.cat([token_utf8bytes[bounds[i]:bounds[i + 1]] for i in token_ids.tolist()])
def bpedetokenize_vec(token_ids, token_utf8bytes, token_lens): | |
inds_begin = torch.cat((torch.zeros_like(token_lens[:1]), token_lens[:-1].cumsum(-1))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# before running download captcha.onnx from https://huggingface.co/spaces/docparser/Text_Captcha_breaker | |
# python -m pip install numpy pillow onnxruntime --user --break-system-packages | |
import argparse | |
import PIL.Image | |
import numpy | |
import onnxruntime | |
parser = argparse.ArgumentParser() | |
parser.add_argument('--model-path', default = 'captcha.onnx') |
NewerOlder