Count unigrams and bigrams in the Wolfart-Ahenakew nêhiyawêwin corpus!
When building a keyboard for typing Cree, it is useful to know which graphemes are typed often, and which pairs of graphemes are typed one after another.
/**
* nonullfree.c -- throw errors when you call free(NULL)
*
* BUILDING:
*
* $ gcc -shared -fPIC nonullfree.c -ldl -o nonullfree.so
*
* USAGE (Linux):
*
* $ LD_PRELOAD=./nonullfree.so ./my-program
import ctypes | |
import tempfile | |
import distutils.ccompiler | |
from pathlib import Path | |
from random import randint | |
SOURCE_CODE = f""" | |
int roll(void) {{ | |
return {randint(1, 6)}; |
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
# Copyright 2019 Eddie Antonio Santos <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
# Install `fst_lookup` using pip:
#
# $ pip install fst-lookup
#
# Or, if you're using pipenv:
#
# $ pipenv install fst-lookup
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
""" | |
dull(1) dull(1)
NAME
dull -- the opposite of pointed
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
import re | |
from dataclasses import dataclass | |
from functools import partial | |
from typing import TypeVar, Generic, Callable | |
from unicodedata import normalize | |
A = TypeVar('A') |
<script> | |
var url = URL.createObjectURL(new Blob(['(', MyWebWorker.toString(), '())'], { | |
type: 'text/javascript' | |
})); | |
console.log(url); | |
var worker = new Worker(url); | |
worker.onmessage = function (event) { | |
console.log("from worker:", event); | |
} |
class Duration { | |
constructor(number) { | |
this._number = number; | |
} | |
valueOf() { | |
return this._number; | |
} | |
then(fn) { |
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
""" | |
Some ideas about how to organize data from the FST, and how to store Cree
wordforms.
Usage:
Analyzing a wordform descriptively yields "raw" FST output: