Skip to content

Instantly share code, notes, and snippets.

@zyocum
zyocum / tokens.py
Last active September 14, 2017 01:01
#!/usr/bin/env python3
"""Tokenize text naively based on unicode character properties"""
import json
import regex
import sys
from collections import namedtuple
#!/usr/bin/env python3
"""Attempt to generate false etymologies from phonetically similar words"""
import csv
import json
import sys
import random
from itertools import groupby
mbp-zyocu-4925:data-tools zyocum$ diff eng.en.fr.txt eng-forced.en.fr.txt
76c76
< Prenez possession de votre hamburger moderniste au four à prévoir ensemble sur ayurveda: larCPbzs https://t.co/zbQLrcKASK
---
> Prenez possession de votre hamburger moderniste à l'heure d'attendre ensemble sur ayurveda: larCPbzs https://t.co/zbQLrcKASK
78c78
< RT @SixGodDrake: Je ne changerai pas pour n'importe qui, je ne me soucie pas de ce que les gens pensent, parce que je suis moi et fier de ça.
---
> RT @SixGodDrake: Je ne changerai pas pour n'importe qui, je m'en fiche de ce que les gens pensent, parce que je suis moi et fier de ça.
81c81
#!/usr/bin/env python3
"""Generate 1-dimensional automata based on various rules"""
def ngrams(iterable, n=1):
    """Generate ngrams from an iterable.

    Builds ``n`` progressively shifted views of ``iterable`` (offsets
    ``0 .. n-1``) and zips them together, so each resulting tuple holds
    ``n`` consecutive items.

    Args:
        iterable: The input to window over. It is sliced with
            ``iterable[i:]``, so it must support slicing (e.g. ``str``,
            ``list``, ``tuple``).
        n: Window size; defaults to 1.

    Returns:
        A lazy ``zip`` iterator of ``n``-tuples. Because ``zip`` stops at
        the shortest view, an input with fewer than ``n`` items yields
        nothing.
    """
    # One suffix per offset; zip pairs items at the same position across
    # the suffixes, which yields the sliding windows.
    shifted_views = (iterable[offset:] for offset in range(n))
    return zip(*shifted_views)
def states(state, rule, left_pad='0', right_pad='0'):
"""Generate a stream of states from an initial state and a rule"""
next_state = ''.join(rule[''.join(window)] for window in ngrams(state, 3))
@zyocum
zyocum / Clausie 10 sentences
Created May 26, 2015 18:01
Ollie 10 sentences
# Line 1: "The Cleveland Rams selected Graf in the 1942 NFL Draft, but he instead attended Harvard Business School. "
# Semantic graph: [selected/VBD
# nsubj:[Rams/NNPS det:The/DT nn:Cleveland/NNP]
# dobj:[Graf/NNP
# prep:[in/IN pobj:[Draft/NNP det:the/DT num:1942/CD nn:NFL/NNP]]]
# cc:but/CC
# conj:[attended/VBD
# nsubj:he/PRP
# advmod:instead/RB
# dobj:[School/NNP nn:Harvard/NNP nn:Business/NNP]]]