Skip to content

Instantly share code, notes, and snippets.

@crides
Created March 25, 2021 16:54
Show Gist options
  • Select an option

  • Save crides/64a6801d62def7ed3cdae5a0a65295cc to your computer and use it in GitHub Desktop.

Select an option

Save crides/64a6801d62def7ed3cdae5a0a65295cc to your computer and use it in GitHub Desktop.
#include "steno.h"
#include "orthography.h"
#include <string.h>
#include <ctype.h>
// True when `c` is an actual (non-NUL) character contained in `set`.
// Plain strchr() also matches the terminating NUL of `set`, which would make
// a missing character look like a member of every character class.
static bool ortho_in_set(const char *set, char c) {
    return c != '\0' && strchr(set, c) != NULL;
}

// isalpha() for a plain `char`; the cast avoids undefined behaviour when
// `char` is signed and the value is negative.
static bool ortho_is_alpha(char c) {
    return isalpha((unsigned char) c) != 0;
}

// Writes `head` followed by `tail` into `output` as one NUL-terminated string,
// overwriting whatever `output` held before.
static void ortho_emit(char *output, const char *head, const char *tail) {
    strcpy(output, head);
    strcat(output, tail);
}

// Applies English orthography rules when attaching `suffix` to `word`.
//
// word:   NUL-terminated word the suffix is attached to; only its last
//         WORD_ENDING_SIZE characters are examined.
// suffix: NUL-terminated suffix text.
// output: receives the NUL-terminated text to append after backspacing. It is
//         written from index 0 and needs room for strlen(suffix) + 3 bytes.
//         It is left untouched when no rule applies.
//
// Returns how many chars of `word` to backspace before appending `output`,
// or -1 when no rule matches.
//
// NOTE(review): the rules index `rev` up to position 4, so this assumes
// WORD_ENDING_SIZE >= 5 -- confirm against its definition.
int8_t ortho_transform(const char *word, const char *suffix, char *output) {
    const size_t word_len = strlen(word);
    char rev[WORD_ENDING_SIZE];

    // Invert `word` so that indexing is anchored to the end. Positions past
    // the start of the word stay NUL, which no character class accepts.
    memset(rev, 0, sizeof rev);
    steno_debug("rev: ");
    for (uint8_t i = 0; i < WORD_ENDING_SIZE && i < word_len; i++) {
        rev[i] = word[word_len - i - 1];
        steno_debug("%c", rev[i]);
    }
    steno_debug_ln("");
    steno_debug_ln("suffix: %s", suffix);

    // { ([aeiou]c)$ or (ion)$ or (.*ur)e } + ly -> \1ally
    if (strneq(suffix, "ly", 2)) {
        if ((rev[0] == 'c' && ortho_in_set("aeiou", rev[1])) || strneq(rev, "noi", 3)) {
            ortho_emit(output, "ally", suffix + 2);
            return 0;
        }
        if (strneq(rev, "eru", 3)) {
            ortho_emit(output, "ally", suffix + 2);
            return 1;
        }
    }

    // (t)e + (ry|ries) -> \1o\2
    if (strneq("et", rev, 2) && (strneq("ry", suffix, 2) || strneq("ries", suffix, 4))) {
        ortho_emit(output, "o", suffix);
        return 1;
    }

    // ([naeiou])te? + (cy|cies) -> \1\2
    if (strneq("cy", suffix, 2) || strneq("cies", suffix, 4)) {
        // 1 when the word ends in a silent `e` that must go along with the `t`
        const uint8_t t_start = rev[0] == 'e';
        if (rev[t_start] == 't' && ortho_in_set("naeiou", rev[t_start + 1])) {
            strcpy(output, suffix);
            return t_start + 1;
        }
    }

    // word: (s|sh|x|z|zh)
    //       ((?:oa|ea|i|ee|oo|au|ou|l|n|(?<![gin]a)r|t)ch)
    //   + s(\w|$) -> \1es\2
    // ([bcdfghjklmnpqrstvwxz])y + s(\w|$) -> \1ies\2
    if (suffix[0] == 's') {
        // `sh` / `zh` end in `h`, so the `h` is rev[0] and the s/z is rev[1]
        const bool sibilant = ortho_in_set("sxz", rev[0])
            || (rev[0] == 'h' && (rev[1] == 's' || rev[1] == 'z'));
        const bool ch_ending = strneq("hc", rev, 2)
            && (ortho_in_set("ilnt", rev[2])
                || strneq("ao", rev + 2, 2) || strneq("ae", rev + 2, 2)
                || strneq("ee", rev + 2, 2) || strneq("oo", rev + 2, 2)
                || strneq("ua", rev + 2, 2) || strneq("uo", rev + 2, 2)
                || (rev[2] == 'r' && !(rev[3] == 'a' && ortho_in_set("gin", rev[4]))));
        if (sibilant || ch_ending) {
            ortho_emit(output, "e", suffix);
            return 0;
        }
        if (rev[0] == 'y' && ortho_in_set("bcdfghjklmnpqrstvwxz", rev[1])) {
            ortho_emit(output, "ie", suffix);
            return 1;
        }
    }

    // (\w)ie + ing -> \1ying
    // The whole suffix is emitted so anything following `ing` is kept.
    if (strneq("ei", rev, 2) && ortho_is_alpha(rev[2]) && strneq("ing", suffix, 3)) {
        ortho_emit(output, "y", suffix);
        return 2;
    }

    // (\w[cdfghlmnpr])y + ist -> \1ist
    // Only the `y` is backspaced, so the consonant before it is still in
    // place and must not be emitted again.
    if (strneq("ist", suffix, 3) && rev[0] == 'y' && ortho_in_set("cdfghlmnpr", rev[1])) {
        strcpy(output, suffix);
        return 1;
    }

    // (\w[bcdfghjklmnpqrstvwxz])y + ([abcdefghjklnopqrstuxz]) -> \1i\2
    if (rev[0] == 'y' && ortho_in_set("bcdfghjklmnpqrstvwxz", rev[1]) && ortho_is_alpha(rev[2])
            && ortho_in_set("abcdefghjklnopqrstuxz", suffix[0])) {
        ortho_emit(output, "i", suffix);
        return 1;
    }

    // word: (\w[^aeiou]it)
    //       (\wct)
    //       (\w[^aeiou]is)e
    //       (\w[aeiou][bcdfghjklmnprstvwxyz]+at)e
    //   + er(s?) -> \1or\2
    if (strneq("er", suffix, 2)) {
        // `[^aeiou]` needs a character to be present, hence the NUL checks
        if (rev[0] == 't'
                && (rev[1] == 'c'
                    || (rev[1] == 'i' && rev[2] != '\0' && !ortho_in_set("aeiou", rev[2])))) {
            ortho_emit(output, "o", suffix + 1);
            return 0;
        }
        if (rev[0] == 'e') {
            if (rev[1] == 's' && rev[2] == 'i'
                    && rev[3] != '\0' && !ortho_in_set("aeiou", rev[3])) {
                ortho_emit(output, "o", suffix + 1);
                return 1;
            }
            if (rev[1] == 't' && rev[2] == 'a' && ortho_in_set("bcdfghjklmnprstvwxyz", rev[3])) {
                // Skip the rest of the consonant cluster preceding `at`
                uint8_t i = 4;
                while (i < WORD_ENDING_SIZE && ortho_in_set("bcdfghjklmnprstvwxyz", rev[i])) {
                    i++;
                }
                // A cluster running off the window gives no vowel to inspect
                if (i < WORD_ENDING_SIZE && ortho_in_set("aeiou", rev[i])) {
                    ortho_emit(output, "o", suffix + 1);
                    return 1;
                }
            }
        }
    }

    // (\w[bcdfghjklmnpqrstuvwxz])e + { ([aeiouy]\w) or ([aeoy]) } -> \1\2
    if (rev[0] == 'e' && ortho_in_set("bcdfghjklmnpqrstuvwxz", rev[1]) && ortho_is_alpha(rev[2])
            && ((ortho_in_set("aeiouy", suffix[0]) && ortho_is_alpha(suffix[1]))
                || ortho_in_set("aeoy", suffix[0]))) {
        strcpy(output, suffix);
        return 1;
    }

    // ([aeiouy](?:pod|log)) + ([aeiouy]) -> \1\2
    if (ortho_in_set("aeiouy", rev[3]) && (strneq("gol", rev, 3) || strneq("dop", rev, 3))
            && ortho_in_set("aeiouy", suffix[0])) {
        strcpy(output, suffix);
        return 0;
    }

    // ((?:[bcdfghjklmnprstvwxyz]|qu)[ae]l) + y -> \1ly
    if (suffix[0] == 'y' && rev[0] == 'l' && (rev[1] == 'a' || rev[1] == 'e')
            && (strneq("uq", rev + 2, 2) || ortho_in_set("bcdfghjklmnprstvwxyz", rev[2]))) {
        ortho_emit(output, "l", suffix);
        return 0;
    }

    // XXX ((?:^|\\W)(?:[bcdfghjklmnprstvwxyz]+|[bcdfghjklmnprstvwxyz]*qu)[aeiou])([bdfgklmnprstz]) + (ed|en|er|ier|est|ing|y|ie|ies|iest|iness|ish|abl[ey]|ability|abilities) -> \1\2\2\3
    // suffix: (ed|en|er|ier|est|in[g]|y|ie|ies|iest|iness|ish|abl[ey]|ability|abilities)
    // word:   ((?:[bcdfghjklmnprstvwxyz]+|[bcdfghjklmnprstvwxyz]*qu)[aeiou])([bdfgklmnprstz])
    //         ((?:[bcdfghjklmnprstvwxyz]|qu)a)([gbmptv])
    //         ((?:[bcdfghjklmnprstvwxyz]|qu)e)([gbpv])
    //         ((?:[bcdfghjklmnprstvwxyz]|qu)i)([gbmpv])
    //         ((?:[bcdfghjklmnprstvwxyz]|qu)o)([gbdlv])
    //         ([bcdfghjklmnprstvwxyz]u)([gbdlmntv])
    // repl: \1\2\2\3
    const bool doubling_suffix =
        (suffix[0] == 'e' && (ortho_in_set("dnr", suffix[1]) || strneq(suffix + 1, "st", 2)))
        || (strneq(suffix, "abl", 3) && ortho_in_set("ey", suffix[3]))
        // Matching the full `abilit` prefix keeps the comparison inside the string
        || (strneq(suffix, "abilit", 6) && (suffix[6] == 'y' || strneq(suffix + 6, "ies", 3)))
        || (suffix[0] == 'i' && ortho_in_set("ne", suffix[1])) // Omitting `ier`, `ies`, `iest`
        || strneq(suffix, "ish", 3)
        || suffix[0] == 'y';
    if (doubling_suffix) {
        const char last = rev[0];
        const char vowel = rev[1];
        bool doubles;
        switch (vowel) {
            case 'a': doubles = ortho_in_set("gbmptv", last); break;
            case 'e': doubles = ortho_in_set("gbpv", last); break;
            case 'i': doubles = ortho_in_set("gbmpv", last); break;
            case 'o': doubles = ortho_in_set("gbdlv", last); break;
            case 'u': doubles = ortho_in_set("gbdlmntv", last); break;
            default:  doubles = false; break;
        }
        if (doubles) {
            const bool consonant_before = ortho_in_set("bcdfghjklmnprstvwxyz", rev[2]);
            if (consonant_before || (vowel != 'u' && strneq("uq", rev + 2, 2))) {
                // Repeat the final consonant ahead of the suffix
                const char doubled[2] = { last, '\0' };
                ortho_emit(output, doubled, suffix);
                return 0;
            }
        }
    }

    return -1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment