Skip to content

Instantly share code, notes, and snippets.

@rgov
Created October 4, 2017 03:00
Show Gist options
  • Save rgov/ade0094bc4f0e5c54813eabd8b925bfe to your computer and use it in GitHub Desktop.
Save rgov/ade0094bc4f0e5c54813eabd8b925bfe to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
This module attempts to implement syllabification for Greek words.
The consonant clusters are derived from "Greek: A Comprehensive Grammar of the
Modern Language" by D. Holton, et al.
'''
import phonemes
import re
# Names exported by a star-import of this module.
__all__ = [ 'syllabificate', 'get_stressed_syllable', 'get_vowel',
'get_prevowel_sound' ]
# Marks that may precede a vowel symbol (secondary and primary stress).
vowelmodifiers = list(u'ˌˈ')

# Single-character vowel symbols.
vowels = list(u'ieaouɪə')

# Palatal consonant symbols; they are also part of `consonants` below.
palatals = list(u'jçɲʎ')

# Every single-character consonant symbol.
consonants = list(u'ptkfθxvδɣszlrmn') + palatals

# Consonant groups accepted at the start of a word: every single consonant
# plus the two- and three-consonant clusters listed here.
# NOTE(review): the first character of the second entry on the u'δj' line
# appears to be Greek nu rather than Latin 'v' or 'n' -- confirm it is intended.
wordinitial = list(consonants) + [
    u'ps', u'ts', u'ks', u'pr', u'tr', u'kr',
    u'pl', u'pn', u'kl', u'kn',
    u'tm',
    u'mn',
    u'fr', u'fl', u'θl', u'θr', u'xl', u'xr', u'vl', u'δr', u'ɣl', u'ɣr',
    u'sp', u'st', u'sk', u'sf', u'sθ', u'sx',
    u'sn', u'zn', u'sl', u'zl',
    u'zb',
    u'ɣn', u'θn', u'xn', u'zm',
    u'ft', u'fk', u'xt',
    u'vδ', u'vɣ', u'ɣδ', u'zv', u'zɣ',
    u'fθ', u'xθ', u'kt', u'pt',
    u'bl', u'br', u'dr', u'gr', u'gl',
    # The comma after the last entry on this line matters: without it Python
    # concatenates that literal with the first literal of the next line.
    u'δj', u'νj', u'pç',
    u'spl', u'spr', u'skl', u'str', u'skr',
    u'skn',
    u'sfr',
]

# Consonant groups accepted anywhere in a word: everything in `wordinitial`
# plus the clusters listed here.
wordmedial = list(wordinitial) + [
    u'tn', u'fn', u'vn', u'θm', u'vm',
    u'rt', u'rδ', u'rf', u'rk', u'rx', u'rn', u'rm', u'rɣ', u'lm', u'lp', u'lk',
    u'lt', u'lɣ',
    u'nθ', u'nx', u'mf', u'ns', u'nδ', u'mv', u'nɣ', u'nz',
    u'sxr', u'sxn', u'sθm', u'ptr',
    # The last two entries on this line are separate clusters; keep the comma
    # between them.
    u'ktr', u'kst', u'kδr', u'ngl', u'mbr', u'ftr', u'fst', u'vɣl', u'rtr',
    u'kstr', u'fstr',
]
def greedy_split(phonemes):
    '''
    Cut a phoneme sequence into consonant-group and vowel pieces.

    The pieces of `phonemes` are joined into one string, which is then split
    with a single regular expression. Its alternatives are tried in this order:
    the word-initial clusters anchored at the start of the string (longest
    first), the word-medial clusters (longest first), and finally each vowel
    optionally preceded by vowel modifiers. Because the expression is one
    capturing group, re.split() returns the matched pieces as well as any text
    left between matches; empty strings are discarded.

    Returns a list of strings.
    '''
    def longest_first(clusters):
        return sorted(clusters, key=len, reverse=True)

    alternatives = ['^' + cluster for cluster in longest_first(wordinitial)]
    alternatives += longest_first(wordmedial)

    modifier_prefix = '[' + ''.join(vowelmodifiers) + ']*'
    alternatives += [modifier_prefix + vowel for vowel in vowels]

    regex = '(' + '|'.join(alternatives) + ')'
    pieces = re.split(regex, ''.join(phonemes), flags=re.UNICODE)
    return [piece for piece in pieces if piece != '']
def is_vowel(syllable):
    '''
    Return True if the argument, once passed through
    phonemes.strip_modifiers(), is one of the entries of `vowels`.
    '''
    bare = phonemes.strip_modifiers(syllable)
    return bare in vowels
def is_consonant_cluster(syllable):
    '''
    Return True if no element of the argument is a vowel according to
    is_vowel(). An empty argument therefore counts as a consonant cluster.
    '''
    return not any(is_vowel(sound) for sound in syllable)
def form_syllables(l):
    '''
    Assemble a list of consonant-group and vowel pieces into syllables.

    Every vowel piece starts a new syllable. A consonant group that is
    directly followed by a piece that is not a consonant group is held back and
    becomes the beginning of the next syllable. Any other consonant group
    (followed by another consonant group, or last in the list) is appended to
    the preceding syllable.

    Consonant groups that occur before the first syllable has been formed have
    no preceding syllable to join, so they are held back as well instead of
    raising IndexError. If no syllable is formed at all, the held-back
    consonants are returned as a single element.

    Returns a tuple of strings.
    '''
    syllables, onset = [], ''
    last = len(l) - 1
    for i, group in enumerate(l):
        if not is_consonant_cluster(group):
            # A vowel opens a new syllable, taking any held-back consonants.
            if is_vowel(group):
                syllables.append(onset + group)
                onset = ''
            continue

        next_is_cluster = i < last and is_consonant_cluster(l[i + 1])
        if syllables and (i == last or next_is_cluster):
            # Trailing consonants, or consonants followed by more consonants,
            # close the syllable that is already there.
            syllables[-1] += group
        else:
            # Either the next piece is not a consonant group, or there is no
            # earlier syllable to attach to: hold the consonants back.
            onset += group

    if onset and not syllables:
        # The input contained no vowel at all.
        syllables.append(onset)
    return tuple(syllables)
def syllabificate(phonemes):
    '''
    Split a phoneme sequence into syllables: greedy_split() cuts it into
    pieces and form_syllables() groups those pieces. Returns whatever
    form_syllables() returns (a tuple of strings).
    '''
    pieces = greedy_split(phonemes)
    return form_syllables(pieces)
def get_stressed_syllable(syllables):
    '''
    Returns a negative index for the stressed syllable (i.e., last is -1)

    The syllables are scanned from the last one backwards and the index of the
    first one containing the primary stress mark is returned.

    Raises Exception('stressed syllable not found') if no syllable contains
    the mark (including when `syllables` is empty).
    '''
    # range() rather than the Python 2-only xrange(), so this runs on both
    # Python 2 and Python 3.
    for i in range(-1, -len(syllables) - 1, -1):
        if u'ˈ' in syllables[i]:
            return i
    raise Exception('stressed syllable not found')
def get_vowel(syllable):
    '''
    Return the first element of `syllable` that is_vowel() accepts.

    Raises Exception('vowel not found') if there is none.
    '''
    found = next((sound for sound in syllable if is_vowel(sound)), None)
    if found is None:
        raise Exception('vowel not found')
    return found
def get_prevowel_sound(syllable):
    '''
    Return the part of a syllable that precedes its first vowel.

    The syllable is first passed through phonemes.strip_modifiers(). Returns
    '' when the syllable starts with a vowel or contains no vowel.
    '''
    syllable = phonemes.strip_modifiers(syllable)
    for i, c in enumerate(syllable):
        if is_vowel(c):
            # Slice up to, but not including, the vowel. Using i - 1 as the
            # bound would drop the sound adjacent to the vowel and, for i == 0,
            # turn into [:-1], which returns the vowel itself.
            return syllable[:i]
    return ''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment