Created
August 11, 2010 20:08
-
-
Save Wilfred/519638 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
def compare_esperanto_strings(x_mixed_case, y_mixed_case):
    """Compare two strings using Esperanto alphabetical order.

    The comparison is case-insensitive and ignores leading and trailing
    whitespace. The whole Latin alphabet is permitted, with the Esperanto
    letters placed after their base letters (c < \u0109 < d, and so on).
    When one string is a prefix of the other, the longer one sorts later.

    Characters that are not in the known alphabet (digits, other
    punctuation, ...) sort after every known character, ordered among
    themselves by code point, instead of raising ``ValueError``.

    Args:
        x_mixed_case: First string. Byte strings are decoded as UTF-8.
        y_mixed_case: Second string. Byte strings are decoded as UTF-8.

    Returns:
        -1 if x sorts before y, 1 if x sorts after y, 0 if they are equal.
    """
    # Space is first in the alphabet so 'a b' comes before 'ab'; '-' is
    # second so that affixes come first. The punctuation and the sigma
    # (U+03C3) after 'z' are defensive extras.
    alphabet = (
        u" -abc\u0109defg\u011dh\u0125ij\u0135klmnop"
        u"qrs\u015dtu\u016dvwxyz'().*,\u03c3"
    )
    # Dictionary lookup replaces the repeated linear list.index() scans.
    rank = {char: position for position, char in enumerate(alphabet)}
    unknown_rank = len(alphabet)

    def sort_key(text):
        # `bytes` is `str` on Python 2, so this keeps the original
        # decoding behaviour there and also accepts bytes on Python 3.
        if isinstance(text, bytes):
            text = text.decode('utf8')
        normalised = text.lower().strip()
        # Pairing the rank with the character keeps unknown characters
        # (which all share unknown_rank) in a stable code point order.
        return [(rank.get(char, unknown_rank), char) for char in normalised]

    x = sort_key(x_mixed_case)
    y = sort_key(y_mixed_case)
    # List comparison is lexicographic and places a strict prefix first,
    # which matches "longer strings come afterwards".
    return (x > y) - (x < y)
if __name__ == '__main__':
    # Script entry point: read dump.txt, sort its lines in Esperanto
    # alphabetical order and print them one per line.

    # cmp_to_key adapts the old-style comparator for list.sort(key=...);
    # the `cmp=` keyword only exists on Python 2.
    from functools import cmp_to_key

    # The context manager closes the file even if reading fails.
    with open('dump.txt', 'r') as dump:
        lines = dump.readlines()

    lines.sort(key=cmp_to_key(compare_esperanto_strings))

    for line in lines:
        # readlines() keeps each line's trailing newline and print adds
        # its own, so strip to avoid blank lines in the output.
        print(line.strip())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment