Last active
December 23, 2015 10:09
-
-
Save naoyat/6619385 to your computer and use it in GitHub Desktop.
2013/09/16「嵐のPRMLハッカソン」で書いてたコード
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
amō indicative active present sg 1 | |
amās indicative active present sg 2 | |
amat indicative active present sg 3 | |
amāmus indicative active present pl 1 | |
amātis indicative active present pl 2 | |
amant indicative active present pl 3 | |
amābam indicative active imperfect sg 1 | |
amābās indicative active imperfect sg 2 | |
amābat indicative active imperfect sg 3 | |
amābāmus indicative active imperfect pl 1 | |
amābātis indicative active imperfect pl 2 | |
amābant indicative active imperfect pl 3 | |
amābō indicative active future sg 1 | |
amābis indicative active future sg 2 | |
amābit indicative active future sg 3 | |
amābimus indicative active future pl 1 | |
amābitis indicative active future pl 2 | |
amābunt indicative active future pl 3 | |
amāvī indicative active perfect sg 1 | |
amāvistī indicative active perfect sg 2 | |
amāvit indicative active perfect sg 3 | |
amāvimus indicative active perfect pl 1 | |
amāvistis indicative active perfect pl 2 | |
amāvēre indicative active perfect pl 3 | |
amāveram indicative active past-perfect sg 1 | |
amāverās indicative active past-perfect sg 2 | |
amāverat indicative active past-perfect sg 3 | |
amāverāmus indicative active past-perfect pl 1 | |
amāverātis indicative active past-perfect pl 2 | |
amāverant indicative active past-perfect pl 3 | |
amāverō indicative active future-perfect sg 1 | |
amāveris indicative active future-perfect sg 2 | |
amāverit indicative active future-perfect sg 3 | |
amāverimus indicative active future-perfect pl 1 | |
amāveritis indicative active future-perfect pl 2 | |
amāverint indicative active future-perfect pl 3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import sys | |
# When True, trans() emits 24-slot vectors with no upper-case flag; when False
# it emits 25 slots and slot 24 marks an upper-case character.
IGNORE_CASE = True

# Number of trailing characters of a word form that vectorize_surface() encodes.
VALID_TAIL_LENGTH = 10
from itertools import chain | |
def flatten_list(l):
    """Concatenate the items of every sub-iterable of *l* into one flat list.

    Only one level of nesting is removed; deeper nesting is kept as-is.

    Args:
        l: An iterable whose elements are themselves iterables.

    Returns:
        A new list holding the elements of each sub-iterable, in order.
    """
    return list(chain.from_iterable(l))
import numpy as np | |
# Letter-to-slot table used by trans(): i/j share slot 8, u/v/w share slot 19.
#  a  b  c  d  e  f  g  h  ij k  l  m  n  o  p  q  r  s  t  uv[w] x  y  z
#  0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19    20 21 22
def trans(ch, ignore_case=None):
    """Encode one character as a sparse 0/1 feature vector.

    Slots 0-22 identify the letter (i/j share slot 8, u/v/w share slot 19),
    slot 23 flags a vowel written with a macron and, only when case is kept,
    slot 24 flags an upper-case character.  A character that is neither an
    ASCII letter nor one of the ten macron vowels yields an all-zero vector.

    Args:
        ch: A single character (a unicode character for the macron vowels).
        ignore_case: True for a 24-slot vector without the case flag, False
            for a 25-slot vector with it.  None (the default) falls back to
            the module-level IGNORE_CASE setting.

    Returns:
        A list of 24 or 25 ints, each 0 or 1.
    """
    def conv(i):
        # Map a 0-based alphabet position (a=0 ... z=25) to its letter slot.
        if i <= 7:
            return i          # a-h keep their position
        elif i in (8, 9):
            return 8          # i and j are one letter
        elif i <= 19:
            return i - 1      # k-t
        elif i in (20, 21):
            return 19         # u and v are one letter
        elif i <= 25:
            return i - 3      # w-z -> 19-22 (w shares the u/v slot)
        else:
            return None

    # Code point -> (letter slot, is upper-case) for the macron vowels.
    macron_vowels = {
        256: (0, True), 257: (0, False),      # A/a with macron
        274: (4, True), 275: (4, False),      # E/e with macron
        298: (8, True), 299: (8, False),      # I/i with macron
        332: (13, True), 333: (13, False),    # O/o with macron
        362: (19, True), 363: (19, False),    # U/u with macron
    }

    code = ord(ch)
    if code in macron_vowels:
        slot, upper = macron_vowels[code]
        macron = True
    elif 65 <= code <= 90:        # 'A'..'Z'
        slot, macron, upper = conv(code - 65), False, True
    elif 97 <= code <= 122:       # 'a'..'z'
        slot, macron, upper = conv(code - 97), False, False
    else:
        slot, macron, upper = None, False, False

    if ignore_case is None:
        ignore_case = IGNORE_CASE

    vec = [0] * (24 if ignore_case else 25)
    # Compare against None explicitly: slot 0 (the letter 'a') is falsy, and a
    # plain truthiness test would silently drop it.
    if slot is not None:
        vec[slot] = 1
    if macron:
        vec[23] = 1
    if upper and not ignore_case:
        vec[24] = 1
    return vec
def vectorize_surface(surface):
    """Encode the tail of a word form as a list of per-character vectors.

    The word is left-padded with spaces so that even a short word supplies
    VALID_TAIL_LENGTH characters; the padding characters go through trans()
    like any other character.

    Args:
        surface: The word form as a (unicode) string.

    Returns:
        A list of VALID_TAIL_LENGTH vectors produced by trans(), ordered from
        the last character of the word backwards.
    """
    padded = (" " * VALID_TAIL_LENGTH) + surface
    # Last VALID_TAIL_LENGTH characters, read from the end of the word.
    tail = padded[:-VALID_TAIL_LENGTH - 1:-1]
    return [trans(ch) for ch in tail]
def vectorize_mood(mood):
    """Encode a grammatical mood as a 3-slot 0/1 vector.

    Only 'indicative' is recognised (slot 0).  The other two slots are
    reserved and are never set here, so any other value gives all zeros.

    Args:
        mood: The mood label.

    Returns:
        A list of three ints, each 0 or 1.
    """
    vec = [0, 0, 0]
    if mood == 'indicative':
        vec[0] = 1
    return vec
def vectorize_voice(voice):
    """Encode a grammatical voice as a one-hot [active, passive] vector.

    Args:
        voice: 'active' or 'passive'; any other value gives all zeros.

    Returns:
        A list of two ints, each 0 or 1.
    """
    vec = [0, 0]
    if voice == 'active':
        vec[0] = 1
    elif voice == 'passive':
        vec[1] = 1
    return vec
def vectorize_tense(tense):
    """Encode a tense as a one-hot vector.

    Slot order: present, imperfect, future, perfect, past-perfect,
    future-perfect.

    Args:
        tense: One of the six tense labels; any other value gives all zeros.

    Returns:
        A list of six ints, each 0 or 1.
    """
    tenses = ('present', 'imperfect', 'future',
              'perfect', 'past-perfect', 'future-perfect')
    vec = [0] * len(tenses)
    if tense in tenses:
        vec[tenses.index(tense)] = 1
    return vec
def vectorize_number(number):
    """Encode a grammatical number as a one-hot [sg, pl] vector.

    Args:
        number: 'sg' or 'pl'; any other value gives all zeros.

    Returns:
        A list of two ints, each 0 or 1.
    """
    vec = [0, 0]
    if number == 'sg':
        vec[0] = 1
    elif number == 'pl':
        vec[1] = 1
    return vec
def vectorize_person(person):
    """Encode a grammatical person (1, 2 or 3) as a one-hot vector.

    load_data() passes the person as the text read from the data file, so
    both ints and numeric strings are accepted.

    Args:
        person: 1, 2 or 3, as an int or a numeric string.  Anything else
            (out of range, non-numeric, None) gives all zeros.

    Returns:
        A list of three ints, each 0 or 1; person N sets slot N - 1.
    """
    vec = [0, 0, 0]
    try:
        index = int(person) - 1   # persons are 1-based, slots are 0-based
    except (TypeError, ValueError):
        return vec
    if 0 <= index <= 2:
        vec[index] = 1
    return vec
def load_data(data_filepath):
    """Load a conjugation table and turn it into input/target arrays.

    Each non-blank line of the UTF-8 file must hold six tab-separated
    fields: surface form, mood, voice, tense, number, person.

    Args:
        data_filepath: Path of the data file.

    Returns:
        A pair (X, Y) of numpy arrays with one row per data line.  A row of
        X is the flattened output of vectorize_surface(); a row of Y is the
        concatenation of the mood, voice, tense, number and person vectors.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields.
    """
    import io  # local import: io.open decodes UTF-8 on Python 2 and 3 alike

    xs = []
    ys = []
    with io.open(data_filepath, 'r', encoding='utf-8') as fp:
        for line_no, line in enumerate(fp, 1):
            record = line.rstrip('\r\n')
            if not record.strip():
                continue  # tolerate blank lines, e.g. at the end of the file

            # e.g. [u'am\u0101bitis', 'indicative', 'active', 'future', 'pl', '2']
            fields = [field.strip() for field in record.split('\t')]
            if len(fields) != 6:
                raise ValueError(
                    "%s:%d: expected 6 tab-separated fields, got %d"
                    % (data_filepath, line_no, len(fields)))
            surface, mood, voice, tense, number, person = fields

            surface_v = vectorize_surface(surface)
            mood_v = vectorize_mood(mood)        # [indicative, ...]
            voice_v = vectorize_voice(voice)     # [active, passive]
            tense_v = vectorize_tense(tense)     # [present, imperfect, future,
                                                 #  perfect, past-perfect,
                                                 #  future-perfect]
            number_v = vectorize_number(number)  # [sg, pl]
            # The person is handed over as the text read from the file.
            person_v = vectorize_person(person)  # [1, 2, 3]

            xs.append(flatten_list(surface_v))
            ys.append(mood_v + voice_v + tense_v + number_v + person_v)
    return np.array(xs), np.array(ys)
#if __name__ == '__main__': | |
# X, Y = load_data(sys.argv[1]) | |
# print "X:", X | |
# print "Y:", Y | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import numpy as np | |
import neurolab as nl | |
import sys | |
import latin_loader | |
def main(data_path="latin.dat"):
    """Train a feed-forward network on the conjugation data and print the errors.

    Args:
        data_path: Path of the data file handed to latin_loader.load_data().
    """
    X, Y = latin_loader.load_data(data_path)
    n_inputs = X.shape[1]
    n_outputs = Y.shape[1]

    # Every input feature produced by the loader is a 0/1 flag.
    input_range = [[0, 1]] * n_inputs
    # One hidden layer of 25 neurons, one output neuron per target column.
    net = nl.net.newff(input_range, [25, n_outputs])
    # NOTE(review): show (15) is larger than epochs (10); presumably no
    # intermediate progress gets printed -- confirm against the neurolab docs.
    err = net.train(X, Y, epochs=10, show=15)
    # Parenthesised single argument prints the same on Python 2 and 3.
    print(err)


if __name__ == '__main__':
    # Optional first command-line argument overrides the default data file.
    main(sys.argv[1] if len(sys.argv) > 1 else "latin.dat")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment