Skip to content

Instantly share code, notes, and snippets.

@naoyat
Last active December 23, 2015 10:09
Show Gist options
  • Save naoyat/6619385 to your computer and use it in GitHub Desktop.
Save naoyat/6619385 to your computer and use it in GitHub Desktop.
2013/09/16「嵐のPRMLハッカソン」で書いてたコード
amō indicative active present sg 1
amās indicative active present sg 2
amat indicative active present sg 3
amāmus indicative active present pl 1
amātis indicative active present pl 2
amant indicative active present pl 3
amābam indicative active imperfect sg 1
amābās indicative active imperfect sg 2
amābat indicative active imperfect sg 3
amābāmus indicative active imperfect pl 1
amābātis indicative active imperfect pl 2
amābant indicative active imperfect pl 3
amābō indicative active future sg 1
amābis indicative active future sg 2
amābit indicative active future sg 3
amābimus indicative active future pl 1
amābitis indicative active future pl 2
amābunt indicative active future pl 3
amāvī indicative active perfect sg 1
amāvistī indicative active perfect sg 2
amāvit indicative active perfect sg 3
amāvimus indicative active perfect pl 1
amāvistis indicative active perfect pl 2
amāvēre indicative active perfect pl 3
amāveram indicative active past-perfect sg 1
amāverās indicative active past-perfect sg 2
amāverat indicative active past-perfect sg 3
amāverāmus indicative active past-perfect pl 1
amāverātis indicative active past-perfect pl 2
amāverant indicative active past-perfect pl 3
amāverō indicative active future-perfect sg 1
amāveris indicative active future-perfect sg 2
amāverit indicative active future-perfect sg 3
amāverimus indicative active future-perfect pl 1
amāveritis indicative active future-perfect pl 2
amāverint indicative active future-perfect pl 3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
IGNORE_CASE = True
VALID_TAIL_LENGTH = 10
from itertools import chain
def flatten_list(l):
    """Flatten one level of nesting and return the result as a new list.

    ``l`` is an iterable of iterables; the elements of each inner iterable
    are emitted in order.
    """
    return [item for inner in l for item in inner]
import numpy as np
# a  b  c  d  e  f  g  h  ij k  l  m  n  o  p  q  r  s  t  uv[w] x  y  z
# 0  1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19    20 21 22
def trans(ch):
    """Encode a single character as a 0/1 feature vector.

    Layout of the returned list:

    * slots 0-22: one-hot letter slot.  The 26 ASCII letters are folded
      onto 23 slots: ``i``/``j`` share slot 8 and ``u``/``v``/``w`` share
      slot 19.
    * slot 23: set when the character is a vowel with a macron
      (code points 256/257, 274/275, 298/299, 332/333, 362/363).
    * slot 24: set when the character is upper case.  This slot only
      exists when the module-level ``IGNORE_CASE`` is false; otherwise the
      vector has 24 entries.

    Any other character (e.g. the padding space) yields an all-zero vector.
    """
    def conv(i):
        # Map an alphabet offset (0 = 'a' ... 25 = 'z') to its letter slot.
        if i <= 7:
            return i
        elif i in (8, 9):       # i, j
            return 8
        elif i <= 19:           # k .. t
            return i - 1
        elif i in (20, 21):     # u, v
            return 19
        elif i <= 25:           # w, x, y, z  (w lands on 19 as well)
            return i - 3
        else:
            return -1

    o = ord(ch)
    # a = letter slot (or None), m = has macron, c = is upper case
    if o in (256, 257):         # A/a with macron
        a, m, c = 0, True, o == 256
    elif o in (274, 275):       # E/e with macron
        a, m, c = 4, True, o == 274
    elif o in (298, 299):       # I/i with macron
        a, m, c = 8, True, o == 298
    elif o in (332, 333):       # O/o with macron
        a, m, c = 13, True, o == 332
    elif o in (362, 363):       # U/u with macron
        a, m, c = 19, True, o == 362
    elif 65 <= o <= 90:         # 'A' .. 'Z'
        a, m, c = conv(o - 65), False, True
    elif 97 <= o <= 122:        # 'a' .. 'z'
        a, m, c = conv(o - 97), False, False
    else:
        a, m, c = None, False, False

    vec = [0] * (24 if IGNORE_CASE else 25)
    # Compare against None explicitly: slot 0 (the letter 'a') is falsy, so
    # a plain truthiness test would silently drop it.
    if a is not None and a >= 0:
        vec[a] = 1
    if m:
        vec[23] = 1
    if c and not IGNORE_CASE:
        vec[24] = 1
    return vec
def vectorize_surface(surface):
    """Encode the tail of a word form, last character first.

    The word is left-padded with ``VALID_TAIL_LENGTH`` spaces so that short
    words still provide a full-length tail, then its final
    ``VALID_TAIL_LENGTH`` characters are taken in reverse order and each one
    is passed through ``trans``.
    """
    padded = " " * VALID_TAIL_LENGTH + surface
    # Reverse the whole string, then keep the leading characters: these are
    # the last VALID_TAIL_LENGTH characters of the word, back to front.
    reversed_tail = padded[::-1][:VALID_TAIL_LENGTH]
    return map(trans, reversed_tail)
def vectorize_mood(mood):  # [indicative, ...]
    """Return a 3-slot 0/1 vector for the grammatical mood.

    Only ``'indicative'`` is recognised (slot 0); the two remaining slots
    are always 0, and any other value yields an all-zero vector.
    """
    return [1 if mood == 'indicative' else 0, 0, 0]
def vectorize_voice(voice):
    """Return a one-hot vector ``[active, passive]`` for the voice.

    Unrecognised values yield ``[0, 0]``.
    """
    voices = ('active', 'passive')
    return [1 if voice == name else 0 for name in voices]
def vectorize_tense(tense):
    """Return a one-hot vector for the tense.

    Slot order: present, imperfect, future, perfect, past-perfect,
    future-perfect.  Unrecognised values yield six zeros.
    """
    tenses = (
        'present',
        'imperfect',
        'future',
        'perfect',
        'past-perfect',
        'future-perfect',
    )
    encoded = [0] * len(tenses)
    if tense in tenses:
        encoded[tenses.index(tense)] = 1
    return encoded
def vectorize_number(number):
    """Return a one-hot vector ``[sg, pl]`` for the grammatical number.

    Unrecognised values yield ``[0, 0]``.
    """
    is_singular = number == 'sg'
    is_plural = number == 'pl'
    return [1 if is_singular else 0, 1 if is_plural else 0]
def vectorize_person(person):
    """Return a one-hot vector ``[1st, 2nd, 3rd]`` for the grammatical person.

    ``person`` may be an integer or a numeric string such as ``'2'`` (the
    form in which it arrives when read from a text file).  Values that are
    not a number in 1..3 yield ``[0, 0, 0]``.
    """
    vec = [0, 0, 0]
    try:
        index = int(person) - 1  # persons are 1-based, list slots 0-based
    except (TypeError, ValueError):
        return vec
    if 0 <= index < len(vec):
        vec[index] = 1
    return vec
def load_data(data_filepath):
    """Load a tab-separated conjugation table and vectorize it.

    Each non-blank line of the UTF-8 file must hold six tab-separated
    fields: surface form, mood, voice, tense, number, person.

    Returns a pair ``(X, Y)`` of NumPy arrays with one row per input line.
    ``X`` holds the flattened per-character vectors of the word tail and
    ``Y`` the concatenated mood/voice/tense/number/person vectors.

    Raises ``ValueError`` (from the tuple unpacking) when a non-blank line
    does not have exactly six fields.
    """
    # Local import keeps this function self-contained; io.open decodes the
    # file while reading, on both Python 2 and Python 3.
    import io

    xs = []
    ys = []
    with io.open(data_filepath, 'r', encoding='utf-8') as fp:
        for line in fp:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # e.g. [u'am\u0101bitis', 'indicative', 'active', 'future', 'pl', '2']
            surface, mood, voice, tense, number, person = line.split('\t')
            surface_v = vectorize_surface(surface)
            mood_v = vectorize_mood(mood)  # [indicative, ...]
            voice_v = vectorize_voice(voice)  # [active, passive]
            # [present, imperfect, future, perfect, past-perfect, future-perfect]
            tense_v = vectorize_tense(tense)
            number_v = vectorize_number(number)  # [sg, pl]
            # The person field is passed through as read (a string).
            person_v = vectorize_person(person)  # [1, 2, 3]
            x = flatten_list(surface_v)
            y = mood_v + voice_v + tense_v + number_v + person_v
            xs.append(x)
            ys.append(y)
    return np.array(xs), np.array(ys)
#if __name__ == '__main__':
# X, Y = load_data(sys.argv[1])
# print "X:", X
# print "Y:", Y
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import neurolab as nl
import sys
import latin_loader
if __name__ == '__main__':
    # Train a small feed-forward network on the vectorized conjugation table
    # and print the value returned by the training call.
    X, Y = latin_loader.load_data("latin.dat")
    w = X.shape[1]  # number of input features per sample
    h = Y.shape[1]  # number of target values per sample

    # Every input feature is a 0/1 flag, so each input range is [0, 1].
    input_range = [[0, 1]] * w
    # Layer sizes: 25 units, then h units.
    # NOTE(review): presumably one hidden layer plus the output layer -
    # confirm against the neurolab documentation.
    net = nl.net.newff(input_range, [25, h])
    # NOTE(review): show (15) exceeds epochs (10), so presumably no
    # intermediate progress is reported - confirm this is intended.
    err = net.train(X, Y, epochs=10, show=15)
    # Parenthesised form prints the same on Python 2 and is valid on Python 3.
    print(err)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment