naoyat · December 23, 2015 10:09
diff --git a/latin.dat b/latin.dat
 amō    indicative	active	present	sg	1
 amās	indicative	active	present	sg	2
 amat	indicative	active	present	sg	3
 amāmus	indicative	active	present	pl	1
 amātis	indicative	active	present	pl	2
 amant	indicative	active	present	pl	3
 amābam	indicative	active	imperfect	sg	1
 amābās	indicative	active	imperfect	sg	2
 amābat	indicative	active	imperfect	sg	3
 amābāmus	indicative	active	imperfect	pl	1
 amābātis	indicative	active	imperfect	pl	2
 amābant	indicative	active	imperfect	pl	3
 amābō	indicative	active	future	sg	1
 amābis	indicative	active	future	sg	2
 amābit	indicative	active	future	sg	3
 amābimus	indicative	active	future	pl	1
 amābitis	indicative	active	future	pl	2
 amābunt	indicative	active	future	pl	3
 amāvī	indicative	active	perfect	sg	1
 amāvistī	indicative	active	perfect	sg	2
 amāvit	indicative	active	perfect	sg	3
 amāvimus	indicative	active	perfect	pl	1
 amāvistis	indicative	active	perfect	pl	2
 amāvēre	indicative	active	perfect	pl	3
 amāveram	indicative	active	past-perfect	sg	1
 amāverās	indicative	active	past-perfect	sg	2
 amāverat	indicative	active	past-perfect	sg	3
 amāverāmus	indicative	active	past-perfect	pl	1
 amāverātis	indicative	active	past-perfect	pl	2
 amāverant	indicative	active	past-perfect	pl	3
 amāverō	indicative	active	future-perfect	sg	1
 amāveris	indicative	active	future-perfect	sg	2
 amāverit	indicative	active	future-perfect	sg	3
 amāverimus	indicative	active	future-perfect	pl	1
 amāveritis	indicative	active	future-perfect	pl	2
 amāverint	indicative	active	future-perfect	pl	3
diff --git a/latin_loader.py b/latin_loader.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import sys

 IGNORE_CASE = True
 VALID_TAIL_LENGTH = 10

 from itertools import chain
 def flatten_list(l):
     """Concatenate the items of every iterable in *l* into one flat list.

     Only one level of nesting is removed.
     """
     return [item for inner in l for item in inner]

 import numpy as np

 # a b c d e f g h ij k l  m  n  o  p  q  r  s  t  uv[w] x  y  z
 # 0 1 2 3 4 5 6 7 8  9 10 11 12 13 14 15 16 17 18 19    20 21 22
 def trans(ch, ignore_case=None):
     """Encode one character as a one-hot letter vector plus flag bits.

     Slots 0-22 hold the letter: a-h use 0-7, i/j share slot 8, k-t use
     9-18, u/v/w share slot 19 and x, y, z use 20-22.  Slot 23 is set for
     the macron vowels handled below.  When case is kept, slot 24 is set
     for upper-case characters.

     Args:
         ch: a single character.
         ignore_case: True for a 24-slot vector, False for a 25-slot vector
             that includes the upper-case flag.  None (the default) uses
             the module-level IGNORE_CASE setting.

     Returns:
         A list of 0/1 ints.  Characters that are neither ASCII letters nor
         one of the macron vowels (e.g. a space) give an all-zero vector.
     """
     def conv(i):
         # Fold a 0-25 alphabet offset into the 23 letter slots.
         if i <= 7:
             return i        # a-h
         if i <= 9:
             return 8        # i, j
         if i <= 19:
             return i - 1    # k-t
         if i <= 21:
             return 19       # u, v
         return i - 3        # w -> 19, x -> 20, y -> 21, z -> 22

     if ignore_case is None:
         ignore_case = IGNORE_CASE

     # Macron vowels: code point -> (letter slot, is upper case).
     macrons = {
         256: (0, True), 257: (0, False),      # A / a with macron
         274: (4, True), 275: (4, False),      # E / e with macron
         298: (8, True), 299: (8, False),      # I / i with macron
         332: (13, True), 333: (13, False),    # O / o with macron
         362: (19, True), 363: (19, False),    # U / u with macron
     }

     o = ord(ch)
     if o in macrons:
         a, c = macrons[o]
         m = True
     elif 65 <= o <= 90:
         a, m, c = conv(o - 65), False, True
     elif 97 <= o <= 122:
         a, m, c = conv(o - 97), False, False
     else:
         a, m, c = None, False, False

     vec = [0] * (24 if ignore_case else 25)
     # Test against None, not truthiness: slot 0 (the letter 'a') is falsy.
     if a is not None:
         vec[a] = 1
     if m:
         vec[23] = 1
     if c and not ignore_case:
         vec[24] = 1
     return vec


 def vectorize_surface(surface):
     """Encode the last VALID_TAIL_LENGTH characters of *surface*, last one first.

     The word is left-padded with spaces so that short words still produce
     exactly VALID_TAIL_LENGTH character vectors.

     Returns:
         A list with one trans() vector per character of the reversed tail.
     """
     padded = (" " * VALID_TAIL_LENGTH) + surface
     # Walk backwards from the end, so the final character comes first.
     tail = padded[:-VALID_TAIL_LENGTH - 1:-1]
     # Build a real list: a bare map() is a one-shot iterator on Python 3.
     return [trans(ch) for ch in tail]


 def vectorize_mood(mood):
     """Return the 3-slot one-hot vector for a mood label.

     Only 'indicative' (slot 0) is recognised; any other label gives zeros.
     """
     return [1 if mood == 'indicative' else 0, 0, 0]

 def vectorize_voice(voice):
     """One-hot encode a voice label as [active, passive].

     Unknown labels give [0, 0].
     """
     return [int(voice == label) for label in ('active', 'passive')]

 def vectorize_tense(tense):
     """One-hot encode a tense label over six slots.

     Slot order: present, imperfect, future, perfect, past-perfect,
     future-perfect.  Unknown labels give all zeros.
     """
     labels = (
         'present',
         'imperfect',
         'future',
         'perfect',
         'past-perfect',
         'future-perfect',
     )
     return [int(tense == label) for label in labels]

 def vectorize_number(number):
     """One-hot encode grammatical number as [sg, pl]; other labels give [0, 0]."""
     encoded = [0, 0]
     for slot, label in enumerate(('sg', 'pl')):
         if number == label:
             encoded[slot] = 1
     return encoded

 def vectorize_person(person):
     """One-hot encode grammatical person as [1st, 2nd, 3rd].

     Args:
         person: 1, 2 or 3, given as an int or as its text form (e.g. the
             '2' read from a data-file field).

     Returns:
         A 3-element list with a 1 in slot ``person - 1``; all zeros when
         the value is not a number in 1..3.
     """
     vec = [0, 0, 0]
     try:
         # Fields read from the data file are text, so normalise to int.
         index = int(person) - 1
     except (TypeError, ValueError):
         return vec
     # Slots are 0-based: person 1 -> slot 0, person 3 -> slot 2.
     if 0 <= index <= 2:
         vec[index] = 1
     return vec

 def load_data(data_filepath):
     """Load the conjugation table and return (X, Y) as numpy arrays.

     Each non-blank line holds six fields separated by tabs (runs of spaces
     are accepted too): surface form, mood, voice, tense, number, person,
     e.g. ['amābitis', 'indicative', 'active', 'future', 'pl', '2'].

     Args:
         data_filepath: path to a UTF-8 encoded data file.

     Returns:
         (X, Y): X has one row per line, the flattened character vectors of
         the surface form; Y has one row per line, the concatenated mood,
         voice, tense, number and person vectors.

     Raises:
         ValueError: if a non-blank line does not have exactly six fields.
     """
     import io  # local import: io.open decodes UTF-8 on Python 2 and 3 alike

     xs = []
     ys = []

     with io.open(data_filepath, 'r', encoding='utf-8') as fp:
         for line in fp:
             fields = line.split()
             if not fields:
                 continue  # tolerate blank lines, e.g. at the end of the file
             surface, mood, voice, tense, number, person = fields

             surface_v = vectorize_surface(surface)

             mood_v = vectorize_mood(mood)        # [indicative, ...]
             voice_v = vectorize_voice(voice)     # [active, passive]
             tense_v = vectorize_tense(tense)     # [present, ..., future-perfect]
             number_v = vectorize_number(number)  # [sg, pl]
             person_v = vectorize_person(person)  # person is still the text field

             xs.append(flatten_list(surface_v))
             ys.append(mood_v + voice_v + tense_v + number_v + person_v)

     return np.array(xs), np.array(ys)


 #if __name__ == '__main__':
 #    X, Y = load_data(sys.argv[1])
 #    print "X:", X
 #    print "Y:", Y

diff --git a/nn.py b/nn.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import numpy as np
 import neurolab as nl
 import sys

 import latin_loader

 if __name__ == '__main__':
     # Train a feed-forward network on the conjugation table in latin.dat.
     X, Y = latin_loader.load_data("latin.dat")

     w = X.shape[1]  # width of one input row
     h = Y.shape[1]  # width of one target row

     # One [0, 1] range per input column; every input value is a 0/1 flag.
     input_range = [[0, 1]] * w

     # Layer sizes passed to newff: 25, then h to match the target width.
     net = nl.net.newff(input_range, [25, h])
     err = net.train(X, Y, epochs=10, show=15)
     # Parenthesised form is valid on both Python 2 and Python 3.
     print(err)
	amō indicative active present sg 1
	amās indicative active present sg 2
	amat indicative active present sg 3
	amāmus indicative active present pl 1
	amātis indicative active present pl 2
	amant indicative active present pl 3
	amābam indicative active imperfect sg 1
	amābās indicative active imperfect sg 2
	amābat indicative active imperfect sg 3
	amābāmus indicative active imperfect pl 1
	amābātis indicative active imperfect pl 2
	amābant indicative active imperfect pl 3
	amābō indicative active future sg 1
	amābis indicative active future sg 2
	amābit indicative active future sg 3
	amābimus indicative active future pl 1
	amābitis indicative active future pl 2
	amābunt indicative active future pl 3
	amāvī indicative active perfect sg 1
	amāvistī indicative active perfect sg 2
	amāvit indicative active perfect sg 3
	amāvimus indicative active perfect pl 1
	amāvistis indicative active perfect pl 2
	amāvēre indicative active perfect pl 3
	amāveram indicative active past-perfect sg 1
	amāverās indicative active past-perfect sg 2
	amāverat indicative active past-perfect sg 3
	amāverāmus indicative active past-perfect pl 1
	amāverātis indicative active past-perfect pl 2
	amāverant indicative active past-perfect pl 3
	amāverō indicative active future-perfect sg 1
	amāveris indicative active future-perfect sg 2
	amāverit indicative active future-perfect sg 3
	amāverimus indicative active future-perfect pl 1
	amāveritis indicative active future-perfect pl 2
	amāverint indicative active future-perfect pl 3
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	import sys

	IGNORE_CASE = True
	VALID_TAIL_LENGTH = 10

	from itertools import chain
	def flatten_list(l):
	    """Concatenate the items of every iterable in *l* into one flat list.

	    Only one level of nesting is removed.
	    """
	    return [item for inner in l for item in inner]

	import numpy as np

	# a b c d e f g h ij k l m n o p q r s t uv[w] x y z
	# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
	def trans(ch, ignore_case=None):
	    """Encode one character as a one-hot letter vector plus flag bits.

	    Slots 0-22 hold the letter: a-h use 0-7, i/j share slot 8, k-t use
	    9-18, u/v/w share slot 19 and x, y, z use 20-22.  Slot 23 is set for
	    the macron vowels handled below.  When case is kept, slot 24 is set
	    for upper-case characters.

	    Args:
	        ch: a single character.
	        ignore_case: True for a 24-slot vector, False for a 25-slot vector
	            that includes the upper-case flag.  None (the default) uses
	            the module-level IGNORE_CASE setting.

	    Returns:
	        A list of 0/1 ints.  Characters that are neither ASCII letters nor
	        one of the macron vowels (e.g. a space) give an all-zero vector.
	    """
	    def conv(i):
	        # Fold a 0-25 alphabet offset into the 23 letter slots.
	        if i <= 7:
	            return i        # a-h
	        if i <= 9:
	            return 8        # i, j
	        if i <= 19:
	            return i - 1    # k-t
	        if i <= 21:
	            return 19       # u, v
	        return i - 3        # w -> 19, x -> 20, y -> 21, z -> 22

	    if ignore_case is None:
	        ignore_case = IGNORE_CASE

	    # Macron vowels: code point -> (letter slot, is upper case).
	    macrons = {
	        256: (0, True), 257: (0, False),      # A / a with macron
	        274: (4, True), 275: (4, False),      # E / e with macron
	        298: (8, True), 299: (8, False),      # I / i with macron
	        332: (13, True), 333: (13, False),    # O / o with macron
	        362: (19, True), 363: (19, False),    # U / u with macron
	    }

	    o = ord(ch)
	    if o in macrons:
	        a, c = macrons[o]
	        m = True
	    elif 65 <= o <= 90:
	        a, m, c = conv(o - 65), False, True
	    elif 97 <= o <= 122:
	        a, m, c = conv(o - 97), False, False
	    else:
	        a, m, c = None, False, False

	    vec = [0] * (24 if ignore_case else 25)
	    # Test against None, not truthiness: slot 0 (the letter 'a') is falsy.
	    if a is not None:
	        vec[a] = 1
	    if m:
	        vec[23] = 1
	    if c and not ignore_case:
	        vec[24] = 1
	    return vec


	def vectorize_surface(surface):
	    """Encode the last VALID_TAIL_LENGTH characters of *surface*, last one first.

	    The word is left-padded with spaces so that short words still produce
	    exactly VALID_TAIL_LENGTH character vectors.

	    Returns:
	        A list with one trans() vector per character of the reversed tail.
	    """
	    padded = (" " * VALID_TAIL_LENGTH) + surface
	    # Walk backwards from the end, so the final character comes first.
	    tail = padded[:-VALID_TAIL_LENGTH - 1:-1]
	    # Build a real list: a bare map() is a one-shot iterator on Python 3.
	    return [trans(ch) for ch in tail]


	def vectorize_mood(mood):
	    """Return the 3-slot one-hot vector for a mood label.

	    Only 'indicative' (slot 0) is recognised; any other label gives zeros.
	    """
	    return [1 if mood == 'indicative' else 0, 0, 0]

	def vectorize_voice(voice):
	    """One-hot encode a voice label as [active, passive].

	    Unknown labels give [0, 0].
	    """
	    return [int(voice == label) for label in ('active', 'passive')]

	def vectorize_tense(tense):
	    """One-hot encode a tense label over six slots.

	    Slot order: present, imperfect, future, perfect, past-perfect,
	    future-perfect.  Unknown labels give all zeros.
	    """
	    labels = (
	        'present',
	        'imperfect',
	        'future',
	        'perfect',
	        'past-perfect',
	        'future-perfect',
	    )
	    return [int(tense == label) for label in labels]

	def vectorize_number(number):
	    """One-hot encode grammatical number as [sg, pl]; other labels give [0, 0]."""
	    encoded = [0, 0]
	    for slot, label in enumerate(('sg', 'pl')):
	        if number == label:
	            encoded[slot] = 1
	    return encoded

	def vectorize_person(person):
	    """One-hot encode grammatical person as [1st, 2nd, 3rd].

	    Args:
	        person: 1, 2 or 3, given as an int or as its text form (e.g. the
	            '2' read from a data-file field).

	    Returns:
	        A 3-element list with a 1 in slot ``person - 1``; all zeros when
	        the value is not a number in 1..3.
	    """
	    vec = [0, 0, 0]
	    try:
	        # Fields read from the data file are text, so normalise to int.
	        index = int(person) - 1
	    except (TypeError, ValueError):
	        return vec
	    # Slots are 0-based: person 1 -> slot 0, person 3 -> slot 2.
	    if 0 <= index <= 2:
	        vec[index] = 1
	    return vec

	def load_data(data_filepath):
	    """Load the conjugation table and return (X, Y) as numpy arrays.

	    Each non-blank line holds six fields separated by tabs (runs of spaces
	    are accepted too): surface form, mood, voice, tense, number, person,
	    e.g. ['amābitis', 'indicative', 'active', 'future', 'pl', '2'].

	    Args:
	        data_filepath: path to a UTF-8 encoded data file.

	    Returns:
	        (X, Y): X has one row per line, the flattened character vectors of
	        the surface form; Y has one row per line, the concatenated mood,
	        voice, tense, number and person vectors.

	    Raises:
	        ValueError: if a non-blank line does not have exactly six fields.
	    """
	    import io  # local import: io.open decodes UTF-8 on Python 2 and 3 alike

	    xs = []
	    ys = []

	    with io.open(data_filepath, 'r', encoding='utf-8') as fp:
	        for line in fp:
	            fields = line.split()
	            if not fields:
	                continue  # tolerate blank lines, e.g. at the end of the file
	            surface, mood, voice, tense, number, person = fields

	            surface_v = vectorize_surface(surface)

	            mood_v = vectorize_mood(mood)        # [indicative, ...]
	            voice_v = vectorize_voice(voice)     # [active, passive]
	            tense_v = vectorize_tense(tense)     # [present, ..., future-perfect]
	            number_v = vectorize_number(number)  # [sg, pl]
	            person_v = vectorize_person(person)  # person is still the text field

	            xs.append(flatten_list(surface_v))
	            ys.append(mood_v + voice_v + tense_v + number_v + person_v)

	    return np.array(xs), np.array(ys)


	#if __name__ == '__main__':
	# X, Y = load_data(sys.argv[1])
	# print "X:", X
	# print "Y:", Y