ikegami-yukino · May 10, 2019 10:45
diff --git a/translate_sentiwordnet.py b/translate_sentiwordnet.py
 import re
 import sqlite3
 import time

 import requests

 # SQLite database opened by the main loop; its `sense` and `word` tables are queried.
 DB_PATH = 'wnjpn.db'
 # Tab-separated SentiWordNet input file; lines starting with '#' are skipped when read.
 SWN_PATH = 'SentiWordNet_3.0.0_20130122.txt'
 # Translation endpoint template: %s is replaced with the sentence to translate, and the
 # query string asks for source=en, target=ja.
 # NOTE(review): "Please_write_here" looks like a placeholder for a real script id - confirm.
 URL = 'https://script.google.com/macros/s/Please_write_here/exec?text=%s&source=en&target=ja'
 # Output file. Rows are written tab-separated even though the extension is .csv.
 RESULT_PATH = 'result.csv'
 # Captures the text between each pair of double quotes; applied to the Gloss column.
 re_sentence = re.compile('"([^"]+)"')


 def fetch_jp_lemma(synset, cursor):
    """Return the non-English lemmas attached to ``synset``.

    Every ``sense`` row for ``synset`` whose ``lang`` is not ``'eng'`` is
    looked up, and each resulting ``wordid`` is resolved through the
    ``word`` table (again excluding ``'eng'`` rows).

    Args:
        synset: Synset identifier to match against ``sense.synset``.
        cursor: sqlite3 cursor on a database that has ``sense`` and
            ``word`` tables.

    Returns:
        A list of lemma values in the order the rows were returned;
        empty when nothing matches.
    """
    # Bind values with "?" placeholders instead of formatting them into the
    # SQL text: a quote character in the value can then neither break the
    # statement nor alter the WHERE clause.
    cursor.execute("SELECT wordid FROM sense WHERE synset = ? AND lang != 'eng'", (synset,))
    word_ids = [row[0] for row in cursor.fetchall()]

    jp_lemma = []
    for word_id in word_ids:
        cursor.execute("SELECT lemma FROM word WHERE wordid = ? AND lang != 'eng'", (word_id,))
        jp_lemma.extend(row[0] for row in cursor.fetchall())
    return jp_lemma


 def translate(sentence):
    """Translate one sentence through the web endpoint described by ``URL``.

    Sleeps five seconds before every request, then issues a GET with the
    sentence substituted into the ``URL`` template.

    Args:
        sentence: Text to place in the ``text`` query parameter.

    Returns:
        The response body decoded as UTF-8. The HTTP status is not
        checked, so an error page is returned as-is.
    """
    from urllib.parse import quote

    # Keep this delay generous: making it much shorter runs into the
    # service's usage limit.
    time.sleep(5)
    # Percent-encode the sentence so that characters such as '&', '#', '+'
    # or '%' stay inside the "text" parameter instead of truncating it or
    # being read as extra query parameters.
    response = requests.get(URL % quote(sentence, safe=''))
    return response.content.decode('utf8')


 # For every SentiWordNet entry, look up its non-English lemmas in the SQLite
 # database, translate the quoted example sentences found in the gloss, and
 # write one tab-separated row to RESULT_PATH.
 # The output is opened as UTF-8 explicitly: it receives Japanese text, and the
 # platform default encoding may be unable to represent it.
 with open(SWN_PATH) as fd, open(RESULT_PATH, 'w', encoding='utf-8') as rfd, sqlite3.connect(DB_PATH) as conn:
    cursor = conn.cursor()
    for line in fd.read().splitlines():
        # Skip comment lines, and blank lines that would otherwise fail the
        # six-field unpacking below.
        if not line.strip() or line.startswith('#'):
            continue
        POS, ID, PosScore, NegScore, SynsetTerms, Gloss = line.split('\t')
        synset_id = '%s-%s' % (ID, POS)
        jp_lemma = fetch_jp_lemma(synset_id, cursor)
        # Only the double-quoted spans of the gloss are sent for translation.
        sentences = [translate(sentence) for sentence in re_sentence.findall(Gloss)]
        # Fall back to the original SynsetTerms when the database has no lemma.
        terms = ','.join(jp_lemma) if jp_lemma else SynsetTerms
        rfd.write('%s\t%s\t%s\t%s\t%s\n' % (synset_id, terms, PosScore, NegScore, ','.join(sentences)))
	import re
	import sqlite3
	import time

	import requests

	# SQLite database opened by the main loop; its `sense` and `word` tables are queried.
	DB_PATH = 'wnjpn.db'
	# Tab-separated SentiWordNet input file; lines starting with '#' are skipped when read.
	SWN_PATH = 'SentiWordNet_3.0.0_20130122.txt'
	# Translation endpoint template: %s is replaced with the sentence to translate, and the
	# query string asks for source=en, target=ja.
	# NOTE(review): "Please_write_here" looks like a placeholder for a real script id - confirm.
	URL = 'https://script.google.com/macros/s/Please_write_here/exec?text=%s&source=en&target=ja'
	# Output file. Rows are written tab-separated even though the extension is .csv.
	RESULT_PATH = 'result.csv'
	# Captures the text between each pair of double quotes; applied to the Gloss column.
	re_sentence = re.compile('"([^"]+)"')


	def fetch_jp_lemma(synset, cursor):
	    """Return the non-English lemmas attached to ``synset``.

	    Every ``sense`` row for ``synset`` whose ``lang`` is not ``'eng'`` is
	    looked up, and each resulting ``wordid`` is resolved through the
	    ``word`` table (again excluding ``'eng'`` rows).

	    Args:
	        synset: Synset identifier to match against ``sense.synset``.
	        cursor: sqlite3 cursor on a database that has ``sense`` and
	            ``word`` tables.

	    Returns:
	        A list of lemma values in the order the rows were returned;
	        empty when nothing matches.
	    """
	    # Bind values with "?" placeholders instead of formatting them into
	    # the SQL text: a quote character in the value can then neither break
	    # the statement nor alter the WHERE clause.
	    cursor.execute("SELECT wordid FROM sense WHERE synset = ? AND lang != 'eng'", (synset,))
	    word_ids = [row[0] for row in cursor.fetchall()]

	    jp_lemma = []
	    for word_id in word_ids:
	        cursor.execute("SELECT lemma FROM word WHERE wordid = ? AND lang != 'eng'", (word_id,))
	        jp_lemma.extend(row[0] for row in cursor.fetchall())
	    return jp_lemma


	def translate(sentence):
	    """Translate one sentence through the web endpoint described by ``URL``.

	    Sleeps five seconds before every request, then issues a GET with the
	    sentence substituted into the ``URL`` template.

	    Args:
	        sentence: Text to place in the ``text`` query parameter.

	    Returns:
	        The response body decoded as UTF-8. The HTTP status is not
	        checked, so an error page is returned as-is.
	    """
	    from urllib.parse import quote

	    # Keep this delay generous: making it much shorter runs into the
	    # service's usage limit.
	    time.sleep(5)
	    # Percent-encode the sentence so that characters such as '&', '#', '+'
	    # or '%' stay inside the "text" parameter instead of truncating it or
	    # being read as extra query parameters.
	    response = requests.get(URL % quote(sentence, safe=''))
	    return response.content.decode('utf8')


	# For every SentiWordNet entry, look up its non-English lemmas in the SQLite
	# database, translate the quoted example sentences found in the gloss, and
	# write one tab-separated row to RESULT_PATH.
	# The output is opened as UTF-8 explicitly: it receives Japanese text, and the
	# platform default encoding may be unable to represent it.
	with open(SWN_PATH) as fd, open(RESULT_PATH, 'w', encoding='utf-8') as rfd, sqlite3.connect(DB_PATH) as conn:
	    cursor = conn.cursor()
	    for line in fd.read().splitlines():
	        # Skip comment lines, and blank lines that would otherwise fail
	        # the six-field unpacking below.
	        if not line.strip() or line.startswith('#'):
	            continue
	        POS, ID, PosScore, NegScore, SynsetTerms, Gloss = line.split('\t')
	        synset_id = '%s-%s' % (ID, POS)
	        jp_lemma = fetch_jp_lemma(synset_id, cursor)
	        # Only the double-quoted spans of the gloss are sent for translation.
	        sentences = [translate(sentence) for sentence in re_sentence.findall(Gloss)]
	        # Fall back to the original SynsetTerms when the database has no lemma.
	        terms = ','.join(jp_lemma) if jp_lemma else SynsetTerms
	        rfd.write('%s\t%s\t%s\t%s\t%s\n' % (synset_id, terms, PosScore, NegScore, ','.join(sentences)))