Created
February 3, 2020 06:13
-
-
Save knok/4e43a23895508fe91bc33ebee8e5f646 to your computer and use it in GitHub Desktop.
Make a Japanese WordNet synonym list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
import os | |
import sys | |
# Path to the Japanese WordNet SQLite database file.
fname = "wnjpn.db"  # get from http://compling.hss.ntu.edu.sg/wnja/jpn/detail.html

# sqlite3.connect() silently creates a new, empty database when the file is
# missing; the mistake would then only surface later as an obscure SQL error.
# Fail early with an actionable message instead.
if not os.path.isfile(fname):
    raise FileNotFoundError(
        "%s not found; download it from "
        "http://compling.hss.ntu.edu.sg/wnja/jpn/detail.html" % fname
    )

# Module-wide connection shared by all query helpers in this script.
conn = sqlite3.connect(fname)
def id2word(wordid):
    """Return the word text stored for ``wordid`` in the ``word`` table.

    Args:
        wordid: Value matched against the ``wordid`` column.

    Returns:
        The third column (index 2) of the matching ``word`` row
        (presumably the lemma -- confirm against the database schema).

    Raises:
        LookupError: If no row has the given ``wordid``.
    """
    # Bind the value as a parameter instead of interpolating it into the SQL
    # text, so unexpected input cannot alter the statement.
    # NOTE(review): assumes ``wordid`` identifies at most one row, so taking
    # the first match is sufficient -- confirm.
    row = conn.execute(
        "select * from word where wordid = ?", (wordid,)
    ).fetchone()
    if row is None:
        raise LookupError("no word with wordid %r" % (wordid,))
    return row[2]
def syset2wordids(synset):
    """Return the word ids of the Japanese senses belonging to ``synset``.

    Args:
        synset: Value matched against the ``synset`` column of ``sense``.

    Returns:
        A list with the second column (index 1) of every ``sense`` row whose
        ``lang`` is ``'jpn'`` and whose ``synset`` equals the argument; an
        empty list when nothing matches.
    """
    # Parameter binding keeps quotes inside ``synset`` from breaking or
    # altering the statement.
    rows = conn.execute(
        "select * from sense where lang = 'jpn' and synset = ?", (synset,)
    )
    return [row[1] for row in rows]
def get_synsetlinks(synset):
    """Return the synsets linked from ``synset`` by 'hype' or 'hypo' links.

    Args:
        synset: Value matched against the ``synset1`` column of ``synlink``.

    Returns:
        A list with the second column (index 1) of every ``synlink`` row
        whose ``synset1`` equals the argument and whose ``link`` is
        ``'hype'`` or ``'hypo'`` (presumably hypernym/hyponym relations --
        confirm against the database documentation). Empty when there are
        no such links.
    """
    # Parameter binding keeps quotes inside ``synset`` from breaking or
    # altering the statement.
    rows = conn.execute(
        "select * from synlink where synset1 = ?"
        " and ( link = 'hype' or link = 'hypo' )",
        (synset,),
    )
    return [row[1] for row in rows]
# Driver: for every Japanese sense row, print one line containing the words of
# that row's synset followed by the words of each synset linked to it,
# separated by single spaces.
# NOTE(review): the loop visits sense rows, not distinct synsets, so a synset
# with several Japanese words yields the same line once per word; this matches
# the existing output -- dedupe downstream if that is not wanted.
cur = conn.execute("select * from sense where lang = 'jpn'")
for row in cur:
    synset = row[0]
    # Start with the synset's own words, then append those of linked synsets.
    word_ids = syset2wordids(synset)
    for linked_synset in get_synsetlinks(synset):
        word_ids.extend(syset2wordids(linked_synset))
    print(" ".join([id2word(word_id) for word_id in word_ids]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment