Skip to content

Instantly share code, notes, and snippets.

@knok
Created February 3, 2020 06:13
Show Gist options
  • Save knok/4e43a23895508fe91bc33ebee8e5f646 to your computer and use it in GitHub Desktop.
Save knok/4e43a23895508fe91bc33ebee8e5f646 to your computer and use it in GitHub Desktop.
Make a Japanese WordNet synonym list
import sqlite3
import os
import sys
# Path to the Japanese WordNet SQLite database.
# Download it from http://compling.hss.ntu.edu.sg/wnja/jpn/detail.html
fname = "wnjpn.db"

# sqlite3.connect() does not fail when the file is missing (it opens a new,
# empty database), which would only surface later as a confusing
# "no such table" error.  Stop early with an actionable message instead.
if not os.path.isfile(fname):
    sys.exit("%s not found; download it from "
             "http://compling.hss.ntu.edu.sg/wnja/jpn/detail.html" % fname)

# Connection used by the queries in this script.
conn = sqlite3.connect(fname)
def id2word(wordid, db=None):
    """Return the word text stored for ``wordid`` in the ``word`` table.

    Args:
        wordid: Value matched against the ``wordid`` column.
        db: Optional SQLite connection to query.  Defaults to the
            module-level ``conn``.

    Returns:
        The third column (index 2) of the matching row -- presumably the
        lemma; confirm against the database schema.  If several rows match,
        the last one returned by the query is used.

    Raises:
        LookupError: If no row matches ``wordid``.
    """
    if db is None:
        db = conn
    # Bind the value as a parameter instead of formatting it into the SQL text.
    rows = db.execute(
        "select * from word where wordid = ?", (wordid,)).fetchall()
    if not rows:
        raise LookupError("no row in word table for wordid %r" % (wordid,))
    return rows[-1][2]
def syset2wordids(synset, db=None):
    """Return the word ids of the Japanese senses that belong to ``synset``.

    Args:
        synset: Value matched against the ``synset`` column of ``sense``.
        db: Optional SQLite connection to query.  Defaults to the
            module-level ``conn``.

    Returns:
        A list with the second column (index 1) of every ``sense`` row whose
        ``lang`` is ``'jpn'`` and whose ``synset`` equals the argument; the
        list is empty when nothing matches.
    """
    if db is None:
        db = conn
    # A bound parameter keeps values containing quotes from breaking the SQL.
    cursor = db.execute(
        "select * from sense where lang='jpn' and synset = ?", (synset,))
    return [row[1] for row in cursor]
def get_synsetlinks(synset, db=None):
    """Return the synsets linked from ``synset`` by a 'hype' or 'hypo' link.

    Args:
        synset: Value matched against the ``synset1`` column of ``synlink``.
        db: Optional SQLite connection to query.  Defaults to the
            module-level ``conn``.

    Returns:
        A list with the second column (index 1) of every matching ``synlink``
        row; empty when there are no such links.  The link codes presumably
        stand for hypernym/hyponym -- confirm against the database docs.
    """
    if db is None:
        db = conn
    # A bound parameter keeps values containing quotes from breaking the SQL.
    cursor = db.execute(
        "select * from synlink where synset1 = ?"
        " and ( link = 'hype' or link = 'hypo' )",
        (synset,))
    return [row[1] for row in cursor]
# For every Japanese sense row, print one space-separated line holding the
# words of that row's synset followed by the words of each synset that
# get_synsetlinks() reports for it.
# NOTE(review): a synset with several Japanese sense rows is visited once per
# row, so its line is printed repeatedly -- confirm whether that is intended.
cur = conn.execute("select * from sense where lang = 'jpn'")
for row in cur:
    synset = row[0]
    word_ids = syset2wordids(synset)
    for sid in get_synsetlinks(synset):
        word_ids.extend(syset2wordids(sid))
    print(" ".join(id2word(wordid) for wordid in word_ids))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment