Skip to content

Instantly share code, notes, and snippets.

@yuya-kanai
Last active September 12, 2019 01:23
Show Gist options
  • Save yuya-kanai/afaba9389ed729365f2bce7a42957690 to your computer and use it in GitHub Desktop.
Save yuya-kanai/afaba9389ed729365f2bce7a42957690 to your computer and use it in GitHub Desktop.
Data labeling tool for Jupyter Notebook
import csv

import gensim
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Checkbox
# Word vectors used to look up words similar to each ambiguous word.
# 'model.vec' is read in the plain-text (non-binary) word2vec format.
word_model = gensim.models.KeyedVectors.load_word2vec_format(
    'model.vec',
    binary=False,
)
def _load_word_set(path):
    """Return every word stored in the CSV file at *path* as a set.

    Words from all rows are collected.  A missing or empty file yields an
    empty set, so the first save works before any label file exists.
    """
    try:
        with open(path, 'r', newline='') as f:
            return {word for row in csv.reader(f) for word in row}
    except FileNotFoundError:
        return set()


def _write_word_set(path, words):
    """Write *words* to *path* as one fully quoted CSV row, sorted for stable output."""
    with open(path, 'w', newline='') as f:
        csv.writer(f, quoting=csv.QUOTE_ALL).writerow(sorted(words))


def ambiguous_word_labeling(ambiguous_word_list):
    """Display a notebook UI for labeling ambiguous words and their neighbours.

    For every word in *ambiguous_word_list* that ``word_model`` knows, an
    accordion section is shown containing:

    * a three-way toggle (keep / ignore / discard) applied to the whole group;
    * one checkbox per derived word (the model's most similar words plus the
      word itself); only checked words take part in a save.

    Pressing the "Save" button merges the current choices into ``keep.csv``
    and ``remove.csv`` in the working directory: checked words of "keep"
    groups are added to the keep list, checked words of "discard" groups are
    added to the remove list, and checked words of "ignore" groups are taken
    out of both lists.

    Args:
        ambiguous_word_list: Iterable of words to label.  Words that are not
            in the model vocabulary are skipped.

    Returns:
        None.  The widgets are rendered with ``display``.
    """
    # The label strings double as the toggle-button captions and state values.
    keep_label = 'キープ'
    ignore_label = '無視'
    discard_label = '捨てる'

    # title -> {'state': label, 'derived_words': [...], 'checkboxes': [...]}
    foundational_words = {}

    def update_state(change, target):
        """Record the label newly selected for the group named *target*."""
        foundational_words[target]['state'] = change.new

    def save(change):
        """Merge the current labels into keep.csv / remove.csv.

        *change* is whatever the button passes to its click handler; it is
        not used.
        """
        keep_word_set = _load_word_set('keep.csv')
        remove_word_set = _load_word_set('remove.csv')
        disregard_word_set = set()
        set_for_label = {
            keep_label: keep_word_set,
            ignore_label: disregard_word_set,
            discard_label: remove_word_set,
        }

        for entry in foundational_words.values():
            target_set = set_for_label.get(entry['state'])
            if target_set is None:
                continue
            # Honour the per-word checkboxes: unchecked words are left alone.
            target_set.update(
                word
                for word, box in zip(entry['derived_words'], entry['checkboxes'])
                if box.value
            )

        keep_word_set -= disregard_word_set
        remove_word_set -= disregard_word_set
        print("keep: ", keep_word_set)
        print("remove: ", remove_word_set)

        _write_word_set('keep.csv', keep_word_set)
        _write_word_set('remove.csv', remove_word_set)
        print('done')

    accordion_children = []
    # Titles are collected alongside the children so that skipped words do
    # not shift the titles of the sections that follow them.
    accordion_titles = []

    for title in ambiguous_word_list:
        try:
            similar_words = word_model.most_similar(positive=[title])
        except KeyError:
            # The word is not in the model vocabulary; nothing to label.
            continue

        mapped_words = [word for word, _score in similar_words]
        mapped_words.append(title)
        checkboxes = [
            Checkbox(value=True, description=str(word)) for word in mapped_words
        ]
        foundational_words[title] = {
            'state': keep_label,
            'derived_words': mapped_words,
            'checkboxes': checkboxes,
        }

        gridbox = widgets.GridBox(
            checkboxes,
            layout=widgets.Layout(grid_template_columns="repeat(4, 160px)"),
        )
        toggle_buttons = widgets.ToggleButtons(
            options=[keep_label, ignore_label, discard_label],
            description='ラベル',
            value=keep_label,
            disabled=False,
            button_style='',
            tooltips=[
                'Add the checked words to keep.csv',
                'Drop the checked words from both keep.csv and remove.csv',
                'Add the checked words to remove.csv',
            ],
        )
        # Bind the current title as a default argument; a plain closure would
        # see only the last title of the loop.
        toggle_buttons.observe(
            lambda change, title=title: update_state(change, target=title),
            names='value',
        )

        accordion_children.append(widgets.VBox([toggle_buttons, gridbox]))
        accordion_titles.append(title)

    accordion = widgets.Accordion(children=accordion_children)
    for index, title in enumerate(accordion_titles):
        accordion.set_title(index, title)

    save_button = widgets.Button(
        description='Save',
        button_style='success',
        tooltip='save file',
        icon='save',
    )
    save_button.on_click(save)

    display(widgets.VBox([accordion, save_button]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment