Skip to content

Instantly share code, notes, and snippets.

@farizrahman4u
Created February 19, 2019 11:14
Show Gist options
  • Save farizrahman4u/58b1d83c607b7f21ecef404552e49dbd to your computer and use it in GitHub Desktop.
Save farizrahman4u/58b1d83c607b7f21ecef404552e49dbd to your computer and use it in GitHub Desktop.
import json
# Read every raw line of the dataset into memory; each entry keeps its
# line terminator.
with open('ner_dataset.csv', 'r') as f:
    lines = list(f)

# Discard the first two lines of the file before parsing.
# NOTE(review): presumably the first is the CSV header and the second is the
# first data row (dropped so the parsing loop does not begin by flushing an
# empty sentence) -- confirm against the dataset.
for _ in range(2):
    lines.pop(0)

# Accumulators: `sentences` collects finished [words, poss, tags] triples,
# the other three hold the tokens of the sentence currently being built.
sentences, words, poss, tags = [], [], [], []
def csv(x):
y = []
buff = ''
escaped = False
for c in x:
if c == '"':
if escaped:
escaped = False
else:
escaped = True
elif c == ',':
if escaped:
buff += c
else:
y.append(buff)
buff = ''
else:
buff += c
if buff:
y.append(buff)
return y
# Group the token rows into sentences. Each row is expected to split into
# four fields (sentence marker, word, POS tag, tag); a row that starts with
# "Sentence: " opens a new sentence.
for x in lines:
    # Strip only the line terminator. Slicing off the last character
    # unconditionally would eat real data on a final line with no newline.
    x = x.rstrip('\n')
    if not x:
        # Blank lines carry no token; skipping avoids a failed unpack below.
        continue
    if x.startswith("Sentence: "):
        # Flush the sentence collected so far and start a fresh one.
        sentences.append([words, poss, tags])
        words = []
        poss = []
        tags = []
        # Drop the "Sentence" label. maxsplit=1 keeps any later ": " inside
        # the row (e.g. within a word) from truncating it.
        x = x.split(": ", 1)[1]
    _, word, pos, tag = csv(x)
    words.append(word)
    poss.append(pos)
    tags.append(tag)
# Flush the final sentence, which has no following marker row.
sentences.append([words, poss, tags])

# Persist the parsed sentences as JSON, then load them back so `sentences`
# holds exactly what the file contains.
with open('data.json', 'w') as f:
    json.dump(sentences, f)
with open('data.json', 'r') as f:
    sentences = json.load(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment