Skip to content

Instantly share code, notes, and snippets.

@pcbje
Last active April 8, 2020 09:50
Show Gist options
  • Select an option

  • Save pcbje/fdd8c31cf32604c08aae294a7871ca2d to your computer and use it in GitHub Desktop.

Select an option

Save pcbje/fdd8c31cf32604c08aae294a7871ca2d to your computer and use it in GitHub Desktop.
# License: GPLv3
# https://github.com/aboSamoor/polyglot/blob/master/LICENSE
import polyglot
from polyglot.text import Text, Word
from flask import Flask, request
# Flask application object; request handlers are registered on it via @app.route.
app = Flask(__name__)
# sudo yum install libicu libicu-devel.x86_64 gcc-c++ python3-devel
# pip3 install morfessor pycld2 pyicu numpy six polyglot Flask
# polyglot download embeddings2.no ner2.no
# curl -X POST http://localhost:5000 -d '{"content":"hello Jon Doe."}' -H "Content-Type: application/json"
# An entity is discarded when the distance between the located first and last
# word is more than this many times the length of the entity text itself,
# which indicates that one of the words was matched at the wrong position.
_MAX_SPREAD = 2


def _find_span(content, words, cursor):
    """Return the ``(start, end)`` character offsets of *words* in *content*.

    The first word is searched from *cursor* onwards. The last word is
    searched from the end of the first word, so it can neither match inside
    the first word nor match the first word itself.

    Raises:
        ValueError: if either word cannot be found in *content*.
    """
    first, last = words[0], words[-1]
    start = content.index(first, cursor)
    end = content.index(last, start + len(first)) + len(last)
    return start, end


@app.route('/', methods=['POST'])
def extract():
    """Extract multi-word named entities from the posted text.

    Expects a JSON body with a string ``content`` field and an optional
    ``language`` field that overrides the language of the polyglot ``Text``.

    Returns a JSON object with the keys:

    * ``entities`` -- list of ``{'entityType', 'entity', 'offset': {'start',
      'end'}}`` dicts; offsets are character positions in ``content``.
    * ``success`` -- ``False`` if the request was malformed or extraction
      raised, ``True`` otherwise.
    * ``message`` -- the error text when ``success`` is ``False``, else
      ``None``.

    A malformed request is answered with HTTP 400; extraction failures keep
    the 200 status and report the problem in the body, together with any
    entities collected before the failure.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not isinstance(payload.get('content'), str):
        return {
            'entities': [],
            'success': False,
            'message': 'Request body must be JSON with a string "content" field.',
        }, 400

    content = payload['content']
    language = payload.get('language')

    entities = []
    success = True
    message = None
    try:
        text = Text(content)
        if language is not None:
            text.language = language

        cursor = 0
        for chunk in text.entities:
            # Single-word entities are ignored.
            if len(chunk) < 2:
                continue
            start, end = _find_span(content, chunk, cursor)
            # Continue after this entity so that a repeated mention is
            # located at its own position, not at the first occurrence.
            cursor = end
            entity = ' '.join(chunk)
            if (end - start) / len(entity) > _MAX_SPREAD:
                continue
            entities.append({
                'entityType': chunk.tag,
                'entity': entity,
                'offset': {
                    'start': start,
                    'end': end,
                },
            })
    except Exception as e:
        # Boundary handler: report the failure to the client in the response
        # envelope and keep a traceback in the server log.
        app.logger.exception('Entity extraction failed')
        message = str(e)
        success = False

    return {
        'entities': entities,
        'success': success,
        'message': message,
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment