Last active
December 16, 2022 18:58
-
-
Save AnnMarieW/6916be87b6e87250e998fc128b3f65cc to your computer and use it in GitHub Desktop.
spacy named entities app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dash import Dash, dcc, html, Input, Output | |
import dash_bootstrap_components as dbc | |
import spacy | |
from spacy import displacy | |
import base64 | |
# Load the "en_core_web_sm" spaCy pipeline once, at module import.
nlp = spacy.load("en_core_web_sm")

# Text placed in the textarea when the page first loads.
initial_text = "I went to Seattle"
# Longer, multi-sentence alternative kept for manual experimentation:
#initial_text = """In ancient Rome, some neighbors live in three adjacent houses. In the center is the house of Senex, who lives there with wife Domina, son Hero, and several slaves, including head slave Hysterium and the musical's main character Pseudolus. A slave belonging to Hero, Pseudolus wishes to buy, win, or steal his freedom. One of the neighboring houses is owned by Marcus Lycus, who is a buyer and seller of beautiful women; the other belongs to the ancient Erronius, who is abroad searching for his long-lost children (stolen in infancy by pirates). One day, Senex and Domina go on a trip and leave Pseudolus in charge of Hero. Hero confides in Pseudolus that he is in love with the lovely Philia, one of the courtesans in the House of Lycus (albeit still a virgin)."""

# Dash application styled with the SPACELAB Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])
def get_svg(svg: str, style: str = "") -> str:
    """Convert an SVG to a base64-encoded image.

    Args:
        svg: SVG markup to embed.
        style: Optional inline CSS placed in the ``style`` attribute of the
            generated ``<img>`` tag.

    Returns:
        An ``<img>`` tag whose ``src`` is a ``data:image/svg+xml;base64,...``
        URI carrying the UTF-8 encoded SVG.
    """
    # Imported locally under a distinct name: at module level ``html`` refers
    # to the Dash component namespace, not the standard-library module.
    from html import escape

    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # Escape the style so a value containing a double quote cannot terminate
    # the attribute early and corrupt the tag.
    safe_style = escape(style, quote=True)
    return f'<img src="data:image/svg+xml;base64,{b64}" style="{safe_style}"/>'
# Page layout: title banner, prompt, input textarea, and a card whose Markdown
# component (id "html") receives the rendered markup.
app.layout = dbc.Container(
    children=[
        html.H3(
            children="Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-4 mb-4",
        ),
        html.Div(children="Enter text to analyze"),
        dcc.Textarea(id="user-input", value=initial_text, className="w-100 mb-4"),
        html.H4(children="Dependency Parse and Part of Speech Tags"),
        dbc.Card(
            children=dcc.Markdown(id="html", dangerously_allow_html=True),
            body=True,
            className="mb-5",
        ),
    ],
)
@app.callback(Output("html", "children"), Input("user-input", "value"))
def display(text):
    """Render a dependency parse image for every sentence in the input text.

    Args:
        text: Current value of the "user-input" textarea; may be ``None``.

    Returns:
        A string containing one base64-encoded SVG ``<img>`` tag per sentence,
        separated by blank lines, or ``None`` when there is nothing to render.
    """
    # This callback has exactly one Output, so it must return a single value
    # rather than a tuple.
    if text is None:
        return None

    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))

    # Render each sentence on its own and keep all of them; previously each
    # iteration overwrote the result so only one sentence was ever shown, and
    # input without any sentence left the result undefined.
    images = [get_svg(displacy.render(sent, style="dep")) for sent in doc.sents]
    if not images:
        return None

    # Blank lines between the tags keep each image in its own Markdown block.
    return "\n\n".join(images)
# Start the app's server with debug enabled only when executed as a script.
# NOTE(review): newer Dash releases expose `app.run`; confirm `run_server` is
# still supported by the installed Dash version.
if __name__ == "__main__":
    app.run_server(debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dash | |
from dash import Dash, dcc, html, Input, Output, State, MATCH | |
import dash_bootstrap_components as dbc | |
import spacy | |
from spacy import displacy | |
# Load the "en_core_web_sm" spaCy pipeline once, at module import.
nlp = spacy.load("en_core_web_sm")

# Dash application styled with the stock Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Page layout: title banner, an "add" button, and an initially empty container
# ("pattern-match-container") that the add_card callback fills with cards.
app.layout = dbc.Container(
    children=dbc.Row(
        children=dbc.Col(
            children=[
                html.H3(
                    children="Natural Language Processing with spaCy",
                    className="text-center bg-primary text-white p-2 mb-4",
                ),
                dbc.Button(
                    children="Add Text Input area",
                    id="pattern-match-add-card",
                    n_clicks=0,
                    className="mb-3",
                ),
                html.Div(
                    id="pattern-match-container",
                    children=[],
                    className="mt-4",
                ),
            ]
        )
    ),
    fluid=True,
)
def get_entities(input_text):
    """Return displaCy "ent"-style markup for the entities found in *input_text*.

    Args:
        input_text: Raw text to run through the spaCy pipeline.

    Returns:
        The value of ``displacy.render`` applied to the document's entity spans.
    """
    # new lines mess up the displacy renderer, so flatten them to spaces first
    flattened = input_text.replace("\n", " ")
    entity_spans = nlp(flattened).ents
    return displacy.render(entity_spans, style="ent")
def make_card(n_clicks):
    """Build one text-input card whose component ids are indexed by *n_clicks*.

    The card holds a header with a delete button, a textarea, and a Markdown
    area. Each uses a dict id of the form ``{"type": ..., "index": n_clicks}``
    so pattern-matching callbacks can address the components of one card.

    Args:
        n_clicks: Index stored in every component id; the header shows
            ``n_clicks + 1`` as the card number.

    Returns:
        The assembled ``dbc.Card``.
    """
    delete_button = dbc.Button(
        "X",
        id={"type": "delete-card", "index": n_clicks},
        n_clicks=0,
        color="secondary",
    )
    header = dbc.CardHeader(
        [
            f"Text Input {n_clicks + 1} ",
            html.Div(delete_button, className="ms-auto"),
        ],
        className="hstack",
    )

    text_column = dbc.Col(
        dcc.Textarea(
            id={"type": "text-input", "index": n_clicks},
            className="w-100",
        ),
    )
    entities_column = dbc.Col(
        dcc.Markdown(
            id={"type": "entities", "index": n_clicks},
            dangerously_allow_html=True,
        ),
    )

    return dbc.Card(
        [header, dbc.Row([text_column, entities_column])],
        className="m-1",
        id={"type": "card", "index": n_clicks},
    )
@app.callback(
    Output("pattern-match-container", "children"),
    Input("pattern-match-add-card", "n_clicks"),
    State("pattern-match-container", "children"),
)
def add_card(n_clicks, cards):
    """Append a new card, indexed by the button's click count, to the container.

    Args:
        n_clicks: Click count of the "pattern-match-add-card" button.
        cards: Current children of "pattern-match-container".

    Returns:
        The same list with the new card appended.
    """
    cards.append(make_card(n_clicks))
    return cards
@app.callback(
    Output({"type": "card", "index": MATCH}, "style"),
    Input({"type": "delete-card", "index": MATCH}, "n_clicks"),
    prevent_initial_call=True,
)
def remove_card(_):
    """Hide the card whose delete button was clicked.

    The card is not removed from the layout; its style is set to
    ``display: none``.
    """
    hidden_style = {"display": "none"}
    return hidden_style
@app.callback(
    Output({"type": "entities", "index": MATCH}, "children"),
    Input({"type": "text-input", "index": MATCH}, "value"),
)
def update_figure(text_input):
    """Refresh a card's entity markup from the text typed into its textarea.

    Args:
        text_input: Value of the matching textarea, or ``None`` when unset.

    Returns:
        ``dash.no_update`` when no text has been entered, otherwise the markup
        produced by ``get_entities``.
    """
    return dash.no_update if text_input is None else get_entities(text_input)
# Start the app's server with debug enabled only when executed as a script.
# NOTE(review): newer Dash releases expose `app.run`; confirm `run_server` is
# still supported by the installed Dash version.
if __name__ == "__main__":
    app.run_server(debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dash import Dash, dash_table, dcc, html, Input, Output | |
import dash_bootstrap_components as dbc | |
import spacy | |
from spacy import displacy | |
import pandas as pd | |
from spacy.displacy.render import DEFAULT_LABEL_COLORS | |
# Load the "en_core_web_sm" spaCy pipeline once, at module import.
nlp = spacy.load("en_core_web_sm")

# Dash application styled with the SPACELAB Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])
# One dropdown option per entry in displaCy's DEFAULT_LABEL_COLORS: the label
# is shown as a bold badge tinted with its mapped colour, and the option value
# is the label string itself.
options = [
    {
        "label": html.Div(
            children=[label],
            style={
                "background-color": color,
                "font-weight": "bold",
                "padding": 5,
                "border-radius": "0.35em",
            },
        ),
        "value": label,
    }
    for label, color in DEFAULT_LABEL_COLORS.items()
]
# Multi-select label dropdown grouped with a "Select All" button.
label_dropdown = dbc.InputGroup(
    children=[
        dcc.Dropdown(
            options,
            multi=True,
            id="label-dropdown",
            style={"width": 400},
        ),
        dbc.Button(children="Select All", id="all"),
    ]
)

# The input group is the only content of the page.
app.layout = dbc.Container(children=label_dropdown)
@app.callback(
    Output("label-dropdown", "value"),
    Input("all", "n_clicks"),
)
def select_all_labels(_):
    """Set the dropdown's value to every label in DEFAULT_LABEL_COLORS.

    NOTE(review): no ``prevent_initial_call`` is set, so this presumably also
    runs when the page first loads; confirm that is intended.
    """
    # Iterating a dict yields its keys, so this lists every label.
    return list(DEFAULT_LABEL_COLORS)
# Start the app's server with debug enabled only when executed as a script.
# NOTE(review): newer Dash releases expose `app.run`; confirm `run_server` is
# still supported by the installed Dash version.
if __name__ == "__main__":
    app.run_server(debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dash import Dash, dash_table, dcc, html, Input, Output | |
import dash_bootstrap_components as dbc | |
import spacy | |
from spacy import displacy | |
import pandas as pd | |
# Load the "en_core_web_sm" spaCy pipeline once, at module import.
nlp = spacy.load("en_core_web_sm")

# Text placed in the textarea when the page first loads.
initial_text = "I went to Seattle"

# Entity attributes read (via getattr) for each table row.
NER_ATTRS = [
    "text",
    "start_char",
    "end_char",
    "label_",
]

# Dash application styled with the SPACELAB Bootstrap theme.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])
# Entity table: one column per NER attribute plus a "description" column, with
# native filtering and sorting and ten rows per page.
table = dash_table.DataTable(
    id="table",
    columns=[
        {"name": column, "id": column}
        for column in [*NER_ATTRS, "description"]
    ],
    filter_action="native",
    sort_action="native",
    page_size=10,
    style_table={"overflowX": "auto"},
)
# Page layout: title banner, prompt, input textarea, a card whose Markdown
# component (id "html") receives the entity markup, and the entity table.
app.layout = dbc.Container(
    children=[
        html.H3(
            children="Natural Language Processing with spaCy",
            className="text-center bg-primary text-white p-4 mb-4",
        ),
        html.Div(children="Enter text to analyze"),
        dcc.Textarea(id="user-input", value=initial_text, className="w-100 mb-4"),
        html.H4(children="Named Entities"),
        dbc.Card(
            children=dcc.Markdown(id="html", dangerously_allow_html=True),
            body=True,
            className="mb-5",
        ),
        html.Div(children=table),
    ],
)
@app.callback(
    Output("html", "children"),
    Output("table", "data"),
    Input("user-input", "value"),
)
def display(text):
    """Render entity markup and build the entity table rows for the input text.

    Args:
        text: Current value of the "user-input" textarea; may be ``None``.

    Returns:
        A ``(markup, rows)`` pair. ``markup`` is the displaCy "ent" rendering
        of the document. ``rows`` is a list of record dicts keyed by
        ``NER_ATTRS`` plus "description", or ``None`` when no entities were
        found. Both are ``None`` when *text* is ``None``.
    """
    if text is None:
        return None, None

    # new lines mess up the displacy renderer
    doc = nlp(text.replace("\n", " "))
    markup = displacy.render(doc, style="ent")

    # Every attribute is stringified before it goes into the table.
    rows = [[str(getattr(ent, attr)) for attr in NER_ATTRS] for ent in doc.ents]
    if not rows:
        return markup, None

    frame = pd.DataFrame(rows, columns=NER_ATTRS)
    # spacy.explain maps each label to its description.
    frame["description"] = frame["label_"].apply(spacy.explain)
    return markup, frame.to_dict("records")
# Start the app's server with debug enabled only when executed as a script.
# NOTE(review): newer Dash releases expose `app.run`; confirm `run_server` is
# still supported by the installed Dash version.
if __name__ == "__main__":
    app.run_server(debug=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
dependency parse

named entities

ent label dropdown