Created
June 4, 2019 16:19
-
-
Save linuskohl/580a9056481dce98f417d5eeb027ff17 to your computer and use it in GitHub Desktop.
Naive benchmark of AmbiverseNLU on news headlines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install newsapi-python
#
# Naive benchmark: run the entity analysis on current news headlines and
# record, per headline, its word count, the number of matches returned and
# the time the analysis call took.
#
# NOTE: `API_KEY`, `AnalyzeInput` and `ac` are not defined in this snippet;
# they are expected to be provided by earlier code (the API key and the
# analysis client set-up).
import time
import pandas as pd
import numpy as np
import seaborn as sns
from newsapi import NewsApiClient

newsapi = NewsApiClient(api_key=API_KEY)

# get 100 latest news items
top_news = newsapi.get_top_headlines(language='en', page_size=100)

# Collect one dict per headline and build the DataFrame once at the end:
# DataFrame.append copied the whole frame on every call and was removed in
# pandas 2.0.
rows = []

# iterate over article headlines, log duration, length of headline and
# number of identified entities
for news in top_news["articles"]:
    title = news.get('title')
    if not title:
        # Nothing to analyze without a headline; skip instead of crashing
        # on `None.split()`.
        continue
    nr_words = len(title.split())

    request_doc = AnalyzeInput(docId="sample_article", language="en")
    request_doc.text = title

    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # so it is the appropriate clock for measuring short intervals.
    s_time = time.perf_counter()
    res = ac.analyze(request_doc)
    # Stop the clock right after the call so only the analysis is timed.
    e_time = time.perf_counter() - s_time

    nr_matches = len(res.matches)
    rows.append({'nr_words': nr_words, 'nr_matches': nr_matches, 'time': e_time})

# Passing `columns` keeps the expected schema even when no rows were collected.
performance = pd.DataFrame(rows, columns=['nr_words', 'nr_matches', 'time'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment