linuskohl · June 4, 2019 16:19
diff --git a/naive-benchmark-on-news-headlines.py b/naive-benchmark-on-news-headlines.py
 # pip install newsapi-python
 import time
 import pandas as pd
 import numpy as np
 import seaborn as sns
 from newsapi import NewsApiClient

 # Benchmark: run the analyzer over current news headlines and record, per
 # headline, its word count, the number of matches returned and the call time.
 # API_KEY, AnalyzeInput and ac are not defined in this snippet and must
 # already be in scope.
 newsapi = NewsApiClient(api_key=API_KEY)
 # get 100 latest news items
 top_news = newsapi.get_top_headlines(language='en', page_size=100)

 # Rows are collected in a plain list and turned into a DataFrame once at the
 # end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
 # on every call.
 rows = []

 # iterate over article headlines, log duration, length of headline and number of identified entities
 for news in top_news["articles"]:  # was `top_headlines`, a name that is never defined
     title = news['title']
     if not title:
         # NOTE(review): assumes the API can return an article with a null or
         # empty title; such entries are skipped rather than crashing on
         # .split() — confirm this is the desired handling.
         continue
     nr_words = len(title.split())
     request_doc = AnalyzeInput(docId="sample_article", language="en")
     request_doc.text = title
     s_time = time.perf_counter()  # monotonic, high-resolution clock for short intervals
     res = ac.analyze(request_doc)
     e_time = time.perf_counter() - s_time  # stop the clock right after the call being measured
     nr_matches = len(res.matches)
     rows.append({'nr_words': nr_words, 'nr_matches': nr_matches, 'time': e_time})

 # Passing the column list keeps the expected columns even when no rows were collected.
 performance = pd.DataFrame(rows, columns=['nr_words', 'nr_matches', 'time'])
	# pip install newsapi-python
	import time
	import pandas as pd
	import numpy as np
	import seaborn as sns
	from newsapi import NewsApiClient

	# Benchmark: run the analyzer over current news headlines and record, per
	# headline, its word count, the number of matches returned and the call time.
	# API_KEY, AnalyzeInput and ac are not defined in this snippet and must
	# already be in scope.
	newsapi = NewsApiClient(api_key=API_KEY)
	# get 100 latest news items
	top_news = newsapi.get_top_headlines(language='en', page_size=100)

	# Rows are collected in a plain list and turned into a DataFrame once at the
	# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
	# on every call.
	rows = []

	# iterate over article headlines, log duration, length of headline and number of identified entities
	for news in top_news["articles"]:  # was `top_headlines`, a name that is never defined
		title = news['title']
		if not title:
			# NOTE(review): assumes the API can return an article with a null or
			# empty title; such entries are skipped rather than crashing on
			# .split() — confirm this is the desired handling.
			continue
		nr_words = len(title.split())
		request_doc = AnalyzeInput(docId="sample_article", language="en")
		request_doc.text = title
		s_time = time.perf_counter()  # monotonic, high-resolution clock for short intervals
		res = ac.analyze(request_doc)
		e_time = time.perf_counter() - s_time  # stop the clock right after the call being measured
		nr_matches = len(res.matches)
		rows.append({'nr_words': nr_words, 'nr_matches': nr_matches, 'time': e_time})

	# Passing the column list keeps the expected columns even when no rows were collected.
	performance = pd.DataFrame(rows, columns=['nr_words', 'nr_matches', 'time'])