Skip to content

Instantly share code, notes, and snippets.

@shashankvemuri
Created May 25, 2020 03:20
Show Gist options
  • Select an option

  • Save shashankvemuri/6c81a7e9fcaa5810d841ea95e725ba25 to your computer and use it in GitHub Desktop.

Select an option

Save shashankvemuri/6c81a7e9fcaa5810d841ea95e725ba25 to your computer and use it in GitHub Desktop.
sentiment analysis code
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Parameters
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}
for ticker in tickers:
url = finwiz_url + ticker
req = Request(url=url,headers={'user-agent': 'my-app/0.0.1'})
resp = urlopen(req)
html = BeautifulSoup(resp, features="lxml")
news_table = html.find(id='news-table')
news_tables[ticker] = news_table
try:
for ticker in tickers:
df = news_tables[ticker]
df_tr = df.findAll('tr')
print ('\n')
print ('Recent News Headlines for {}: '.format(ticker))
for i, table_row in enumerate(df_tr):
a_text = table_row.a.text
td_text = table_row.td.text
td_text = td_text.strip()
print(a_text,'(',td_text,')')
if i == n-1:
break
except KeyError:
pass
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
for x in news_table.findAll('tr'):
text = x.a.get_text()
date_scrape = x.td.text.split()
if len(date_scrape) == 1:
time = date_scrape[0]
else:
date = date_scrape[0]
time = date_scrape[1]
ticker = file_name.split('_')[0]
parsed_news.append([ticker, date, time, text])
# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()
columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)
scores = news['Headline'].apply(analyzer.polarity_scores).tolist()
df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')
# View Data
news['Date'] = pd.to_datetime(news.Date).dt.date
unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}
values = []
for ticker in tickers:
dataframe = news_dict[ticker]
dataframe = dataframe.set_index('Ticker')
dataframe = dataframe.drop(columns = ['Headline'])
print ('\n')
print (dataframe.head())
mean = round(dataframe['compound'].mean(), 2)
values.append(mean)
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment