Created May 25, 2020 03:20
Sentiment analysis of FinViz stock news headlines using NLTK's VADER
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Note: VADER needs its lexicon downloaded once, e.g. nltk.download('vader_lexicon')

# Parameters
n = 3  # number of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
    resp = urlopen(req)
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')  # table of headlines on the FinViz quote page
    news_tables[ticker] = news_table

# Print the n most recent headlines for each ticker
try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')

        print('\n')
        print('Recent News Headlines for {}: '.format(ticker))

        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text    # headline text
            td_text = table_row.td.text  # date and/or time stamp
            td_text = td_text.strip()
            print(a_text, '(', td_text, ')')
            if i == n - 1:
                break
except KeyError:
    pass

# Iterate through the news and parse ticker, date, time and headline
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        text = x.a.get_text()
        date_scrape = x.td.text.split()

        # Rows that only carry a time reuse the date from the previous row
        if len(date_scrape) == 1:
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        # Dict keys are plain tickers, so the split simply keeps the ticker
        ticker = file_name.split('_')[0]
        parsed_news.append([ticker, date, time, text])

# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()

columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)
scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')

# View Data
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers:
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns=['Headline'])
    print('\n')
    print(dataframe.head())

    mean = round(dataframe['compound'].mean(), 2)  # average compound VADER score
    values.append(mean)

df = pd.DataFrame(list(zip(tickers, values)), columns=['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print('\n')
print(df)
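matplotlib.pyplot is imported at the top of the script but never used. Below is a minimal sketch of how the final mean-sentiment table could be visualised as a bar chart, assuming the df built at the end of the script (indexed by Ticker with a 'Mean Sentiment' column); the title, colour and zero baseline are illustrative choices, not part of the original gist.

# Sketch: bar chart of mean compound sentiment per ticker.
# Assumes `df` and `plt` from the script above are already in scope.
ax = df.plot(kind='bar', legend=False, color='steelblue')
ax.set_ylabel('Mean compound score')
ax.set_title('Mean VADER sentiment of recent FinViz headlines')
ax.axhline(0, color='black', linewidth=0.8)  # neutral-sentiment baseline
plt.tight_layout()
plt.show()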