Last active
January 1, 2022 22:04
-
-
Save marcosan93/da40d07930fb274457cc7e391b009060 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getNews(ticker, days):
    """
    Retrieves financial news over the course of a specified number of days for
    a given crypto ticker and keeps only the articles that mention it.

    Args:
        ticker: Ticker symbol such as "BTC". ".CC" is appended to it before
            it is passed to the news client.
        days: How many days back from now to retrieve news for.

    Returns:
        A pandas DataFrame built from the articles whose tokenized title
        contains the lower-cased ticker (or its full name, for the tickers
        listed in ``cc_name``). The 'date' column is cut down to its first
        10 characters. The DataFrame is empty when no article matches.
    """
    # Length in days of each request window; every request is capped at 100
    # articles, so the period is fetched in several smaller windows.
    window = 20

    # List of news
    news = []

    # The earliest moment to retrieve news for
    ago = datetime.now() - timedelta(days=days)

    # Walking forward from `ago` to now, one window at a time. The last
    # window is clipped to `days` so the most recent days are included even
    # when `days` is not a multiple of the window (or is smaller than it).
    for start in tqdm(range(0, days, window)):
        end = min(start + window, days)

        # The date range to gather news
        date_from = (ago + timedelta(days=start)).strftime("%Y-%m-%d")
        date_to = (ago + timedelta(days=end)).strftime("%Y-%m-%d")

        # Grabbing the news
        resp = client.get_financial_news(
            s=ticker + ".CC",
            from_=date_from,
            to=date_to,
            limit=100
        )

        # Adding to the news list
        news.extend(resp)

    # Full names of given cryptos (for the top cryptos right now)
    cc_name = {
        "btc": "bitcoin",
        "eth": "ethereum",
        "bnb": "binance",
        "usdt": "tether",
        "sol": "solana"
    }

    # Words that mark a title as relevant: always the ticker itself, plus
    # the full name when one is known. Tickers missing from `cc_name` are
    # matched on the symbol alone instead of raising KeyError.
    symbol = ticker.lower()
    keywords = {symbol}
    full_cc = cc_name.get(symbol)
    if full_cc is not None:
        keywords.add(full_cc)

    # Filtering out irrelevant news
    lst = [
        article for article in news
        if any(
            token in keywords
            for token in nltk.word_tokenize(article['title'].lower())
        )
    ]

    news = pd.DataFrame(lst)

    # Formatting the date (keeping only the first 10 characters, presumably
    # the YYYY-MM-DD part of a timestamp string). Skipped when nothing
    # matched, because an empty DataFrame has no 'date' column.
    if 'date' in news.columns:
        news['date'] = news['date'].str[:10]

    return news
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment