This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(1, 5, sharey=True, gridspec_kw={'wspace': 0}) | |
| fig.set_size_inches(16,6) | |
| x = y = 0 | |
| for issue in myCountries: | |
| train_l = len(time_series)-5 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(4, 5, sharex=True) | |
| fig.set_size_inches(16,12) | |
| x = y = 0 | |
| for issue in time_series: | |
| train_l = len(time_series)-5 | |
| selected_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(4, 5, sharex=True) | |
| fig.set_size_inches(16,12) | |
| x = y = 0 | |
| for issue in time_series: | |
| if not issue.find(".com") > -1: | |
| continue |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #notebook setup: imports, the gdelt client object, and a local "data" folder | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| #IPython shell escape (notebook-only syntax, not plain Python): installs the gdelt package before it is imported | |
| !pip install gdelt | |
| import gdelt | |
| #gdelt client object, constructed with version=1 | |
| gd = gdelt.gdelt(version=1) | |
| import os | |
| #create the "data" directory; exist_ok=True makes this a no-op when it already exists | |
| os.makedirs("data",exist_ok=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| mape_df = pd.DataFrame() | |
| fig, axs = plt.subplots(4, 5, sharex=True) | |
| fig.set_size_inches(16,12) | |
| x = y = 0 | |
| for issue in time_series: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(4, 5, sharex=True) | |
| fig.set_size_inches(16,12) | |
| x = y = 0 | |
| for issue in time_series: | |
| if not issue.find(".com") > -1: | |
| continue | |
| train_l = len(time_series)-5 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| !pip install gdelt #make sure gdelt installed | |
| import pandas as pd, numpy as np, matplotlib.pyplot as plt, gdelt, os, datetime, warnings #imports | |
| gd = gdelt.gdelt(version=1) #instantiate object to pull gdelt files | |
| os.makedirs("data",exist_ok=True) #check if there's a data folder | |
| cur_date = datetime.datetime(2019,10,7)-datetime.timedelta(days=60) #start pulling from 60 days prior to 10/7 | |
| while cur_date < datetime.datetime(2019,10,7): #pull until 10/7 | |
| if not os.path.exists("data/%s-%s-%s.pkl"%(cur_date.year, cur_date.month, cur_date.day)): #if don't have |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| mySources = ["cnn.com", "washingtonpost.com", "nytimes.com", "foxnews.com"] | |
| #unnest the entries with multiple sources in them | |
| df = df.set_index(df.columns.drop('SOURCES',1).tolist()).SOURCES.str.split(';', expand=True).stack().reset_index().rename(columns={0:'SOURCES'}).loc[:, df.columns] | |
| df.DATE = df.DATE.apply(lambda x: str(x)) #convert date | |
| df.DATE = pd.to_datetime(df.DATE) | |
| df.fillna("", inplace=True) | |
| df.set_index("DATE", drop=True, inplace=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from statsmodels.tsa.statespace.sarimax import SARIMAX | |
| def gen_SARIMA_result(p,d,q, df, issue, test_length): #takes pdq, data, issue to use | |
| s_model = SARIMAX(endog = df[issue][:-test_length], | |
| exog = df[[x for x in df.columns if x != issue]][:-test_length], | |
| order=(p,d,q), seasonal_order=(1,0,1,7)).fit() | |
| f_ru = df[[issue]].copy()[1:] #haven't bothered to change this, but it's the results |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig, axs = plt.subplots(1,3, figsize=(12,8)) | |
| issue = time_series.columns[0] | |
| test_length = 10 | |
| selected_series = time_series[[col for col in time_series.columns if (col.find(issue[issue.find("_"):]) > -1)]].shift()[1:].drop(columns=issue).add_suffix("_l1") | |
| pub_series = time_series[[col for col in time_series.columns if (col.find(issue[:issue.find("_")]) > -1)]].drop(columns=issue).shift()[1:].add_suffix("_l1") | |
| selected_series = selected_series.join(pub_series).join(time_series[issue]) | |
| x = 0 | |
| for p in [1,5,10]: |