@shihyu
Forked from pohzipohzi/forexfactory_econcal.py
Created July 16, 2019 16:51
A scraper for Forex Factory economic calendar data
from bs4 import BeautifulSoup
import requests
import datetime
import logging
import csv

def setLogger():
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename='logs_file',
                        filemode='w')
    console = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

def getEconomicCalendar(startlink,endlink):
    # log the current status to the console
    logging.info("Scraping data for link: {}".format(startlink))

    # get the page and make the soup
    baseURL = "https://www.forexfactory.com/"
    r = requests.get(baseURL + startlink)
    data = r.text
    soup = BeautifulSoup(data, "lxml")

    # get and parse table data, ignoring details and graph
    table = soup.find("table", class_="calendar__table")

    # do not use the ".calendar__row--grey" css selector (reserved for historical data)
    trs = table.select("tr.calendar__row.calendar_row")
    fields = ["date","time","currency","impact","event","actual","forecast","previous"]

    # some rows do not have a date (cells are merged), so carry the last seen values forward
    curr_year = startlink[-4:]
    curr_date = ""
    curr_time = ""
    for tr in trs:

        # fields may get messed up sometimes, see Tue Sep 25 2:45AM French Consumer Spending
        # in that case we append the offending date/time to errors.csv
        try:
            for field in fields:
                data = tr.select("td.calendar__cell.calendar__{}.{}".format(field,field))[0]
                # print(data)
                if field=="date" and data.text.strip()!="":
                    curr_date = data.text.strip()
                elif field=="time" and data.text.strip()!="":
                    # time is sometimes "All Day" or "Day X" (e.g. WEF Annual Meetings)
                    if data.text.strip().find("Day")!=-1:
                        curr_time = "12:00am"
                    else:
                        curr_time = data.text.strip()
                elif field=="currency":
                    currency = data.text.strip()
                elif field=="impact":
                    # when impact says "Non-Economic" on mouseover, the relevant
                    # class name is "Holiday", thus we do not use the class name
                    impact = data.find("span")["title"]
                elif field=="event":
                    event = data.text.strip()
                elif field=="actual":
                    actual = data.text.strip()
                elif field=="forecast":
                    forecast = data.text.strip()
                elif field=="previous":
                    previous = data.text.strip()

            # parse year/date/time strings of the form "2007,SunJan 7,3:30am"
            dt = datetime.datetime.strptime(",".join([curr_year,curr_date,curr_time]),
                                            "%Y,%a%b %d,%I:%M%p")
            print(",".join([str(dt),currency,impact,event,actual,forecast,previous]))
        except Exception:
            with open("errors.csv","a") as f:
                csv.writer(f).writerow([curr_year,curr_date,curr_time])

    # exit the recursion once the last available link has been reached
    if startlink==endlink:
        logging.info("Successfully retrieved data")
        return

    # get the link for the next week and follow it
    follow = soup.select("a.calendar__pagination.calendar__pagination--next.next")
    follow = follow[0]["href"]
    getEconomicCalendar(follow,endlink)

if __name__ == "__main__":
    """
    Run this using the command "python `script_name`.py >> `output_name`.csv"
    """
    setLogger()
    getEconomicCalendar("calendar.php?week=jan7.2007","calendar.php?week=dec24.2017")
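
A minimal usage sketch for pulling a single week, assuming the script above is saved as forexfactory_econcal.py (the forked filename) and that the calendar.php?week=... link format it uses still resolves; the wrapper filename scrape_one_week.py is hypothetical. Passing the same link as start and end stops the recursion after one page, and each row is printed to stdout as a CSV line, so redirect the output to a file to collect it.

from forexfactory_econcal import setLogger, getEconomicCalendar

setLogger()
# start link == end link, so only this one week is scraped before the recursion stops;
# run with: python scrape_one_week.py >> week_jan7_2007.csv
getEconomicCalendar("calendar.php?week=jan7.2007", "calendar.php?week=jan7.2007")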