@pohzipohzi
Created November 3, 2017 15:35
A scraper for forex factory economic calendar data
from bs4 import BeautifulSoup
import requests
import datetime
import logging
import csv

def setLogger():
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename='logs_file',
                        filemode='w')
    console = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

def getEconomicCalendar(startlink,endlink):
    # write current status to console
    logging.info("Scraping data for link: {}".format(startlink))

    # get the page and make the soup
    baseURL = "https://www.forexfactory.com/"
    r = requests.get(baseURL + startlink)
    data = r.text
    soup = BeautifulSoup(data, "lxml")

    # get and parse table data, ignoring details and graph
    table = soup.find("table", class_="calendar__table")

    # do not use the ".calendar__row--grey" css selector (reserved for historical data)
    trs = table.select("tr.calendar__row.calendar_row")
    fields = ["date","time","currency","impact","event","actual","forecast","previous"]

    # some rows do not have a date (cells merged)
    curr_year = startlink[-4:]
    curr_date = ""
    curr_time = ""
    for tr in trs:

        # fields may get mixed up sometimes, see Tue Sep 25 2:45AM French Consumer Spending
        # in that case we append the date and time where the error occurred to errors.csv
        try:
            for field in fields:
                data = tr.select("td.calendar__cell.calendar__{}.{}".format(field,field))[0]
                # print(data)
                if field=="date" and data.text.strip()!="":
                    curr_date = data.text.strip()
                elif field=="time" and data.text.strip()!="":
                    # time is sometimes "All Day" or "Day X" (e.g. WEF Annual Meetings)
                    if data.text.strip().find("Day")!=-1:
                        curr_time = "12:00am"
                    else:
                        curr_time = data.text.strip()
                elif field=="currency":
                    currency = data.text.strip()
                elif field=="impact":
                    # when impact says "Non-Economic" on mouseover, the relevant
                    # class name is "Holiday", thus we do not use the classname
                    impact = data.find("span")["title"]
                elif field=="event":
                    event = data.text.strip()
                elif field=="actual":
                    actual = data.text.strip()
                elif field=="forecast":
                    forecast = data.text.strip()
                elif field=="previous":
                    previous = data.text.strip()
            dt = datetime.datetime.strptime(",".join([curr_year,curr_date,curr_time]),
                                            "%Y,%a%b %d,%I:%M%p")
            print(",".join([str(dt),currency,impact,event,actual,forecast,previous]))
        except:
            with open("errors.csv","a") as f:
                csv.writer(f).writerow([curr_year,curr_date,curr_time])

    # exit the recursion when the last available link has been reached
    if startlink==endlink:
        logging.info("Successfully retrieved data")
        return

    # get the link for the next week and follow it
    follow = soup.select("a.calendar__pagination.calendar__pagination--next.next")
    follow = follow[0]["href"]
    getEconomicCalendar(follow,endlink)

if __name__ == "__main__":
    """
    Run this using the command "python `script_name`.py >> `output_name`.csv"
    """
    setLogger()
    getEconomicCalendar("calendar.php?week=jan7.2007","calendar.php?week=dec24.2017")
@sheldonrobinson

```python
from bs4 import BeautifulSoup
from datetime import date, datetime
from typing import List
import urllib.request
import urllib.parse
import ssl
import json
from pytz import timezone


class PyEcoElement(object):

    def __init__(self, currency: str, event: str, impact: str, time_utc: str, actual: str, forecast: str, previous: str):
        self.currency = currency
        self.event = event
        self.impact = impact
        self.time_utc = time_utc
        self.actual = actual
        self.forecast = forecast
        self.previous = previous


class PyEcoRoot(object):

    def __init__(self, eco_elements: List[PyEcoElement]):
        self.eco_elements = eco_elements


class PyEcoCal:

    def GetEconomicCalendar(self, query_date: datetime):
        base_url = "https://www.forexfactory.com/"

        ssl._create_default_https_context = ssl._create_unverified_context

        # ctx = ssl.create_default_context()
        # ctx.check_hostname = False
        # ctx.verify_mode = ssl.CERT_NONE

        # html = urllib.request.urlopen(url, context=ctx).read()

        # get the page and make the soup
        urleco = f"{base_url}calendar.php?day={query_date.strftime('%b').lower()}{query_date.day}.{query_date.year}"
        date_string = query_date.strftime('%Y-%m-%d')
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')
        soup = BeautifulSoup(result, "html.parser")
        table = soup.find_all("tr", class_="calendar_row")
        cal_date = soup.find_all("a", {"class": "highlight light options flexTitle"})[0].span.text.strip()

        eco_day = []
        for item in table:
            row = {}

            row["Currency"] = item.find_all("td", {"class": "calendar__cell calendar__currency currency"})[0].text.strip()  # Currency
            row["Event"] = item.find_all("span", {"class": "calendar__event-title"})[0].text.strip()  # Event Name
            try:
                time_eastern = item.find_all("td", {"class": "calendar__cell calendar__time time"})[0].div.text.strip()  # Time Eastern
                datetime_eastern = datetime.strptime(f"{date_string} {time_eastern}", '%Y-%m-%d %I:%M%p')
            except:
                datetime_eastern = datetime.strptime(f"{date_string} 12:00am", '%Y-%m-%d %I:%M%p')
            eastern_tz = timezone('US/Eastern')
            row["Time_UTC"] = eastern_tz.localize(datetime(datetime_eastern.year, datetime_eastern.month,
                                                           datetime_eastern.day, datetime_eastern.hour,
                                                           datetime_eastern.minute, 0)).astimezone(timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")
            impact = item.find_all("td", {"class": "impact"})

            for icon in range(0, len(impact)):
                row["Impact"] = impact[icon].find_all("span")[0]['title'].split(' ', 1)[0]

            try:
                actual_value = item.find_all("td", {"class": "calendar__cell calendar__actual actual"})[0].text
                if actual_value is not None:
                    row["Actual"] = actual_value.strip()
                else:
                    row["Actual"] = item.find_all("td", {"class": "calendar__cell calendar__actual actual"})[0].span.text.strip()  # Actual Value
            except:
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all("span", {"class": "calendar-forecast"})[0].text.strip()  # Forecast Value
            except:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all("span", {"class": "calendar-previous"})[0].text.strip()  # Previous Value
            except:
                row["Previous"] = ""

            eco_day.append(row)

        events_array = []

        for row_dict in eco_day:
            eco_elem = PyEcoElement(
                row_dict["Currency"],
                row_dict["Event"],
                row_dict["Impact"],
                row_dict["Time_UTC"],
                row_dict["Actual"],
                row_dict["Forecast"],
                row_dict["Previous"]
            )
            events_array.append(eco_elem)

        eco_cal = PyEcoRoot(events_array)

        json_object = json.dumps(eco_cal.__dict__, default=lambda o: o.__dict__, indent=3)
        return json_object


if __name__ == "__main__":
    eco = PyEcoCal()
    json_output = eco.GetEconomicCalendar(datetime.today())
    print(json_output)
```

@Pauliwauli3

And how can I fetch the events for the next week?
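One possible approach, sketched under the assumption that the `PyEcoCal` class from the comment above still matches the current page layout: call `GetEconomicCalendar` once for each of the next seven days (the original gist instead walks week pages via the calendar pagination link).

```python
# Sketch only: fetch the next seven days, one day at a time, using the
# PyEcoCal class defined in the comment above.
from datetime import datetime, timedelta

eco = PyEcoCal()
for offset in range(1, 8):
    day = datetime.today() + timedelta(days=offset)
    print(eco.GetEconomicCalendar(day))
```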

@theshreyansjain

This works, thanks buddy

@JasPrograms

PS C:\Users\Jasper> python -u "c:\Users\Jasper\Downloads\Lot Size Calculator\main.py"
2023-07-24 21:32:36,080 - INFO - Scraping data for link: calendar.php?week=jan7.2007
2023-07-24 21:32:36,219 - WARNING - Table not found on the page. Exiting...
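
That warning suggests the response did not contain the calendar table at all, which can happen when the site serves a bot-protection or otherwise stripped-down page instead of the real calendar. A hedged sketch of one thing to try, mirroring the User-Agent trick from the urllib-based snippet earlier in the thread; `fetch_calendar_table` is a hypothetical helper, and there is no guarantee it gets past the site's current protections:

```python
import logging
import requests
from bs4 import BeautifulSoup

def fetch_calendar_table(link, base_url="https://www.forexfactory.com/"):
    # Hypothetical helper, not part of the original gist: request the calendar
    # page with a browser-like User-Agent and return the calendar table,
    # or None if the served page does not contain it.
    r = requests.get(base_url + link, headers={"User-Agent": "Mozilla/5.0"})
    soup = BeautifulSoup(r.text, "lxml")
    table = soup.find("table", class_="calendar__table")
    if table is None:
        logging.warning("Table not found on the page. Exiting...")
    return table
```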

@OussemaFr

Hey guys, discover this API providing access to all ForexFactory data. Unlock valuable insights and enhance your trading strategies efficiently.
Link to api: https://rapidapi.com/ousema.frikha/api/forex-factory-scraper1
