-
-
Save pohzipohzi/ad7942fc5545675022c1f31123e64c0c to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup | |
import requests | |
import datetime | |
import logging | |
import csv | |
def setLogger(): | |
logging.basicConfig(level=logging.INFO, | |
format='%(asctime)s - %(levelname)s - %(message)s', | |
filename='logs_file', | |
filemode='w') | |
console = logging.StreamHandler() | |
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') | |
console.setFormatter(formatter) | |
logging.getLogger('').addHandler(console) | |
def getEconomicCalendar(startlink, endlink):
    """Scrape ForexFactory calendar weeks from startlink to endlink, inclusive.

    Prints one CSV line per event to stdout; rows that fail to parse are
    appended to errors.csv. Follows the "next week" pagination link
    iteratively — the original recursed once per week, which risks
    RecursionError over a multi-year range (~52 frames per year).
    """
    baseURL = "https://www.forexfactory.com/"
    fields = ["date", "time", "currency", "impact", "event", "actual", "forecast", "previous"]
    while True:
        logging.info("Scraping data for link: {}".format(startlink))
        # get the page and make the soup
        r = requests.get(baseURL + startlink)
        soup = BeautifulSoup(r.text, "lxml")
        # get and parse table data, ignoring details and graph
        table = soup.find("table", class_="calendar__table")
        if table is None:
            # Happens when the site blocks the client / serves a JS challenge.
            logging.warning("Table not found on the page. Exiting...")
            return
        # do not use the ".calendar__row--grey" css selector (reserved for historical data)
        trs = table.select("tr.calendar__row.calendar_row")
        curr_year = startlink[-4:]  # links end in e.g. "jan7.2007"
        # some rows do not have a date/time (cells merged) — carry the last seen ones
        curr_date = ""
        curr_time = ""
        for tr in trs:
            # fields may mess up sometimes, see Tue Sep 25 2:45AM French Consumer Spending;
            # in that case we append the date/time where the error is to errors.csv
            try:
                for field in fields:
                    data = tr.select("td.calendar__cell.calendar__{}.{}".format(field, field))[0]
                    text = data.text.strip()
                    if field == "date" and text != "":
                        curr_date = text
                    elif field == "time" and text != "":
                        # time is sometimes "All Day" or "Day X" (eg. WEF Annual Meetings)
                        curr_time = "12:00am" if "Day" in text else text
                    elif field == "currency":
                        currency = text
                    elif field == "impact":
                        # when impact says "Non-Economic" on mouseover, the relevant
                        # class name is "Holiday", thus we do not use the classname
                        impact = data.find("span")["title"]
                    elif field == "event":
                        event = text
                    elif field == "actual":
                        actual = text
                    elif field == "forecast":
                        forecast = text
                    elif field == "previous":
                        previous = text
                dt = datetime.datetime.strptime(",".join([curr_year, curr_date, curr_time]),
                                                "%Y,%a%b %d,%I:%M%p")
                print(",".join([str(dt), currency, impact, event, actual, forecast, previous]))
            except Exception:
                # narrowed from a bare `except:` (which also swallowed KeyboardInterrupt)
                with open("errors.csv", "a") as f:
                    csv.writer(f).writerow([curr_year, curr_date, curr_time])
        # exit when the last available link has been reached
        if startlink == endlink:
            logging.info("Successfully retrieved data")
            return
        # get the link for the next week and follow it
        follow = soup.select("a.calendar__pagination.calendar__pagination--next.next")
        startlink = follow[0]["href"]
if __name__ == "__main__":
    # Run this using the command: python `script_name`.py >> `output_name`.csv
    # (the original put this note in a no-op string literal; a comment is clearer,
    # and the " | |" scrape artifacts made the pasted version invalid Python)
    setLogger()
    getEconomicCalendar("calendar.php?week=jan7.2007", "calendar.php?week=dec24.2017")
Super beginner coder here — any help would be appreciated. I'm trying to run this in Visual Studio, but every time I run it, it returns nothing. Any ideas what might be causing it not to print anything?
from bs4 import BeautifulSoup
from datetime import date, datetime
from typing import List
import urllib.request
import urllib.parse
import ssl
import json
from json import JSONEncoder
from pytz import timezone
class PyEcoElement(object):
    """A single economic-calendar event; all fields are kept as strings."""

    # The pasted version declared `init` (markdown ate the underscores), so
    # Python's default constructor ran instead and no attributes were ever
    # set; `__init__` restores the constructor.
    def __init__(self, currency: str, event: str, impact: str, time_utc: str,
                 actual: str, forecast: str, previous: str):
        self.currency = currency
        self.event = event
        self.impact = impact
        self.time_utc = time_utc
        self.actual = actual
        self.forecast = forecast
        self.previous = previous
class PyEcoRoot(object):
    """Top-level JSON container; serializes as {"eco_elements": [...]}."""

    # `init` -> `__init__` so the constructor actually runs. The annotation
    # uses a forward-reference string so this class no longer requires
    # PyEcoElement to be defined first.
    def __init__(self, eco_elements: List["PyEcoElement"]):
        self.eco_elements = eco_elements
class PyEcoCal:
    """Scrapes one day of the ForexFactory economic calendar into JSON."""

    def GetEconomicCalendar(self, query_date: datetime):
        """Return the calendar events for *query_date* as a pretty-printed JSON string.

        Page times are US/Eastern; they are converted to UTC and formatted
        as "%Y%m%dT%H:%M:%S %z".
        """
        base_url = "https://www.forexfactory.com/"
        # NOTE(review): disables TLS certificate verification process-wide;
        # tolerable for a throwaway scraper, not for shared/production code.
        ssl._create_default_https_context = ssl._create_unverified_context
        # e.g. https://www.forexfactory.com/calendar.php?day=jan7.2021
        urleco = f"{base_url}calendar.php?day={query_date.strftime('%b').lower()}{query_date.day}.{query_date.year}"
        date_string = query_date.strftime('%Y-%m-%d')
        opener = urllib.request.build_opener()
        # urllib's default User-Agent is rejected (503); pretend to be a browser.
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')
        soup = BeautifulSoup(result, "html.parser")
        calendar_rows = soup.find_all("tr", class_="calendar_row")
        # (removed the unused `cal_date` lookup, which itself raised IndexError
        # when the header anchor was absent)
        eastern_tz = timezone('US/Eastern')  # hoisted: constant across rows
        eco_day = []
        for item in calendar_rows:
            row = {}  # renamed from `dict`, which shadowed the builtin
            row["Currency"] = item.find_all(
                "td", {"class": "calendar__cell calendar__currency currency"})[0].text.strip()
            row["Event"] = item.find_all(
                "span", {"class": "calendar__event-title"})[0].text.strip()
            try:
                time_eastern = item.find_all(
                    "td", {"class": "calendar__cell calendar__time time"})[0].div.text.strip()
                datetime_eastern = datetime.strptime(f"{date_string} {time_eastern}", '%Y-%m-%d %I:%M%p')
            except (IndexError, AttributeError, ValueError):
                # "All Day"-style rows carry no parseable clock time: use midnight.
                datetime_eastern = datetime.strptime(f"{date_string} 12:00am", '%Y-%m-%d %I:%M%p')
            # strptime already yields second == 0, so localize directly.
            row["Time_UTC"] = eastern_tz.localize(datetime_eastern).astimezone(
                timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")
            row["Impact"] = ""  # default so rows without an impact cell still serialize
            for impact_cell in item.find_all("td", {"class": "impact"}):
                row["Impact"] = impact_cell.find_all("span")[0]['title'].split(' ', 1)[0]
            try:
                # .text is always a str, so the original's `is None` else-branch was dead code.
                row["Actual"] = item.find_all(
                    "td", {"class": "calendar__cell calendar__actual actual"})[0].text.strip()
            except IndexError:
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all("span", {"class": "calendar-forecast"})[0].text.strip()
            except IndexError:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all("span", {"class": "calendar-previous"})[0].text.strip()
            except IndexError:
                row["Previous"] = ""
            eco_day.append(row)
        events_array = [
            PyEcoElement(r["Currency"], r["Event"], r["Impact"], r["Time_UTC"],
                         r["Actual"], r["Forecast"], r["Previous"])
            for r in eco_day
        ]
        eco_cal = PyEcoRoot(events_array)
        # default= lets json serialize PyEcoElement instances via their __dict__.
        return json.dumps(eco_cal.__dict__, default=lambda o: o.__dict__, indent=3)
if __name__ == "__main__":
    # The paste lost the dunder underscores: `if name == "main"` raises
    # NameError at runtime. Also avoid rebinding the imported `json` module.
    eco = PyEcoCal()
    calendar_json = eco.GetEconomicCalendar(datetime.today())
    print(calendar_json)
Converts times to UTC timestamps and handles the edge case where the actual value is found in a span element.
```python
from bs4 import BeautifulSoup
from datetime import date, datetime
from typing import List
import urllib.request
import urllib.parse
import ssl
import json
from pytz import timezone
class PyEcoElement(object):
    """One row of the economic calendar, stored as plain string fields.

    Instances are serialized through ``__dict__``, so attribute order here
    determines key order in the JSON output.
    """

    def __init__(self, currency: str, event: str, impact: str, time_utc: str,
                 actual: str, forecast: str, previous: str):
        # Store every field verbatim, in declaration order.
        self.currency, self.event, self.impact = currency, event, impact
        self.time_utc = time_utc
        self.actual, self.forecast, self.previous = actual, forecast, previous
class PyEcoRoot(object):
    """Top-level container; serializes to JSON as {"eco_elements": [...]}."""

    def __init__(self, eco_elements : List[PyEcoElement]):
        # Keep a direct reference to the caller's list of events.
        self.eco_elements = eco_elements
class PyEcoCal:
    """Scrapes one day of the ForexFactory economic calendar into JSON."""

    def GetEconomicCalendar(self, query_date: datetime):
        """Return the calendar events for *query_date* as a pretty-printed JSON string.

        Page times are US/Eastern; they are converted to UTC and formatted
        as "%Y%m%dT%H:%M:%S %z".
        """
        base_url = "https://www.forexfactory.com/"
        # NOTE(review): disables TLS certificate verification process-wide;
        # tolerable for a throwaway scraper, not for shared/production code.
        ssl._create_default_https_context = ssl._create_unverified_context
        # e.g. https://www.forexfactory.com/calendar.php?day=jan7.2021
        urleco = f"{base_url}calendar.php?day={query_date.strftime('%b').lower()}{query_date.day}.{query_date.year}"
        date_string = query_date.strftime('%Y-%m-%d')
        opener = urllib.request.build_opener()
        # urllib's default User-Agent is rejected (503); pretend to be a browser.
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')
        soup = BeautifulSoup(result, "html.parser")
        calendar_rows = soup.find_all("tr", class_="calendar_row")
        # (removed the unused `cal_date` lookup, which itself raised IndexError
        # when the header anchor was absent)
        eastern_tz = timezone('US/Eastern')  # hoisted: constant across rows
        eco_day = []
        for item in calendar_rows:
            row = {}  # renamed from `dict`, which shadowed the builtin
            row["Currency"] = item.find_all(
                "td", {"class": "calendar__cell calendar__currency currency"})[0].text.strip()
            row["Event"] = item.find_all(
                "span", {"class": "calendar__event-title"})[0].text.strip()
            try:
                time_eastern = item.find_all(
                    "td", {"class": "calendar__cell calendar__time time"})[0].div.text.strip()
                datetime_eastern = datetime.strptime(f"{date_string} {time_eastern}", '%Y-%m-%d %I:%M%p')
            except (IndexError, AttributeError, ValueError):
                # "All Day"-style rows carry no parseable clock time: use midnight.
                datetime_eastern = datetime.strptime(f"{date_string} 12:00am", '%Y-%m-%d %I:%M%p')
            # strptime already yields second == 0, so localize directly.
            row["Time_UTC"] = eastern_tz.localize(datetime_eastern).astimezone(
                timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")
            row["Impact"] = ""  # default so rows without an impact cell still serialize
            for impact_cell in item.find_all("td", {"class": "impact"}):
                row["Impact"] = impact_cell.find_all("span")[0]['title'].split(' ', 1)[0]
            try:
                # .text is always a str, so the original's `is None` else-branch was dead code.
                row["Actual"] = item.find_all(
                    "td", {"class": "calendar__cell calendar__actual actual"})[0].text.strip()
            except IndexError:
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all("span", {"class": "calendar-forecast"})[0].text.strip()
            except IndexError:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all("span", {"class": "calendar-previous"})[0].text.strip()
            except IndexError:
                row["Previous"] = ""
            eco_day.append(row)
        events_array = [
            PyEcoElement(r["Currency"], r["Event"], r["Impact"], r["Time_UTC"],
                         r["Actual"], r["Forecast"], r["Previous"])
            for r in eco_day
        ]
        eco_cal = PyEcoRoot(events_array)
        # default= lets json serialize PyEcoElement instances via their __dict__.
        return json.dumps(eco_cal.__dict__, default=lambda o: o.__dict__, indent=3)
if __name__ == "__main__":
    # The paste lost the dunder underscores: `if name == "main"` raises
    # NameError at runtime. Also avoid rebinding the imported `json` module.
    eco = PyEcoCal()
    calendar_json = eco.GetEconomicCalendar(datetime.today())
    print(calendar_json)
```
And how can I fetch the events for the next week?
from bs4 import BeautifulSoup from datetime import date, datetime from typing import List import urllib.request import urllib.parse import ssl import json from pytz import timezone
class PyEcoElement(object):
    """A single economic-calendar event; all fields are kept as strings.

    (Reformatted: the pasted version had the whole class collapsed onto one
    line, which is not valid Python.)
    """

    def __init__(self, currency: str, event: str, impact: str, time_utc: str,
                 actual: str, forecast: str, previous: str):
        self.currency = currency
        self.event = event
        self.impact = impact
        self.time_utc = time_utc
        self.actual = actual
        self.forecast = forecast
        self.previous = previous
class PyEcoRoot(object):
    """Top-level JSON container; serializes as {"eco_elements": [...]}.

    (Reformatted from a collapsed one-line paste. The annotation uses a
    forward-reference string so this class does not depend on definition
    order.)
    """

    def __init__(self, eco_elements: List["PyEcoElement"]):
        self.eco_elements = eco_elements
class PyEcoCal:
    """Scrapes one day of the ForexFactory economic calendar into JSON.

    (Reformatted: the pasted version had the whole class collapsed onto a
    couple of unindented lines, which is not valid Python.)
    """

    def GetEconomicCalendar(self, query_date: datetime):
        """Return the calendar events for *query_date* as a pretty-printed JSON string.

        Page times are US/Eastern; they are converted to UTC and formatted
        as "%Y%m%dT%H:%M:%S %z".
        """
        base_url = "https://www.forexfactory.com/"
        # NOTE(review): disables TLS certificate verification process-wide;
        # tolerable for a throwaway scraper, not for shared/production code.
        ssl._create_default_https_context = ssl._create_unverified_context
        # e.g. https://www.forexfactory.com/calendar.php?day=jan7.2021
        urleco = f"{base_url}calendar.php?day={query_date.strftime('%b').lower()}{query_date.day}.{query_date.year}"
        date_string = query_date.strftime('%Y-%m-%d')
        opener = urllib.request.build_opener()
        # urllib's default User-Agent is rejected (503); pretend to be a browser.
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')
        soup = BeautifulSoup(result, "html.parser")
        calendar_rows = soup.find_all("tr", class_="calendar_row")
        eastern_tz = timezone('US/Eastern')  # hoisted: constant across rows
        eco_day = []
        for item in calendar_rows:
            row = {}  # renamed from `dict`, which shadowed the builtin
            row["Currency"] = item.find_all(
                "td", {"class": "calendar__cell calendar__currency currency"})[0].text.strip()
            row["Event"] = item.find_all(
                "span", {"class": "calendar__event-title"})[0].text.strip()
            try:
                time_eastern = item.find_all(
                    "td", {"class": "calendar__cell calendar__time time"})[0].div.text.strip()
                datetime_eastern = datetime.strptime(f"{date_string} {time_eastern}", '%Y-%m-%d %I:%M%p')
            except (IndexError, AttributeError, ValueError):
                # "All Day"-style rows carry no parseable clock time: use midnight.
                datetime_eastern = datetime.strptime(f"{date_string} 12:00am", '%Y-%m-%d %I:%M%p')
            # strptime already yields second == 0, so localize directly.
            row["Time_UTC"] = eastern_tz.localize(datetime_eastern).astimezone(
                timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")
            row["Impact"] = ""  # default so rows without an impact cell still serialize
            for impact_cell in item.find_all("td", {"class": "impact"}):
                row["Impact"] = impact_cell.find_all("span")[0]['title'].split(' ', 1)[0]
            try:
                # .text is always a str, so the original's `is None` else-branch was dead code.
                row["Actual"] = item.find_all(
                    "td", {"class": "calendar__cell calendar__actual actual"})[0].text.strip()
            except IndexError:
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all("span", {"class": "calendar-forecast"})[0].text.strip()
            except IndexError:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all("span", {"class": "calendar-previous"})[0].text.strip()
            except IndexError:
                row["Previous"] = ""
            eco_day.append(row)
        events_array = [
            PyEcoElement(r["Currency"], r["Event"], r["Impact"], r["Time_UTC"],
                         r["Actual"], r["Forecast"], r["Previous"])
            for r in eco_day
        ]
        eco_cal = PyEcoRoot(events_array)
        # default= lets json serialize PyEcoElement instances via their __dict__.
        return json.dumps(eco_cal.__dict__, default=lambda o: o.__dict__, indent=3)
if __name__ == "__main__":
    # Reformatted from a collapsed one-line paste; `if name == "main"` would
    # raise NameError. Also avoid rebinding the imported `json` module.
    eco = PyEcoCal()
    calendar_json = eco.GetEconomicCalendar(datetime.today())
    print(calendar_json)
This works, thanks buddy
PS C:\Users\Jasper> python -u "c:\Users\Jasper\Downloads\Lot Size Calculator\main.py"
2023-07-24 21:32:36,080 - INFO - Scraping data for link: calendar.php?week=jan7.2007
2023-07-24 21:32:36,219 - WARNING - Table not found on the page. Exiting...
Hey guys, discover this API providing access to all ForexFactory data. Unlock valuable insights and enhance your trading strategies efficiently.
Link to api: https://rapidapi.com/ousema.frikha/api/forex-factory-scraper1
Getting a 503 Error.
This is because the User-Agent for Python's urllib is so obviously not a browser. You could always fake the User-Agent, but that's not really good (or moral) practice.
Any reason why you replaced the requests library with urllib?