|
''' |
|
Download Coffee Futures Historical Data, traded on the New York Board of Trade, from Yahoo Finance.
|
|
|
author: Mauro Baraldi |
|
email: [email protected] |
|
revision: 0.0.1 |
|
date: 13/05/2025 |
|
buy me a coffee: https://buymeacoffee.com/maurobaraldi |
|
|
|
Contracts Settlement Date |
|
KCK25.NYB Coffee May 25 |
|
KCN25.NYB Coffee Jul 25 |
|
KCU25.NYB Coffee Sep 25 |
|
KCZ25.NYB Coffee Dec 25 |
|
KCH26.NYB Coffee Mar 26 |
|
KCK26.NYB Coffee May 26 |
|
KCN26.NYB Coffee Jul 26 |
|
KCU26.NYB Coffee Sep 26 |
|
KCZ26.NYB Coffee Dec 26 |
|
KCH27.NYB Coffee Mar 27 |
|
KCK27.NYB Coffee May 27 |
|
KCN27.NYB Coffee Jul 27 |
|
KCU27.NYB Coffee Sep 27 |
|
KCZ27.NYB Coffee Dec 27 |
|
|
|
For an updated list of coffee contracts, see https://finance.yahoo.com/quote/KC%3DF/futures/
|
''' |
|
|
|
from csv import DictWriter |
|
from datetime import datetime |
|
from os import remove |
|
from time import sleep, time |
|
|
|
from lxml import html |
|
from requests import get |
|
from selenium import webdriver |
|
from selenium.webdriver.chrome.options import Options |
|
from selenium.webdriver.chrome.service import Service |
|
from webdriver_manager.chrome import ChromeDriverManager |
|
|
|
|
|
# Yahoo Finance tickers of the coffee futures contracts handled by this
# script, in the order they are downloaded and converted.
contracts = [
    "KCK25.NYB",
    "KCN25.NYB",
    "KCU25.NYB",
    "KCZ25.NYB",
    "KCH26.NYB",
    "KCK26.NYB",
    "KCN26.NYB",
    "KCU26.NYB",
    "KCZ26.NYB",
    "KCH27.NYB",
    "KCK27.NYB",
    "KCN27.NYB",
    "KCU27.NYB",
    "KCZ27.NYB",
]
|
|
|
def download():
    """Save the Yahoo Finance history page of every contract as an HTML file.

    For each ticker in the module-level ``contracts`` list, a headless Chrome
    session opens the contract's history page, rejects the cookie dialog when
    one is shown, scrolls to the bottom of the page and writes the rendered
    page source to ``./<contract>.html`` (UTF-8) in the working directory.

    Returns:
        None. The output is the set of HTML files written to disk.

    Raises:
        Any exception raised by Selenium or by the file write propagates to
        the caller, after the browser session of the failing contract has
        been shut down.
    """
    # Options for web browser (Chromium)
    options = Options()
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        options.add_argument(flag)

    reject_cookies_xpath = (
        "//button[contains(@class, 'btn') and contains(@class, 'secondary') "
        "and contains(@class, 'reject-all')]"
    )

    for contract in contracts:
        print(f"Downloading contract {contract}...")

        # Web browser (Chromium) engine; a fresh session per contract.
        driver = webdriver.Chrome(
            service=Service("/usr/bin/chromedriver"), options=options
        )
        try:
            # Filter data since August 1 2022 (max) until today.
            driver.get(
                f"https://finance.yahoo.com/quote/{contract}/history/"
                f"?filter=history&period1=1659326400&period2={int(time())}"
            )

            # Wait to load modal for accepting or rejecting cookies.
            sleep(3)

            # Reject cookies. find_elements() returns an empty list instead
            # of raising when the consent dialog is not shown, so a missing
            # dialog no longer aborts the whole download.
            reject_buttons = driver.find_elements("xpath", reject_cookies_xpath)
            if reject_buttons:
                driver.execute_script('arguments[0].click()', reject_buttons[0])

            # Wait to load historical data page.
            sleep(3)

            # Scroll page until the end to load all data.
            # NOTE(review): the page source is captured right after the
            # scroll; if rows are loaded lazily a short wait may be needed.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Save the HTML data to parse it to CSV. Written as UTF-8 because
            # convert_to_csv() reads the file back with that encoding.
            with open(f"./{contract}.html", "w", encoding="utf-8") as page_file:
                page_file.write(driver.page_source)
        finally:
            # quit() ends the whole WebDriver session (browser and driver
            # process), and runs even when a step above fails.
            driver.quit()

        print(f"Contract {contract} downloaded successfully.")

        # Wait 5 seconds for the next download to avoid block.
        sleep(5)
|
|
|
def _cell_text(element):
    """Return the stripped leading text of an lxml element ('' when absent)."""
    return (element.text or "").strip()


def _parse_number(text):
    """Parse a number such as '1,234.50', dropping thousands separators."""
    return float(text.replace(",", ""))


def _parse_volume(text):
    """Parse a volume cell; a '-' placeholder or empty cell counts as 0."""
    return 0.0 if text in ("", "-") else _parse_number(text)


def convert_to_csv():
    """Convert every saved ``./<contract>.html`` page into ``<contract>.csv``.

    For each ticker in the module-level ``contracts`` list the saved page is
    parsed, the historical data table is located, and its rows are written
    oldest-first to ``<contract>.csv`` using the table headers as CSV columns.
    The first cell of a row is parsed as a date (``'%b %d, %Y'``), the last
    cell as the volume and the cells in between as prices. The HTML file is
    deleted once its CSV has been written.

    Returns:
        None. The output is the set of CSV files written to disk.

    Raises:
        FileNotFoundError: if a contract's HTML file does not exist.
        IndexError: if the historical data table is not found in a page.
        ValueError: if a date or number cell cannot be parsed.
    """
    # Iterate over each contract
    for contract in contracts:
        html_path = f"./{contract}.html"

        with open(html_path, 'r', encoding='utf-8') as page_file:
            htmldoc = html.fromstring(page_file.read())

        # Filter rows and columns to extract.
        # NOTE(review): 'yf-1jecxey' looks like a generated class name and
        # may change when the site is redeployed - confirm before relying on it.
        tables = htmldoc.xpath("//table[contains(@class, 'table') and contains(@class, 'yf-1jecxey') and contains(@class, 'noDl') and contains(@class, 'hideOnPrint')]")
        if not tables:
            raise IndexError(f"No historical data table found in {html_path}")
        table = tables[0]

        # Relative paths ('.//'): a path starting with '//' is evaluated
        # against the whole document even when called on the table element.
        columns = [_cell_text(header) for header in table.xpath(".//th")]
        rows = table.xpath(".//tbody//tr")

        # Clean and prepare data (the page lists rows newest-first, so they
        # are reversed to get chronological order).
        result = []
        for row in reversed(rows):
            cells = list(row)
            # Skip rows that do not have one cell per column; they cannot be
            # mapped onto the headers.
            if len(cells) != len(columns):
                continue
            day = datetime.strptime(_cell_text(cells[0]), '%b %d, %Y')
            prices = [_parse_number(_cell_text(cell)) for cell in cells[1:-1]]
            volume = _parse_volume(_cell_text(cells[-1]))
            result.append(dict(zip(columns, [day, *prices, volume])))

        # Save data to CSV file
        with open(f'{contract}.csv', 'w', newline='', encoding='utf-8') as csvfile:
            writer = DictWriter(csvfile, fieldnames=columns)
            writer.writeheader()
            writer.writerows(result)

        # The HTML page is only an intermediate artifact.
        remove(html_path)
|
|
|
if __name__ == "__main__":
    # convert_to_csv() deletes each HTML page after converting it, so the
    # pages have to be downloaded before every conversion run; with the
    # download step disabled a second run fails with FileNotFoundError.
    download()
    convert_to_csv()