Created
November 27, 2020 11:17
-
-
Save miczed/a6204c86ca3338916b9ed2973425e3f0 to your computer and use it in GitHub Desktop.
Polls the Yahoo Finance site for the price of a given symbol and stores the result in a CSV file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# importing libraries / dependencies | |
import os | |
from selenium import webdriver | |
from selenium.webdriver.chrome.options import Options | |
import csv | |
import time | |
import datetime | |
import pytz | |
# --- configuration ---------------------------------------------------------
interval = 2.0  # polling interval of the crawler in seconds
symbol = "NDX"  # symbol that is written to the CSV file
market_open_hour = 14  # hour when the market opens (in UTC time)
market_open_minute = 30  # minute when the market opens
market_close_hour = 21  # hour when the market closes (in UTC time)


def market_phase(timestamp):
    """Classify *timestamp* relative to the configured trading session.

    The time of day is compared as "minutes since midnight" so that the hour
    and the minute are evaluated together. Comparing them independently
    (``hour >= 14 and minute >= 30``) would wrongly reject e.g. 15:10.

    Args:
        timestamp: a ``datetime.datetime`` whose ``hour``/``minute`` are
            expressed in the same time zone as the ``market_*`` settings
            (UTC).

    Returns:
        ``"before_open"`` if the time of day is earlier than the market open,
        ``"open"`` from the open (inclusive) up to the close (exclusive),
        ``"closed"`` from ``market_close_hour:00`` onwards.
    """
    minute_of_day = timestamp.hour * 60 + timestamp.minute
    opens_at = market_open_hour * 60 + market_open_minute
    closes_at = market_close_hour * 60

    if minute_of_day < opens_at:
        return "before_open"
    if minute_of_day < closes_at:
        return "open"
    return "closed"


def main():
    """Poll the Yahoo quote page and log prices to a CSV file until the close.

    While the market is open, one row ``symbol, price, timestamp`` is written
    every ``interval`` seconds. Before the open the crawler idles; it stops
    once the market has closed.
    """
    starttime = time.time()  # reference point for the interval calculation
    date = datetime.datetime.now()  # current (local) date, used for the filename

    # specify options for google chrome in headless mode (runs without GUI)
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # filename of the csv file (e.g. NDX-2020-11-26.csv; month/day are not zero-padded)
    csvfile = "{}-{}-{}-{}.csv".format(symbol, date.year, date.month, date.day)

    # run the google chrome driver
    # NOTE(review): `executable_path`/`chrome_options` and `find_element_by_xpath`
    # are older Selenium APIs -- confirm against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"), chrome_options=chrome_options)
    try:
        driver.get("https://finance.yahoo.com/quote/%5ENDX?p=&guccounter=1")

        # newline='' lets the csv module control line endings itself
        with open(csvfile, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["symbol", "price", "timestamp"])  # header row

            while True:
                # find the price element by its xpath on the website
                price = driver.find_element_by_xpath('//div[@id="quote-market-notice"]/preceding-sibling::span[2]').text
                # get the current UTC timestamp
                timestamp = datetime.datetime.now(pytz.utc)

                phase = market_phase(timestamp)
                if phase == "open":
                    print("symbol: " + symbol + ",price: " + price + ", timestamp: " + str(timestamp))
                    # only store the price while the market is open
                    writer.writerow([symbol, price, timestamp])
                elif phase == "closed":
                    print("market closed, ", str(timestamp))
                    # stop crawling
                    break
                else:
                    print("market not opened yet, ", str(timestamp))

                # sleep until the next multiple of `interval` since start, so the
                # time spent scraping does not make the polling drift
                time.sleep(interval - ((time.time() - starttime) % interval))
    finally:
        # always shut the browser and the chromedriver process down, even on errors
        driver.quit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment