Last active
September 2, 2021 00:40
-
-
Save addiversitas/40564089b3e96b3a79391e1c47a70b60 to your computer and use it in GitHub Desktop.
option chain web scraping example for AAPL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Option chain web scraping example.
#
# Downloads the Yahoo Finance option chain of `symbol`, keeps the expiries
# whose time to maturity (in days) lies inside `ttmBoundaries`, and builds two
# tables, `calls` and `puts`, restricted to contracts whose moneyness lies
# inside `moneynessBoundaries` and whose last trade happened at or after
# 3:55PM on the last trade date of the underlying.

# load packages
library(quantmod)
library(rvest)

# Extract and clean one option table from the page of a single expiry.
#
# expiryHTML: parsed HTML document of the expiry page.
# cssClass:   CSS selector of the table to extract (".calls" or ".puts").
# ttm:        time to maturity in days, stored in the `ttm` column.
# lastPrice:  last price of the underlying, used for the `moneyness` column.
# type:       "call" (moneyness = lastPrice / strike) or
#             "put"  (moneyness = strike / lastPrice).
#
# Returns the table with lower-case column names, a numeric `strike` and the
# added columns `ttm`, `moneyness` and `ivOrig`, or NULL when the page holds
# no (non-empty) table for the selector.
scrapeOptionTable <- function(expiryHTML, cssClass, ttm, lastPrice, type) {
  type <- match.arg(type, c("call", "put"))

  tables <- expiryHTML %>% html_nodes(cssClass) %>% html_table()
  if (length(tables) == 0) {
    return(NULL)
  }

  chain <- tables[[1]]
  if (nrow(chain) == 0) {
    return(NULL)
  }

  # sometimes column names are in uppercase, sometimes not
  colnames(chain) <- tolower(colnames(chain))
  # remove thousand separator and convert to numeric if applicable
  chain$strike <- as.numeric(gsub(",", "", chain$strike))
  # add time to maturity
  chain$ttm <- ttm
  # calculate moneyness (direction depends on the option type)
  chain$moneyness <- if (type == "call") {
    lastPrice / chain$strike
  } else {
    chain$strike / lastPrice
  }
  # convert yahoo finance IV (e.g. "1,234.56%") to a numeric fraction
  ivText <- gsub("%", "", gsub(",", "", chain$`implied volatility`))
  chain$ivOrig <- as.numeric(ivText) / 100

  chain
}

# Keep only the rows of an option table that are inside the moneyness
# boundaries and whose last trade is at or after `cutoff`.
#
# chain:               table built by scrapeOptionTable(), or NULL.
# moneynessBoundaries: numeric vector c(lower, upper), both inclusive.
# cutoff:              date-time as returned by strptime().
#
# Rows whose moneyness or last trade date cannot be evaluated (NA) are
# dropped instead of being returned as all-NA rows. NULL or empty input is
# returned unchanged.
filterOptionTable <- function(chain, moneynessBoundaries, cutoff) {
  if (is.null(chain) || nrow(chain) == 0) {
    return(chain)
  }

  lastTrade <- strptime(chain$`last trade date`,
                        format = "%Y-%m-%d %I:%M%p")
  keep <- chain$moneyness >= moneynessBoundaries[1] &
    chain$moneyness <= moneynessBoundaries[2] &
    lastTrade >= cutoff

  # which() discards NA conditions, so no phantom NA rows are created
  chain[which(keep), ]
}

# get underlying stock info and last trade date
symbol <- "AAPL"
priceInfo <- getQuote(symbol)
lastPrice <- priceInfo$Last
date <- as.Date(priceInfo$`Trade Time`)

# settings for moneyness and time to maturity (in days)
moneynessBoundaries <- c(0.85, 1.15)
ttmBoundaries <- c(7, 183)

# scrape full site
baseUrl <- paste0("https://finance.yahoo.com/quote/", symbol, "/options")
baseHTML <- read_html(baseUrl)

# get available expiries and convert to time to maturity
expiryNodes <- baseHTML %>% html_nodes("option")
expiriesUNIX <- expiryNodes %>% html_attr("value")
# NOTE(review): "%b" is matched against the month names of the current
# locale - confirm the script runs in an English locale.
expiries <- as.Date(expiryNodes %>% html_text(), format = "%b %d, %Y")
timeToMats <- as.numeric(expiries - date)

# select applicable expiries; labels that could not be parsed are excluded
sel <- !is.na(timeToMats) &
  timeToMats >= ttmBoundaries[1] &
  timeToMats <= ttmBoundaries[2]
expiriesUNIX <- expiriesUNIX[sel]
expiries <- expiries[sel]
timeToMats <- timeToMats[sel]

# download every selected expiry once and extract its calls and puts;
# seq_along() makes this a no-op when no expiry was selected
chains <- lapply(seq_along(expiriesUNIX), function(i) {
  expiryUrl <- paste0(baseUrl, "?date=", expiriesUNIX[i])
  expiryHTML <- read_html(expiryUrl)
  list(
    calls = scrapeOptionTable(expiryHTML, ".calls", timeToMats[i],
                              lastPrice, "call"),
    puts = scrapeOptionTable(expiryHTML, ".puts", timeToMats[i],
                             lastPrice, "put")
  )
})

# bind the per-expiry tables in one step; the result is NULL when there is
# nothing to bind
calls <- do.call(rbind, Filter(Negate(is.null), lapply(chains, `[[`, "calls")))
puts <- do.call(rbind, Filter(Negate(is.null), lapply(chains, `[[`, "puts")))

# earliest accepted last trade time: 3:55PM on the stock's last trading day
# (the format string ends at the AM/PM marker, the zone text is not part of it)
lastTradeCutoff <- strptime(paste0(date, " 3:55PM EDT"),
                            format = "%Y-%m-%d %I:%M%p")

# select only calls within the applicable moneyness boundaries that have
# traded during the last 5 minutes of the stocks last trading day
calls <- filterOptionTable(calls, moneynessBoundaries, lastTradeCutoff)

# select only puts within the applicable moneyness boundaries that have
# traded during the last 5 minutes of the stocks last trading day
puts <- filterOptionTable(puts, moneynessBoundaries, lastTradeCutoff)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment