Skip to content

Instantly share code, notes, and snippets.

@addiversitas
Last active September 2, 2021 00:40
Show Gist options
  • Save addiversitas/40564089b3e96b3a79391e1c47a70b60 to your computer and use it in GitHub Desktop.
Save addiversitas/40564089b3e96b3a79391e1c47a70b60 to your computer and use it in GitHub Desktop.
Option chain web scraping example for AAPL (Yahoo Finance, using quantmod and rvest)
#load packages
library(quantmod)
library(rvest)
# Underlying quote: last traded price and the calendar date of that trade.
symbol <- "AAPL"
priceInfo <- getQuote(symbol)
lastPrice <- priceInfo$Last
date <- as.Date(priceInfo$`Trade Time`)

# Filter settings (both bounds inclusive): allowed moneyness range and
# allowed time to maturity in days.
moneynessBoundaries <- c(0.85, 1.15)
ttmBoundaries <- c(7, 183)

# Download the options landing page for the symbol.
baseUrl <- paste0("https://finance.yahoo.com/quote/", symbol, "/options")
baseHTML <- read_html(baseUrl)

# Every <option> node is treated as one expiry: its `value` attribute is the
# identifier appended to the URL as `?date=...` (presumably a UNIX timestamp,
# going by the variable name), its text is the human-readable expiry date.
# The nodes are queried once so both vectors are guaranteed to line up.
expiryNodes <- baseHTML %>% html_nodes("option")
expiriesUNIX <- expiryNodes %>% html_attr("value")
expiries <- as.Date(expiryNodes %>% html_text(), format = "%b %d, %Y")
timeToMats <- as.numeric(expiries - date)

# Keep only expiries inside the time-to-maturity window. A label that does
# not match the date format parses to NA; excluding it explicitly prevents NA
# entries from leaking into the expiry vectors (and into request URLs).
sel <- !is.na(timeToMats) &
  timeToMats >= ttmBoundaries[1] &
  timeToMats <= ttmBoundaries[2]
expiriesUNIX <- expiriesUNIX[sel]
expiries <- expiries[sel]
timeToMats <- timeToMats[sel]
# Parse one option table out of an expiry page.
#
# pageHTML     parsed expiry page (result of read_html()).
# cssSelector  CSS selector of the table to extract (".calls" or ".puts").
# ttm          time to maturity stored in the `ttm` column of every row.
# moneynessOf  function mapping the numeric strike vector to moneyness.
#
# Returns the table with lower-cased column names, a numeric `strike`, and
# the added columns `ttm`, `moneyness` and `ivOrig`; returns NULL when the
# page contains no matching table.
parseOptionTable <- function(pageHTML, cssSelector, ttm, moneynessOf) {
  tables <- pageHTML %>% html_nodes(cssSelector) %>% html_table()
  if (length(tables) == 0) {
    return(NULL)
  }
  optionTable <- tables[[1]]
  # Column names are sometimes upper case, sometimes not: normalise them.
  colnames(optionTable) <- tolower(colnames(optionTable))
  # Remove thousands separators before converting the strike to numeric.
  optionTable$strike <- as.numeric(gsub(",", "", optionTable$strike))
  optionTable$ttm <- ttm
  optionTable$moneyness <- moneynessOf(optionTable$strike)
  # Implied volatility is scraped as a percentage string such as "1,234.56%";
  # strip "%" and "," and rescale to a fraction.
  optionTable$ivOrig <-
    as.numeric(gsub("[%,]", "", optionTable$`implied volatility`)) / 100
  optionTable
}

# Download every selected expiry page and collect its calls and puts.
# Results are gathered in preallocated lists and bound once after the loop
# instead of growing a data frame with rbind() on every iteration.
callsByExpiry <- vector("list", length(expiriesUNIX))
putsByExpiry <- vector("list", length(expiriesUNIX))
# seq_along() yields an empty sequence when no expiry was selected, whereas
# 1:length(x) would iterate over c(1, 0) and request invalid pages.
for (i in seq_along(expiriesUNIX)) {
  expiryUrl <- paste0(baseUrl, "?date=", expiriesUNIX[i])
  expiryHTML <- read_html(expiryUrl)
  # `x[i] <- list(value)` keeps slot i even when value is NULL
  # (`x[[i]] <- NULL` would delete the slot and shift later indices).
  # Calls use price / strike as moneyness, puts use strike / price.
  callsByExpiry[i] <- list(parseOptionTable(
    expiryHTML, ".calls", timeToMats[i],
    function(strike) lastPrice / strike
  ))
  putsByExpiry[i] <- list(parseOptionTable(
    expiryHTML, ".puts", timeToMats[i],
    function(strike) strike / lastPrice
  ))
}
# NULL entries are ignored by rbind(); the result is NULL when nothing was
# scraped at all, matching the initial state of the accumulators.
calls <- do.call(rbind, callsByExpiry)
puts <- do.call(rbind, putsByExpiry)
# Keep the rows of an option table that lie inside the moneyness range
# (bounds inclusive) and whose last trade happened at or after `cutoff`.
#
# options         table with `moneyness` and `last trade date` columns,
#                 or NULL when nothing was scraped.
# moneynessRange  numeric vector c(lower, upper).
# cutoff          POSIXct time; earlier last trades are dropped.
#
# Returns the filtered table (NULL is passed through unchanged).
filterOptions <- function(options, moneynessRange, cutoff) {
  if (is.null(options)) {
    return(NULL)
  }
  # The format stops at the AM/PM marker; any trailing text in the scraped
  # string (e.g. a time zone label) is ignored by strptime().
  lastTrade <- as.POSIXct(
    strptime(options$`last trade date`, format = "%Y-%m-%d %I:%M%p")
  )
  keep <- options$moneyness >= moneynessRange[1] &
    options$moneyness <= moneynessRange[2] &
    lastTrade >= cutoff
  # which() drops NA conditions (unparsable strike or trade time); indexing
  # with a logical vector containing NA would insert all-NA rows instead.
  options[which(keep), , drop = FALSE]
}

# Only options traded during the last five minutes of the stock's last
# trading day are kept: the cutoff is 3:55PM on the underlying's trade date.
lastTradeCutoff <- as.POSIXct(
  strptime(paste0(date, " 3:55PM EDT"), format = "%Y-%m-%d %I:%M%p")
)
calls <- filterOptions(calls, moneynessBoundaries, lastTradeCutoff)
puts <- filterOptions(puts, moneynessBoundaries, lastTradeCutoff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment