Created
April 26, 2019 15:50
-
-
Save srvanderplas/3039f0c664fca0a4427c18889cccf512 to your computer and use it in GitHub Desktop.
Scrape Power Pool Data (from 2014)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/Rscript | |
# Scrape Southwest Power Pool LMP/SMP/MCC | |
library(scrapeR) | |
library(stringr) | |
library(lubridate) | |
library(ggplot2) | |
library(RMySQL) | |
library(plyr) | |
library(reshape2) | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
table <- xmlToDataFrame("http://www.spp.org/XML/LIP-Pricing.xml", stringsAsFactors=F) | |
d2 <- dbReadTable(con, "SWPower") | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "SWPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# Scrape MISO Pool LMP/MCC/MLC | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
tmp <- scrape("https://www.misoenergy.org/ria/Consolidated.aspx?format=xml")[[1]] | |
tmp2 <- getNodeSet(tmp, "//*/lmpdata")[[1]] | |
date <- xmlAttrs(getNodeSet(tmp, "//*/lmpdata")[[1]])[["refid"]] | |
table <- ldply(xmlChildren(xmlChildren(tmp2)[[1]]), function(i) as.data.frame(t(xmlAttrs(i)), stringsAsFactors=F)) | |
table$.id <- date | |
names(table)[1] <- "date" | |
d2 <- dbReadTable(con, "MisoPower") | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "MisoPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# Scrape CAISO (california) LMP | |
date <- Sys.time() | |
tz(date) <- "America/Los_Angeles" | |
hour(date) <- hour(date)+2 | |
date <- floor_date(date, "minute") | |
tmp <- scrape("http://oasis.caiso.com/mrtu-oasis/lmp/RTM/POINTMap.html")[[1]] | |
tmp2 <- getNodeSet(tmp, "/html/body/div[1]/div/map") | |
table <- ldply(xmlChildren(tmp2[[1]]), function(i){ | |
x <- as.data.frame(t(xmlAttrs(i))) | |
if(grepl("County:", x$alt)) return(data.frame()) | |
x$Node <- gsub("Node: ", "", word(x$alt, sep="\\r")) | |
# x$NodeType <- gsub("Node Type: ", "", word(x$alt, 2, sep="\\r\\n")) | |
x$MCP <- gsub("MARGINALCLEARINGPRICE: ", "", word(x$alt, 3, sep="\\r\\n")) | |
x$CONGESTLMP <- gsub("CONGESTLMP: ", "", word(x$alt, 5, sep="\\r\\n")) | |
x$COSTLMP <- gsub("COSTLMP: ", "", word(x$alt, 6, sep="\\r\\n")) | |
x$LOSSLMP <- gsub("LOSSLMP: ", "", word(x$alt, 7, sep="\\r\\n")) | |
return(data.frame(date=date, node=x$Node, mcp=x$MCP, congestlmp=x$CONGESTLMP, costlmp=x$COSTLMP, losslmp=x$LOSSLMP, stringsAsFactors=FALSE)) | |
} ) | |
table <- table[,-1] | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "CaisoPower") | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "CaisoPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
#Scrape ERCOT - Texas power pool | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
tmp <- scrape("http://www.ercot.com/content/cdr/contours/rtmLmpHg.html")[[1]] | |
tmp2 <- getNodeSet(tmp, "//*/map")[[1]] | |
date <- gsub("Last Updated: ", "", xmlValue(getNodeSet(tmp2, "//*[@class='datestamp']")[[1]])) | |
table <- ldply(xmlChildren(tmp2), function(i){ | |
x <- xmlAttrs(i)[["title"]] | |
data.frame(date=date, location=word(x, sep=":"), lmp=as.numeric(word(x, 2, sep="\\$")), stringsAsFactors=FALSE) | |
}) | |
table <- table[,-1] | |
d2 <- dbReadTable(con, "ErcotPower") | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "ErcotPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# Scrape NEISO - Northeast Power pool | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
url <- "http://www.iso-ne.com/markets/5min_data/fiveMinLMP.do?submit=latest" | |
data <- read.csv(url, skip=5, header=FALSE, stringsAsFactors=FALSE) | |
header <- read.csv(url, skip=2, nrows=1, header=FALSE, stringsAsFactors=FALSE) | |
names(data) <- header | |
ids <- read.csv("http://www.iso-ne.com/histRpts/rt-lmp/lmp_rt_final_20140205.csv", skip=5, stringsAsFactors=FALSE) | |
ids <- unique(ids[,4:6]) | |
names(ids) <- c("ID", "NodeName", "NodeType") | |
data <- merge(data, ids, by.x="Location ID", by.y="ID") | |
names(data) <- c("ID", "isData", "DateTime", "Energy", "Congestion", "Loss", "LMP", "NodeName", "NodeType") | |
data <- data[,-2] | |
d2 <- dbReadTable(con, "NEisoPower") | |
d2 <- unique(rbind(data, d2)) | |
d <- dbWriteTable(con, "NEisoPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
#PJM (Pennsylvania Jersey etc) | |
url <- "http://www.pjm.com/pub/account/lmpgen/lmppost.html" | |
tmp <- scrape(url)[[1]] | |
date <- xmlValue(getNodeSet(tmp, "/html/body/center[2]/table/tr/td/font/b")[[1]]) | |
table1 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[4]/table")[[1]], stringsAsFactors=FALSE) | |
table1 <- do.call("cbind", lapply(1:ncol(table1), function(k) str_trim(table1[,k]))) | |
table1 <- as.data.frame(table1, stringsAsFactors=FALSE) | |
names(table1) <- table1[1,] | |
table1 <- table1[-1,] | |
table1 <- table1[,-which(apply(table1, 2, function(i) sum(nchar(i)!=0))==0)] | |
names(table1) <- c("NodeName", "NodeType", "LMP", "HourlyLMPHr22") | |
table1$LMP <- as.numeric(table1$LMP) | |
table1$HourlyLMPHr22 <- as.numeric(table1$HourlyLMPHr22) | |
table2 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[6]/table")[[1]], stringsAsFactors=FALSE) | |
table2 <- do.call("cbind", lapply(1:ncol(table2), function(k) str_trim(table2[,k]))) | |
table2 <- as.data.frame(table2, stringsAsFactors=FALSE) | |
names(table2) <- table2[1,] | |
table2 <- table2[-1,] | |
table2 <- table2[,-which(apply(table2, 2, function(i) sum(nchar(i)!=0))==0)] | |
table2$NodeType <- "Bus" | |
names(table2) <- c("NodeName", "LMP", "HourlyLMPHr22", "NodeType") | |
table2$LMP <- as.numeric(table2$LMP) | |
table2$HourlyLMPHr22 <- as.numeric(table2$HourlyLMPHr22) | |
table <- rbind.fill(table1, table2) | |
table$date <- date | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "PjmPower") | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "PjmPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# NYISO LMP | |
url <- "http://mis.nyiso.com/public/realtime/realtime_gen_lbmp.csv" | |
data <- read.csv(url, stringsAsFactors=FALSE) | |
names(data) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion") | |
data$Type <- "Generator" | |
url2 <- "http://mis.nyiso.com/public/realtime/realtime_zone_lbmp.csv" | |
data2 <- read.csv(url2, stringsAsFactors=FALSE) | |
names(data2) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion") | |
data2$Type <- "Zone" | |
data <- rbind.fill(data, data2) | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "NyisoPower") | |
d2 <- unique(rbind(data, d2)) | |
d <- dbWriteTable(con, "NyisoPower", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
#Natural Gas, Oil, Gas, etc. prices | |
url <- "http://www.bloomberg.com/energy/" | |
date <- Sys.Date() | |
tmp <- scrape(url)[[1]] | |
tmp2 <- getNodeSet(tmp, "//*/table") | |
data <- ldply(tmp2, function(i){ | |
nrow <- length(xmlChildren(i))-1 | |
header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue))) | |
header <- header[which(nchar(header)>0)] | |
x <- ldply(xmlChildren(i)[2:nrow], function(j){ | |
header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue))) | |
header <- header[which(nchar(header)>0)] | |
return(header) | |
}) | |
x <- x[,-1] | |
names(x) <- header | |
x[which(apply(x, 1, function(i) sum(i==names(x))==0)),] | |
}) | |
data$date <- date | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "EnergyMarkets") | |
names(data) <- names(d2) | |
d2 <- unique(rbind(data, d2)) | |
d <- dbWriteTable(con, "EnergyMarkets", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
#Metal future prices | |
url <- "http://www.bloomberg.com/markets/commodities/futures/metals/" | |
date <- Sys.Date() | |
tmp <- scrape(url)[[1]] | |
tmp2 <- getNodeSet(tmp, "//*/div[@class='std_data_container']") | |
data <- ldply(tmp2, function(i){ | |
class.name <- xmlValue(xmlChildren(i)[["h2"]]) | |
i <- xmlChildren(xmlChildren(i)[["div"]])[["table"]] | |
nrow <- length(xmlChildren(i))-1 | |
header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue))) | |
header <- header[which(nchar(header)>0)] | |
x <- ldply(xmlChildren(i)[2:nrow], function(j){ | |
header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue))) | |
header <- header[which(nchar(header)>0)] | |
return(header) | |
}) | |
x <- x[,-1] | |
names(x) <- header | |
x$class <- class.name | |
x[which(apply(x, 1, function(i) sum(i==names(x))==0)),] | |
}) | |
data$date <- date | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "MetalMarkets") | |
names(data) <- names(d2) | |
d2 <- unique(rbind(data, d2)) | |
d <- dbWriteTable(con, "MetalMarkets", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# Natural Gas Markets | |
url <- "http://www.naturalgasintel.com/ext/resources/Daily-GPI/ICE.htm" | |
tmp <- scrape(url)[[1]] | |
tmp2 <- getNodeSet(tmp, "/html/body/table")[[1]] | |
rowlist <- xmlChildren(tmp2) | |
date <- xmlValue(xmlChildren(rowlist[[1]])[[1]]) | |
header <- str_trim(sapply(xmlChildren(rowlist[[2]]), xmlValue)) | |
header <- header[which(nchar(header)>0)] | |
header <- gsub("#", "NUM", header, fixed=TRUE) | |
data <- ldply(rowlist[-c(1:2)], function(i) { | |
x <- str_trim(unlist(sapply(xmlChildren(i), xmlValue))) | |
x <- x[which(nchar(x)>0)] | |
}) | |
data <- data[,-1] | |
names(data) <- header | |
data$TradeDate <- gsub("Trade Date: ", "", word(date, sep=";", start=1)) | |
data$FlowDate <- str_replace(word(date, sep=";", start=-1), " Flow Date\\(s\\): ", "") | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "NatGasMarket") | |
names(data) <- names(d2) | |
d2 <- unique(rbind(data, d2)) | |
d <- dbWriteTable(con, "NatGasMarket", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) | |
# Natural Gas Reserves | |
url <- "http://www.eia.gov/oil_gas/natural_gas/data_publications/eia914/eia914.html" | |
tmp <- scrape(url)[[1]] | |
tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[5]") | |
head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue)) | |
head1 <- head1[which(nchar(head1)>0)] | |
head1 <- c(head1[1], rep(head1[-1], each=2)) | |
data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){ | |
x <- str_trim(sapply(xmlChildren(i), xmlValue)) | |
x <- x[which(nchar(x)>0)] | |
return(x) | |
}) | |
data1 <- data[,-1] | |
names(data1) <- head1 | |
data <- data1[,c(1, 2, 4, 6, 8, 10)] | |
table <- melt(data, id.vars=1, measure.vars=2:6) | |
names(table) <- c("Date", "Area", "Value") | |
table$Area <- as.character(table$Area) | |
tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[6]") | |
head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue)) | |
head1 <- head1[which(nchar(head1)>0)] | |
head1 <- c(head1[1], rep(head1[-1], each=2)) | |
data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){ | |
x <- str_trim(sapply(xmlChildren(i), xmlValue)) | |
x <- x[which(nchar(x)>0)] | |
return(x) | |
}) | |
data1 <- data[,-1] | |
names(data1) <- head1 | |
data <- data1[,c(1, 2, 4, 6, 8, 10)] | |
data <- melt(data, id.vars=1, measure.vars=2:6) | |
names(data) <- c("Date", "Area", "Value") | |
data$Area <- as.character(data$Area) | |
table <- rbind(table, data) | |
table$Value <- as.numeric(str_sub(table$Value, 3, -1)) | |
con <- dbConnect(MySQL(), user="susan", dbname="susan") | |
d2 <- dbReadTable(con, "NatGasReserves") | |
names(table) <- names(d2) | |
d2 <- unique(rbind(table, d2)) | |
d <- dbWriteTable(con, "NatGasReserves", d2, append=FALSE, overwrite=TRUE) | |
dbDisconnect(con) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment