srvanderplas · April 26, 2019 15:50
diff --git a/ScrapePowerPools.R b/ScrapePowerPools.R
 #!/usr/bin/Rscript
 # Scrape Southwest Power Pool LMP/SMP/MCC

 library(scrapeR)
 library(stringr)
 library(lubridate)
 library(ggplot2)
 library(RMySQL)
 library(plyr)
 library(reshape2)

 con <- dbConnect(MySQL(), user="susan", dbname="susan")

 table <- xmlToDataFrame("http://www.spp.org/XML/LIP-Pricing.xml", stringsAsFactors=F)
 d2 <- dbReadTable(con, "SWPower")
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "SWPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)

 # Scrape MISO Pool LMP/MCC/MLC

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 tmp <- scrape("https://www.misoenergy.org/ria/Consolidated.aspx?format=xml")[[1]]
 tmp2 <- getNodeSet(tmp, "//*/lmpdata")[[1]]
 date <- xmlAttrs(getNodeSet(tmp, "//*/lmpdata")[[1]])[["refid"]]
 table <- ldply(xmlChildren(xmlChildren(tmp2)[[1]]), function(i) as.data.frame(t(xmlAttrs(i)), stringsAsFactors=F))
 table$.id <- date
 names(table)[1] <- "date"

 d2 <- dbReadTable(con, "MisoPower")
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "MisoPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)


 # Scrape CAISO (california) LMP
 date <- Sys.time()
 tz(date) <- "America/Los_Angeles"
 hour(date) <- hour(date)+2
 date <- floor_date(date, "minute")
 tmp <- scrape("http://oasis.caiso.com/mrtu-oasis/lmp/RTM/POINTMap.html")[[1]]
 tmp2 <- getNodeSet(tmp, "/html/body/div[1]/div/map")

 table <- ldply(xmlChildren(tmp2[[1]]), function(i){
  x <- as.data.frame(t(xmlAttrs(i)))
  if(grepl("County:", x$alt)) return(data.frame())
  x$Node <- gsub("Node: ", "", word(x$alt, sep="\\r"))
 #   x$NodeType <- gsub("Node Type: ", "", word(x$alt, 2, sep="\\r\\n"))
  x$MCP <- gsub("MARGINALCLEARINGPRICE: ", "",  word(x$alt, 3, sep="\\r\\n"))
  x$CONGESTLMP <- gsub("CONGESTLMP: ", "",  word(x$alt, 5, sep="\\r\\n"))
  x$COSTLMP <- gsub("COSTLMP: ", "",  word(x$alt, 6, sep="\\r\\n"))
  x$LOSSLMP <- gsub("LOSSLMP: ", "",  word(x$alt, 7, sep="\\r\\n"))
  
  return(data.frame(date=date, node=x$Node, mcp=x$MCP, congestlmp=x$CONGESTLMP, costlmp=x$COSTLMP, losslmp=x$LOSSLMP, stringsAsFactors=FALSE))
 } )

 table <- table[,-1]
 con <- dbConnect(MySQL(), user="susan", dbname="susan")

 d2 <- dbReadTable(con, "CaisoPower")
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "CaisoPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)

 #Scrape ERCOT - Texas power pool
 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 tmp <- scrape("http://www.ercot.com/content/cdr/contours/rtmLmpHg.html")[[1]]
 tmp2 <- getNodeSet(tmp, "//*/map")[[1]]
 date <- gsub("Last Updated: ", "", xmlValue(getNodeSet(tmp2, "//*[@class='datestamp']")[[1]]))
 table <- ldply(xmlChildren(tmp2), function(i){
  x <-  xmlAttrs(i)[["title"]]
  data.frame(date=date, location=word(x, sep=":"), lmp=as.numeric(word(x, 2, sep="\\$")), stringsAsFactors=FALSE)
 })
 table <- table[,-1]

 d2 <- dbReadTable(con, "ErcotPower")
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "ErcotPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)

 # Scrape NEISO - Northeast Power pool
 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 url <- "http://www.iso-ne.com/markets/5min_data/fiveMinLMP.do?submit=latest"
 data <- read.csv(url, skip=5, header=FALSE, stringsAsFactors=FALSE)
 header <- read.csv(url, skip=2, nrows=1, header=FALSE, stringsAsFactors=FALSE)
 names(data) <- header
 ids <- read.csv("http://www.iso-ne.com/histRpts/rt-lmp/lmp_rt_final_20140205.csv", skip=5, stringsAsFactors=FALSE)
 ids <- unique(ids[,4:6])
 names(ids) <- c("ID", "NodeName", "NodeType")
 data <- merge(data, ids, by.x="Location ID", by.y="ID")
 names(data) <- c("ID", "isData", "DateTime", "Energy", "Congestion", "Loss", "LMP", "NodeName", "NodeType")
 data <- data[,-2]

 d2 <- dbReadTable(con, "NEisoPower")
 d2 <- unique(rbind(data, d2))
 d <- dbWriteTable(con, "NEisoPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)

 #PJM (Pennsylvania Jersey etc) 
 url <- "http://www.pjm.com/pub/account/lmpgen/lmppost.html"
 tmp <- scrape(url)[[1]]
 date <- xmlValue(getNodeSet(tmp, "/html/body/center[2]/table/tr/td/font/b")[[1]])
 table1 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[4]/table")[[1]], stringsAsFactors=FALSE)
 table1 <- do.call("cbind", lapply(1:ncol(table1), function(k) str_trim(table1[,k])))

 table1 <- as.data.frame(table1, stringsAsFactors=FALSE)
 names(table1) <- table1[1,]
 table1 <- table1[-1,]
 table1 <- table1[,-which(apply(table1, 2, function(i) sum(nchar(i)!=0))==0)]
 names(table1) <- c("NodeName", "NodeType", "LMP", "HourlyLMPHr22")
 table1$LMP <- as.numeric(table1$LMP)
 table1$HourlyLMPHr22 <- as.numeric(table1$HourlyLMPHr22)

 table2 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[6]/table")[[1]], stringsAsFactors=FALSE)
 table2 <- do.call("cbind", lapply(1:ncol(table2), function(k) str_trim(table2[,k])))

 table2 <- as.data.frame(table2, stringsAsFactors=FALSE)
 names(table2) <- table2[1,]
 table2 <- table2[-1,]
 table2 <- table2[,-which(apply(table2, 2, function(i) sum(nchar(i)!=0))==0)]
 table2$NodeType <- "Bus"
 names(table2) <- c("NodeName", "LMP", "HourlyLMPHr22", "NodeType")
 table2$LMP <- as.numeric(table2$LMP)
 table2$HourlyLMPHr22 <- as.numeric(table2$HourlyLMPHr22)

 table <- rbind.fill(table1, table2)
 table$date <- date

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "PjmPower")
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "PjmPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)


 # NYISO LMP
 url <- "http://mis.nyiso.com/public/realtime/realtime_gen_lbmp.csv"
 data <- read.csv(url, stringsAsFactors=FALSE)
 names(data) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
 data$Type <- "Generator"

 url2 <- "http://mis.nyiso.com/public/realtime/realtime_zone_lbmp.csv"
 data2 <- read.csv(url2, stringsAsFactors=FALSE)
 names(data2) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
 data2$Type <- "Zone"

 data <- rbind.fill(data, data2)
 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "NyisoPower")
 d2 <- unique(rbind(data, d2))
 d <- dbWriteTable(con, "NyisoPower", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)


 #Natural Gas, Oil, Gas, etc. prices 

 url <- "http://www.bloomberg.com/energy/"
 date <- Sys.Date()
 tmp <- scrape(url)[[1]]
 tmp2 <- getNodeSet(tmp, "//*/table")
 data <- ldply(tmp2, function(i){
  nrow <- length(xmlChildren(i))-1
  header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
  header <- header[which(nchar(header)>0)]
  x <- ldply(xmlChildren(i)[2:nrow], function(j){
          header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
          header <- header[which(nchar(header)>0)]
          return(header)
        })
  x <- x[,-1]
  names(x) <- header
  x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
 })
 data$date <- date

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "EnergyMarkets")
 names(data) <- names(d2)
 d2 <- unique(rbind(data, d2))
 d <- dbWriteTable(con, "EnergyMarkets", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)


 #Metal future prices
 url <- "http://www.bloomberg.com/markets/commodities/futures/metals/"
 date <- Sys.Date()
 tmp <- scrape(url)[[1]]
 tmp2 <- getNodeSet(tmp, "//*/div[@class='std_data_container']")
 data <- ldply(tmp2, function(i){
  class.name <- xmlValue(xmlChildren(i)[["h2"]])
  i <- xmlChildren(xmlChildren(i)[["div"]])[["table"]]
  nrow <- length(xmlChildren(i))-1
  header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
  header <- header[which(nchar(header)>0)]
  x <- ldply(xmlChildren(i)[2:nrow], function(j){
    header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
    header <- header[which(nchar(header)>0)]
    return(header)
  })
  x <- x[,-1]
  names(x) <- header
  x$class <- class.name
  x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
 })
 data$date <- date

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "MetalMarkets")
 names(data) <- names(d2)
 d2 <- unique(rbind(data, d2))
 d <- dbWriteTable(con, "MetalMarkets", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)


 # Natural Gas Markets
 url <- "http://www.naturalgasintel.com/ext/resources/Daily-GPI/ICE.htm"
 tmp <- scrape(url)[[1]]
 tmp2 <- getNodeSet(tmp, "/html/body/table")[[1]]
 rowlist <- xmlChildren(tmp2)
 date <- xmlValue(xmlChildren(rowlist[[1]])[[1]])
 header <- str_trim(sapply(xmlChildren(rowlist[[2]]), xmlValue))
 header <- header[which(nchar(header)>0)]
 header <- gsub("#", "NUM", header, fixed=TRUE)
 data <- ldply(rowlist[-c(1:2)], function(i) {
  x <- str_trim(unlist(sapply(xmlChildren(i), xmlValue)))
  x <- x[which(nchar(x)>0)]
 })
 data <- data[,-1]
 names(data) <- header
 data$TradeDate <- gsub("Trade Date: ", "", word(date, sep=";", start=1))
 data$FlowDate <- str_replace(word(date, sep=";", start=-1), " Flow Date\\(s\\): ", "")

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "NatGasMarket")
 names(data) <- names(d2)
 d2 <- unique(rbind(data, d2))
 d <- dbWriteTable(con, "NatGasMarket", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)

 # Natural Gas Reserves
 url <- "http://www.eia.gov/oil_gas/natural_gas/data_publications/eia914/eia914.html"
 tmp <- scrape(url)[[1]]
 tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[5]")

 head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
 head1 <- head1[which(nchar(head1)>0)]
 head1 <- c(head1[1], rep(head1[-1], each=2))
 data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
            x <- str_trim(sapply(xmlChildren(i), xmlValue))
            x <- x[which(nchar(x)>0)]
            return(x)
          })
 data1 <- data[,-1]
 names(data1) <- head1
 data <- data1[,c(1, 2, 4, 6, 8, 10)]

 table <- melt(data, id.vars=1, measure.vars=2:6)
 names(table) <- c("Date", "Area", "Value")
 table$Area <- as.character(table$Area)

 tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[6]")

 head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
 head1 <- head1[which(nchar(head1)>0)]
 head1 <- c(head1[1], rep(head1[-1], each=2))
 data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
  x <- str_trim(sapply(xmlChildren(i), xmlValue))
  x <- x[which(nchar(x)>0)]
  return(x)
 })
 data1 <- data[,-1]
 names(data1) <- head1
 data <- data1[,c(1, 2, 4, 6, 8, 10)]

 data <- melt(data, id.vars=1, measure.vars=2:6)
 names(data) <- c("Date", "Area", "Value")
 data$Area <- as.character(data$Area)

 table <- rbind(table, data)
 table$Value <- as.numeric(str_sub(table$Value, 3, -1))

 con <- dbConnect(MySQL(), user="susan", dbname="susan")
 d2 <- dbReadTable(con, "NatGasReserves")
 names(table) <- names(d2)
 d2 <- unique(rbind(table, d2))
 d <- dbWriteTable(con, "NatGasReserves", d2, append=FALSE, overwrite=TRUE)
 dbDisconnect(con)
	#!/usr/bin/Rscript
	# Scrape Southwest Power Pool LMP/SMP/MCC

	library(scrapeR)
	library(stringr)
	library(lubridate)
	library(ggplot2)
	library(RMySQL)
	library(plyr)
	library(reshape2)

	con <- dbConnect(MySQL(), user="susan", dbname="susan")

	table <- xmlToDataFrame("http://www.spp.org/XML/LIP-Pricing.xml", stringsAsFactors=F)
	d2 <- dbReadTable(con, "SWPower")
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "SWPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)

	# Scrape MISO Pool LMP/MCC/MLC

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	tmp <- scrape("https://www.misoenergy.org/ria/Consolidated.aspx?format=xml")[[1]]
	tmp2 <- getNodeSet(tmp, "//*/lmpdata")[[1]]
	date <- xmlAttrs(getNodeSet(tmp, "//*/lmpdata")[[1]])[["refid"]]
	table <- ldply(xmlChildren(xmlChildren(tmp2)[[1]]), function(i) as.data.frame(t(xmlAttrs(i)), stringsAsFactors=F))
	table$.id <- date
	names(table)[1] <- "date"

	d2 <- dbReadTable(con, "MisoPower")
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "MisoPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)


	# Scrape CAISO (california) LMP
	date <- Sys.time()
	tz(date) <- "America/Los_Angeles"
	hour(date) <- hour(date)+2
	date <- floor_date(date, "minute")
	tmp <- scrape("http://oasis.caiso.com/mrtu-oasis/lmp/RTM/POINTMap.html")[[1]]
	tmp2 <- getNodeSet(tmp, "/html/body/div[1]/div/map")

	table <- ldply(xmlChildren(tmp2[[1]]), function(i){
	x <- as.data.frame(t(xmlAttrs(i)))
	if(grepl("County:", x$alt)) return(data.frame())
	x$Node <- gsub("Node: ", "", word(x$alt, sep="\\r"))
	# x$NodeType <- gsub("Node Type: ", "", word(x$alt, 2, sep="\\r\\n"))
	x$MCP <- gsub("MARGINALCLEARINGPRICE: ", "", word(x$alt, 3, sep="\\r\\n"))
	x$CONGESTLMP <- gsub("CONGESTLMP: ", "", word(x$alt, 5, sep="\\r\\n"))
	x$COSTLMP <- gsub("COSTLMP: ", "", word(x$alt, 6, sep="\\r\\n"))
	x$LOSSLMP <- gsub("LOSSLMP: ", "", word(x$alt, 7, sep="\\r\\n"))

	return(data.frame(date=date, node=x$Node, mcp=x$MCP, congestlmp=x$CONGESTLMP, costlmp=x$COSTLMP, losslmp=x$LOSSLMP, stringsAsFactors=FALSE))
	} )

	table <- table[,-1]
	con <- dbConnect(MySQL(), user="susan", dbname="susan")

	d2 <- dbReadTable(con, "CaisoPower")
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "CaisoPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)

	#Scrape ERCOT - Texas power pool
	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	tmp <- scrape("http://www.ercot.com/content/cdr/contours/rtmLmpHg.html")[[1]]
	tmp2 <- getNodeSet(tmp, "//*/map")[[1]]
	date <- gsub("Last Updated: ", "", xmlValue(getNodeSet(tmp2, "//*[@class='datestamp']")[[1]]))
	table <- ldply(xmlChildren(tmp2), function(i){
	x <- xmlAttrs(i)[["title"]]
	data.frame(date=date, location=word(x, sep=":"), lmp=as.numeric(word(x, 2, sep="\\$")), stringsAsFactors=FALSE)
	})
	table <- table[,-1]

	d2 <- dbReadTable(con, "ErcotPower")
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "ErcotPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)

	# Scrape NEISO - Northeast Power pool
	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	url <- "http://www.iso-ne.com/markets/5min_data/fiveMinLMP.do?submit=latest"
	data <- read.csv(url, skip=5, header=FALSE, stringsAsFactors=FALSE)
	header <- read.csv(url, skip=2, nrows=1, header=FALSE, stringsAsFactors=FALSE)
	names(data) <- header
	ids <- read.csv("http://www.iso-ne.com/histRpts/rt-lmp/lmp_rt_final_20140205.csv", skip=5, stringsAsFactors=FALSE)
	ids <- unique(ids[,4:6])
	names(ids) <- c("ID", "NodeName", "NodeType")
	data <- merge(data, ids, by.x="Location ID", by.y="ID")
	names(data) <- c("ID", "isData", "DateTime", "Energy", "Congestion", "Loss", "LMP", "NodeName", "NodeType")
	data <- data[,-2]

	d2 <- dbReadTable(con, "NEisoPower")
	d2 <- unique(rbind(data, d2))
	d <- dbWriteTable(con, "NEisoPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)

	#PJM (Pennsylvania Jersey etc)
	url <- "http://www.pjm.com/pub/account/lmpgen/lmppost.html"
	tmp <- scrape(url)[[1]]
	date <- xmlValue(getNodeSet(tmp, "/html/body/center[2]/table/tr/td/font/b")[[1]])
	table1 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[4]/table")[[1]], stringsAsFactors=FALSE)
	table1 <- do.call("cbind", lapply(1:ncol(table1), function(k) str_trim(table1[,k])))

	table1 <- as.data.frame(table1, stringsAsFactors=FALSE)
	names(table1) <- table1[1,]
	table1 <- table1[-1,]
	table1 <- table1[,-which(apply(table1, 2, function(i) sum(nchar(i)!=0))==0)]
	names(table1) <- c("NodeName", "NodeType", "LMP", "HourlyLMPHr22")
	table1$LMP <- as.numeric(table1$LMP)
	table1$HourlyLMPHr22 <- as.numeric(table1$HourlyLMPHr22)

	table2 <- xmlToDataFrame(getNodeSet(tmp, "/html/body/center[6]/table")[[1]], stringsAsFactors=FALSE)
	table2 <- do.call("cbind", lapply(1:ncol(table2), function(k) str_trim(table2[,k])))

	table2 <- as.data.frame(table2, stringsAsFactors=FALSE)
	names(table2) <- table2[1,]
	table2 <- table2[-1,]
	table2 <- table2[,-which(apply(table2, 2, function(i) sum(nchar(i)!=0))==0)]
	table2$NodeType <- "Bus"
	names(table2) <- c("NodeName", "LMP", "HourlyLMPHr22", "NodeType")
	table2$LMP <- as.numeric(table2$LMP)
	table2$HourlyLMPHr22 <- as.numeric(table2$HourlyLMPHr22)

	table <- rbind.fill(table1, table2)
	table$date <- date

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "PjmPower")
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "PjmPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)


	# NYISO LMP
	url <- "http://mis.nyiso.com/public/realtime/realtime_gen_lbmp.csv"
	data <- read.csv(url, stringsAsFactors=FALSE)
	names(data) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
	data$Type <- "Generator"

	url2 <- "http://mis.nyiso.com/public/realtime/realtime_zone_lbmp.csv"
	data2 <- read.csv(url2, stringsAsFactors=FALSE)
	names(data2) <- c("DateTime", "Name", "ID", "LBMP", "MarginalLosses", "MarginalCongestion")
	data2$Type <- "Zone"

	data <- rbind.fill(data, data2)
	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "NyisoPower")
	d2 <- unique(rbind(data, d2))
	d <- dbWriteTable(con, "NyisoPower", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)


	#Natural Gas, Oil, Gas, etc. prices

	url <- "http://www.bloomberg.com/energy/"
	date <- Sys.Date()
	tmp <- scrape(url)[[1]]
	tmp2 <- getNodeSet(tmp, "//*/table")
	data <- ldply(tmp2, function(i){
	nrow <- length(xmlChildren(i))-1
	header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
	header <- header[which(nchar(header)>0)]
	x <- ldply(xmlChildren(i)[2:nrow], function(j){
	header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
	header <- header[which(nchar(header)>0)]
	return(header)
	})
	x <- x[,-1]
	names(x) <- header
	x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
	})
	data$date <- date

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "EnergyMarkets")
	names(data) <- names(d2)
	d2 <- unique(rbind(data, d2))
	d <- dbWriteTable(con, "EnergyMarkets", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)


	#Metal future prices
	url <- "http://www.bloomberg.com/markets/commodities/futures/metals/"
	date <- Sys.Date()
	tmp <- scrape(url)[[1]]
	tmp2 <- getNodeSet(tmp, "//*/div[@class='std_data_container']")
	data <- ldply(tmp2, function(i){
	class.name <- xmlValue(xmlChildren(i)[["h2"]])
	i <- xmlChildren(xmlChildren(i)[["div"]])[["table"]]
	nrow <- length(xmlChildren(i))-1
	header <- str_trim(unlist(lapply(xmlChildren(xmlChildren(i)[[1]]), xmlValue)))
	header <- header[which(nchar(header)>0)]
	x <- ldply(xmlChildren(i)[2:nrow], function(j){
	header <- str_trim(unlist(lapply(xmlChildren(j), xmlValue)))
	header <- header[which(nchar(header)>0)]
	return(header)
	})
	x <- x[,-1]
	names(x) <- header
	x$class <- class.name
	x[which(apply(x, 1, function(i) sum(i==names(x))==0)),]
	})
	data$date <- date

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "MetalMarkets")
	names(data) <- names(d2)
	d2 <- unique(rbind(data, d2))
	d <- dbWriteTable(con, "MetalMarkets", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)


	# Natural Gas Markets
	url <- "http://www.naturalgasintel.com/ext/resources/Daily-GPI/ICE.htm"
	tmp <- scrape(url)[[1]]
	tmp2 <- getNodeSet(tmp, "/html/body/table")[[1]]
	rowlist <- xmlChildren(tmp2)
	date <- xmlValue(xmlChildren(rowlist[[1]])[[1]])
	header <- str_trim(sapply(xmlChildren(rowlist[[2]]), xmlValue))
	header <- header[which(nchar(header)>0)]
	header <- gsub("#", "NUM", header, fixed=TRUE)
	data <- ldply(rowlist[-c(1:2)], function(i) {
	x <- str_trim(unlist(sapply(xmlChildren(i), xmlValue)))
	x <- x[which(nchar(x)>0)]
	})
	data <- data[,-1]
	names(data) <- header
	data$TradeDate <- gsub("Trade Date: ", "", word(date, sep=";", start=1))
	data$FlowDate <- str_replace(word(date, sep=";", start=-1), " Flow Date\\(s\\): ", "")

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "NatGasMarket")
	names(data) <- names(d2)
	d2 <- unique(rbind(data, d2))
	d <- dbWriteTable(con, "NatGasMarket", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)

	# Natural Gas Reserves
	url <- "http://www.eia.gov/oil_gas/natural_gas/data_publications/eia914/eia914.html"
	tmp <- scrape(url)[[1]]
	tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[5]")

	head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
	head1 <- head1[which(nchar(head1)>0)]
	head1 <- c(head1[1], rep(head1[-1], each=2))
	data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
	x <- str_trim(sapply(xmlChildren(i), xmlValue))
	x <- x[which(nchar(x)>0)]
	return(x)
	})
	data1 <- data[,-1]
	names(data1) <- head1
	data <- data1[,c(1, 2, 4, 6, 8, 10)]

	table <- melt(data, id.vars=1, measure.vars=2:6)
	names(table) <- c("Date", "Area", "Value")
	table$Area <- as.character(table$Area)

	tmp2 <- getNodeSet(tmp, "/html/body/table[4]/tr/td/table[6]")

	head1 <- str_trim(sapply(xmlChildren(xmlChildren(tmp2[[1]])[[1]]), xmlValue))
	head1 <- head1[which(nchar(head1)>0)]
	head1 <- c(head1[1], rep(head1[-1], each=2))
	data <- ldply(xmlChildren(tmp2[[1]])[-c(1:2)], function(i){
	x <- str_trim(sapply(xmlChildren(i), xmlValue))
	x <- x[which(nchar(x)>0)]
	return(x)
	})
	data1 <- data[,-1]
	names(data1) <- head1
	data <- data1[,c(1, 2, 4, 6, 8, 10)]

	data <- melt(data, id.vars=1, measure.vars=2:6)
	names(data) <- c("Date", "Area", "Value")
	data$Area <- as.character(data$Area)

	table <- rbind(table, data)
	table$Value <- as.numeric(str_sub(table$Value, 3, -1))

	con <- dbConnect(MySQL(), user="susan", dbname="susan")
	d2 <- dbReadTable(con, "NatGasReserves")
	names(table) <- names(d2)
	d2 <- unique(rbind(table, d2))
	d <- dbWriteTable(con, "NatGasReserves", d2, append=FALSE, overwrite=TRUE)
	dbDisconnect(con)
No results found