Last active
April 26, 2016 10:44
-
-
Save yanping/4619440 to your computer and use it in GitHub Desktop.
从和讯读取财务数据
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# getHexunFinaceData.r
# Read financial statement data from Hexun (stockdata.stock.hexun.com).
# version: 0.93

# Return the substrings of `line` lying between the i-th literal occurrence of
# `open` and the i-th literal occurrence of `close`. Returns character(0) when
# either delimiter does not occur at all.
.hexun_between <- function(line, open, close) {
  starts <- as.integer(gregexpr(open, line, fixed = TRUE)[[1]])
  ends <- as.integer(gregexpr(close, line, fixed = TRUE)[[1]])
  if (starts[1] == -1L || ends[1] == -1L) {
    return(character(0))
  }
  substring(line, starts + nchar(open), ends - 1L)
}

# Download one report page and pull the item names and values out of the
# `ControlEx1_lbl` span. Returns list(names =, values =), or NULL when the page
# cannot be read, carries no data, or cannot be parsed.
.hexun_fetch_report <- function(page_url, reencode) {
  txt <- tryCatch(
    readLines(page_url),
    error = function(e) {
      # One unreachable page should not throw away everything collected so far.
      warning("failed to read ", page_url, ": ", conditionMessage(e),
              call. = FALSE)
      NULL
    }
  )
  if (is.null(txt)) {
    return(NULL)
  }
  if (reencode) {
    txt <- iconv(txt, from = "gb2312", to = "UTF-8")
  }

  hit <- grep("<span id=\"ControlEx1_lbl\">", txt, fixed = TRUE)
  if (length(hit) == 0) {
    return(NULL)
  }
  line <- txt[hit[1]]
  # An empty span means the site has no report for this period.
  if (trimws(line) == "<span id=\"ControlEx1_lbl\"></span>") {
    return(NULL)
  }

  labels <- .hexun_between(
    line,
    "<td class='dotborder' width='45%'><div class='tishi'><strong>",
    "</strong></div></td><td>"
  )
  values <- .hexun_between(line, "<td><div class='tishi'>", "</div></td><tr>")
  if (length(labels) == 0) {
    warning("could not parse report table at ", page_url, call. = FALSE)
    return(NULL)
  }
  # Drop thousands separators so the values can be converted to numbers later.
  list(names = labels, values = gsub(",", "", values, fixed = TRUE))
}

#' Read financial statement data from Hexun
#'
#' Downloads the quarterly report pages for each stock and year in the
#' requested range and stacks the reported items into one data frame.
#' Uses base R only.
#'
#' @param stockid Vector of stock codes (numeric or character). Codes shorter
#'   than six characters are left-padded with zeros; longer codes are skipped
#'   with a warning.
#' @param start First year to fetch.
#' @param end Last year to fetch; defaults to the current year.
#' @param type Report type: `"b"` balance sheet (default), `"i"` income
#'   statement, `"c"` cash flow statement.
#' @return A data frame with one row per report found. The first column holds
#'   the stock code and the remaining columns are named after the items on the
#'   page. `"--"` entries become `NA`, and columns 3 to `ncol - 1` are
#'   converted to numeric. Returns `NULL` when no data was found. Periods whose
#'   page cannot be downloaded are skipped with a warning.
#' @examples
#' \dontrun{
#' test <- getHexunFinaceData(600028, 2001, 2002, "b")
#' }
getHexunFinaceData <- function(stockid,
                               start = 1991,
                               end = as.numeric(substr(Sys.time(), 1, 4)),
                               type = "b") {
  if (!is.vector(stockid)) {
    stop("参数stockid必须是向量形式!")
  }
  if (start > end) {
    stop("起始或结束年份输入有误!")
  }
  if (!type %in% c("b", "c", "i")) {
    stop("错误的财务报告类型!请检查参数type...")
  }
  if (length(stockid) == 0) {
    # Nothing to fetch; txtProgressBar() would also reject max == 0.
    return(NULL)
  }

  page <- switch(type, b = "zcfz", i = "lr", c = "xjll")
  address <- paste0(
    "http://stockdata.stock.hexun.com/2008/", page, ".aspx?stockid="
  )

  # NOTE(review): the first-quarter suffix is ".03.15", not ".03.31";
  # presumably that is the value the site expects -- confirm before changing.
  quarter_suffix <- c(".03.15", ".06.30", ".09.30", ".12.31")

  # The original re-encoded on Linux only. Any UTF-8 session needs the same
  # conversion, since the code treats the pages as gb2312.
  reencode <- R.version$os == "linux-gnu" || isTRUE(l10n_info()[["UTF-8"]])

  stockid <- as.character(stockid)
  years <- start:end
  terms_per_stock <- length(years) * length(quarter_suffix)

  pb <- txtProgressBar(
    min = 0, max = length(stockid) * terms_per_stock, style = 3
  )
  on.exit(close(pb), add = TRUE)
  done <- 0

  header <- NULL
  rows <- list()

  for (stock in stockid) {
    if (is.na(stock) || nchar(stock) > 6) {
      warning(paste0("invalid stock code: ", stock))
      # Keep the bar in step with the periods that will not be fetched.
      done <- done + terms_per_stock
      setTxtProgressBar(pb, done)
      next
    }
    stock <- paste0(strrep("0", 6 - nchar(stock)), stock)

    for (year in years) {
      for (term in paste0(year, quarter_suffix)) {
        page_url <- paste0(address, stock, "&accountdate=", term)
        report <- .hexun_fetch_report(page_url, reencode)
        done <- done + 1
        setTxtProgressBar(pb, done)
        if (is.null(report)) {
          next
        }

        vname <- c("股票代码", report$names)
        if (!is.null(header) && !identical(vname, header)) {
          # Rows with different item lists cannot share one table.
          stop(paste0("看来股票", stock, "的数据格式在", year, "年发生了变化!"))
        }
        header <- vname
        rows[[length(rows) + 1L]] <- c(stock, report$values)
      }
    }
  }

  if (length(rows) == 0) {
    return(NULL)
  }

  value.df <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
  value.df[value.df == "--"] <- NA
  # Columns 1-2 and the last column stay as text, as in the original; the
  # guard avoids a backwards 3:(ncol - 1) range on narrow tables.
  if (ncol(value.df) >= 4) {
    num_cols <- 3:(ncol(value.df) - 1)
    value.df[num_cols] <- lapply(value.df[num_cols], as.numeric)
  }
  colnames(value.df) <- header
  value.df
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
有时候和讯数据不够,对应的日期没有数据,希望对此做容错处理
是否用XML包里面的readHTMLtable更快?