yanping · April 26, 2016 10:44 · laoyang945 · Mar 20, 2013
diff --git a/getHexunFinaceData.r b/getHexunFinaceData.r
 # getHexunFinaceData.r 
 # 从和讯读取财务数据 
 # version: 0.93 
 # 调用语法：
 #    getHexunFinaceData(stockid, start, end, type)
 # 参数含义：
 #   stockid  股票代码
 #   start    起始年份
 #   end      结束年份
 #   type     财务报告类别，默认为"b"，表示资产负债表；"i"表示利润表；"c"表示现金流量表
 # 例子： test <- getHexunFinaceData(600028,2001,2002,"b")

 #' Download financial statement data for one or more stocks from hexun.com.
 #'
 #' For every stock code and every year in `start:end`, four reporting-period
 #' pages are requested. On each page the line containing the
 #' `ControlEx1_lbl` span is parsed into field names and field values, and
 #' each page that carries data becomes one row of the result.
 #'
 #' @param stockid Vector of stock codes (numeric or character). Codes shorter
 #'   than six characters are left-padded with "0"; codes longer than six
 #'   characters are skipped with a warning.
 #' @param start First year to query (default 1991).
 #' @param end Last year to query (defaults to the current year).
 #' @param type Statement type: "b" (balance sheet, the default), "i" (income
 #'   statement) or "c" (cash-flow statement).
 #' @return A data.frame with one row per reporting period that returned
 #'   data. The first column holds the stock code, "--" cells become `NA`, and
 #'   columns 3 to `ncol - 1` are converted to numeric. Returns `NULL` when no
 #'   page returned data.
 getHexunFinaceData <- function(stockid,
                                start = 1991,
                                end = as.numeric(substr(Sys.time(), 1, 4)),
                                type = "b") {
   if (!is.vector(stockid)) {
     stop("参数stockid必须是向量形式！")
   }
   if (start > end) {
     stop("起始或结束年份输入有误！")
   }
   if (length(type) != 1 || !type %in% c("b", "c", "i")) {
     stop("错误的财务报告类型！请检查参数type...")
   }
   # Nothing to fetch; also avoids creating a progress bar with max == min.
   if (length(stockid) == 0) {
     return(NULL)
   }

   # Literal markers that delimit field names and field values on the page.
   name_open <- "<td class='dotborder' width='45%'><div class='tishi'><strong>"
   name_close <- "</strong></div></td><td>"
   value_open <- "<td><div class='tishi'>"
   value_close <- "</div></td><tr>"
   empty_span <- "\t\t\t\t\t<span id=\"ControlEx1_lbl\"></span>"

   # Text between every `open` marker and the corresponding `close` marker;
   # character(0) when either marker does not occur in `text`.
   extract_between <- function(text, open, close) {
     from <- as.integer(gregexpr(open, text)[[1]])
     to <- as.integer(gregexpr(close, text)[[1]])
     if (from[1] == -1L || to[1] == -1L) {
       return(character(0))
     }
     substring(text, from + nchar(open), to - 1L)
   }

   # The statement type fixes the endpoint, so pick it once.
   base_url <- switch(type,
     b = "http://stockdata.stock.hexun.com/2008/zcfz.aspx?stockid=",
     i = "http://stockdata.stock.hexun.com/2008/lr.aspx?stockid=",
     c = "http://stockdata.stock.hexun.com/2008/xjll.aspx?stockid="
   )
   # Pages are re-encoded from gb2312 to UTF-8 only on linux-gnu builds.
   needs_iconv <- R.version$os == "linux-gnu"

   # as.character(600000) yields "6e+05"; format numeric codes without
   # scientific notation so the requested code is the one the caller passed.
   if (is.numeric(stockid)) {
     stockid <- format(stockid, scientific = FALSE, trim = TRUE)
   } else {
     stockid <- as.character(stockid)
   }

   total <- length(stockid) * (end - start + 1) * 4
   pb <- txtProgressBar(min = 0, max = total, style = 3)
   # Close the progress bar on every exit path, including errors.
   on.exit(close(pb), add = TRUE)

   rows <- list()
   ref_names <- NULL
   step <- 0
   for (stock in stockid) {
     stock <- paste0(strrep("0", max(0L, 6L - nchar(stock))), stock)
     if (nchar(stock) > 6) {
       warning(paste0("invalid stock code: ", stock))
       next
     }
     for (year in start:end) {
       # NOTE(review): the first-quarter key is ".03.15", kept exactly as in
       # the original code; confirm against the site before changing it.
       for (term in paste0(year, c(".03.15", ".06.30", ".09.30", ".12.31"))) {
         step <- step + 1
         page <- readLines(paste0(base_url, stock, "&accountdate=", term))
         if (needs_iconv) {
           page <- iconv(page, from = "gb2312", to = "UTF-8")
         }
         setTxtProgressBar(pb, step)

         hit <- page[grep("<span id=\"ControlEx1_lbl\">", page)]
         # Scalar `||` short-circuits, so a page without the span is skipped
         # instead of failing on a zero-length condition.
         if (length(hit) == 0 || hit[1] == empty_span) {
           next
         }
         line <- hit[1]

         field_names <- extract_between(line, name_open, name_close)
         # A span without any name marker carries no table; treat as no data.
         if (length(field_names) == 0) {
           next
         }
         col_names <- c("股票代码", field_names)
         if (!is.null(ref_names) && !identical(col_names, ref_names)) {
           stop(paste0("看来股票", stock, "的数据格式在", year, "年发生了变化！"))
         }

         values <- extract_between(line, value_open, value_close)
         values <- gsub(",", "", values)
         rows[[length(rows) + 1]] <- c(stock, values)
         ref_names <- col_names
       }
     }
   }

   if (length(rows) == 0) {
     return(NULL)
   }
   value.df <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
   value.df[value.df == "--"] <- NA
   # Columns 1-2 and the last column stay as text, as in the original code.
   if (ncol(value.df) >= 4) {
     numeric_cols <- 3:(ncol(value.df) - 1)
     value.df[numeric_cols] <- lapply(value.df[numeric_cols], as.numeric)
   }
   colnames(value.df) <- ref_names
   value.df
 }
	# getHexunFinaceData.r
	# 从和讯读取财务数据
	# version: 0.93
	# 调用语法：
	# getHexunFinaceData(stockid, start, end, type)
	# 参数含义：
	# stockid 股票代码
	# start 起始年份
	# end 结束年份
	# type 财务报告类别，默认为"b"，表示资产负债表；"i"表示利润表；"c"表示现金流量表
	# 例子： test <- getHexunFinaceData(600028,2001,2002,"b")

	#' Download financial statement data for one or more stocks from hexun.com.
	#'
	#' For every stock code and every year in `start:end`, four reporting-period
	#' pages are requested. On each page the line containing the
	#' `ControlEx1_lbl` span is parsed into field names and field values, and
	#' each page that carries data becomes one row of the result.
	#'
	#' @param stockid Vector of stock codes (numeric or character). Codes shorter
	#'   than six characters are left-padded with "0"; codes longer than six
	#'   characters are skipped with a warning.
	#' @param start First year to query (default 1991).
	#' @param end Last year to query (defaults to the current year).
	#' @param type Statement type: "b" (balance sheet, the default), "i" (income
	#'   statement) or "c" (cash-flow statement).
	#' @return A data.frame with one row per reporting period that returned
	#'   data. The first column holds the stock code, "--" cells become `NA`, and
	#'   columns 3 to `ncol - 1` are converted to numeric. Returns `NULL` when no
	#'   page returned data.
	getHexunFinaceData <- function(stockid,
	                               start = 1991,
	                               end = as.numeric(substr(Sys.time(), 1, 4)),
	                               type = "b") {
	  if (!is.vector(stockid)) {
	    stop("参数stockid必须是向量形式！")
	  }
	  if (start > end) {
	    stop("起始或结束年份输入有误！")
	  }
	  if (length(type) != 1 || !type %in% c("b", "c", "i")) {
	    stop("错误的财务报告类型！请检查参数type...")
	  }
	  # Nothing to fetch; also avoids creating a progress bar with max == min.
	  if (length(stockid) == 0) {
	    return(NULL)
	  }

	  # Literal markers that delimit field names and field values on the page.
	  name_open <- "<td class='dotborder' width='45%'><div class='tishi'><strong>"
	  name_close <- "</strong></div></td><td>"
	  value_open <- "<td><div class='tishi'>"
	  value_close <- "</div></td><tr>"
	  empty_span <- "\t\t\t\t\t<span id=\"ControlEx1_lbl\"></span>"

	  # Text between every `open` marker and the corresponding `close` marker;
	  # character(0) when either marker does not occur in `text`.
	  extract_between <- function(text, open, close) {
	    from <- as.integer(gregexpr(open, text)[[1]])
	    to <- as.integer(gregexpr(close, text)[[1]])
	    if (from[1] == -1L || to[1] == -1L) {
	      return(character(0))
	    }
	    substring(text, from + nchar(open), to - 1L)
	  }

	  # The statement type fixes the endpoint, so pick it once.
	  base_url <- switch(type,
	    b = "http://stockdata.stock.hexun.com/2008/zcfz.aspx?stockid=",
	    i = "http://stockdata.stock.hexun.com/2008/lr.aspx?stockid=",
	    c = "http://stockdata.stock.hexun.com/2008/xjll.aspx?stockid="
	  )
	  # Pages are re-encoded from gb2312 to UTF-8 only on linux-gnu builds.
	  needs_iconv <- R.version$os == "linux-gnu"

	  # as.character(600000) yields "6e+05"; format numeric codes without
	  # scientific notation so the requested code is the one the caller passed.
	  if (is.numeric(stockid)) {
	    stockid <- format(stockid, scientific = FALSE, trim = TRUE)
	  } else {
	    stockid <- as.character(stockid)
	  }

	  total <- length(stockid) * (end - start + 1) * 4
	  pb <- txtProgressBar(min = 0, max = total, style = 3)
	  # Close the progress bar on every exit path, including errors.
	  on.exit(close(pb), add = TRUE)

	  rows <- list()
	  ref_names <- NULL
	  step <- 0
	  for (stock in stockid) {
	    stock <- paste0(strrep("0", max(0L, 6L - nchar(stock))), stock)
	    if (nchar(stock) > 6) {
	      warning(paste0("invalid stock code: ", stock))
	      next
	    }
	    for (year in start:end) {
	      # NOTE(review): the first-quarter key is ".03.15", kept exactly as in
	      # the original code; confirm against the site before changing it.
	      for (term in paste0(year, c(".03.15", ".06.30", ".09.30", ".12.31"))) {
	        step <- step + 1
	        page <- readLines(paste0(base_url, stock, "&accountdate=", term))
	        if (needs_iconv) {
	          page <- iconv(page, from = "gb2312", to = "UTF-8")
	        }
	        setTxtProgressBar(pb, step)

	        hit <- page[grep("<span id=\"ControlEx1_lbl\">", page)]
	        # Scalar `||` short-circuits, so a page without the span is skipped
	        # instead of failing on a zero-length condition.
	        if (length(hit) == 0 || hit[1] == empty_span) {
	          next
	        }
	        line <- hit[1]

	        field_names <- extract_between(line, name_open, name_close)
	        # A span without any name marker carries no table; treat as no data.
	        if (length(field_names) == 0) {
	          next
	        }
	        col_names <- c("股票代码", field_names)
	        if (!is.null(ref_names) && !identical(col_names, ref_names)) {
	          stop(paste0("看来股票", stock, "的数据格式在", year, "年发生了变化！"))
	        }

	        values <- extract_between(line, value_open, value_close)
	        values <- gsub(",", "", values)
	        rows[[length(rows) + 1]] <- c(stock, values)
	        ref_names <- col_names
	      }
	    }
	  }

	  if (length(rows) == 0) {
	    return(NULL)
	  }
	  value.df <- as.data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
	  value.df[value.df == "--"] <- NA
	  # Columns 1-2 and the last column stay as text, as in the original code.
	  if (ncol(value.df) >= 4) {
	    numeric_cols <- 3:(ncol(value.df) - 1)
	    value.df[numeric_cols] <- lapply(value.df[numeric_cols], as.numeric)
	  }
	  colnames(value.df) <- ref_names
	  value.df
	}