Created
January 26, 2018 22:01
-
-
Save wilsonfreitas/bb77b7da02f96f83d3bff11aca7303b7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(httr) | |
library(xml2) | |
get_deb_list <- function(.which = c("registrados", "excluidos", "todos")) { | |
.which <- match.arg(.which) | |
.which <- switch(.which, | |
registrados = "False", | |
excluidos = "True", | |
todos = "Nada") | |
# op_exc=False para debentures registrados | |
# op_exc=True para debentures excluidos | |
# op_exc=Nada para todas debentures | |
url <- sprintf("http://www.debentures.com.br/exploreosnd/consultaadados/emissoesdedebentures/caracteristicas_r.asp?tip_deb=publicas&op_exc=%s", .which) | |
r <- GET(url) | |
if (r$status_code != 200) | |
stop("Contract list unreacheable - status code = ", r$status_code) | |
x <- content(r, as = "raw") | |
parsed <- read_html(rawToChar(x)) | |
tx <- xml_find_all(parsed, "//table[@class = 'Tab10333333']") | |
if (length(tx) > 1) | |
warning("More than one table.Tab10333333") | |
tx <- tx[[1]] | |
ax <- xml_find_all(tx, ".//tr/td/a") | |
href_ <- xml_attr(ax, "href") | |
str_match(href_, "selecao=(\\w+\\s?\\d+)\\W")[,2] | |
} | |
library(readr) | |
library(readxl) | |
library(stringr) | |
library(transmute) | |
library(tibble) | |
library(dplyr) | |
library(httr) | |
library(glue) | |
# ---- | |
set_config(use_proxy(url="10.0.0.24",port=8080, username="wfreitas", password="Kkkmybb1")) | |
source("scripts/ANBIMA_functions.R") | |
DIRECTORY <- "data/ANBIMA/Agenda" | |
if (!dir.exists(DIRECTORY)) dir.create(DIRECTORY) | |
# ---- | |
mapping <- readxl::read_xlsx("data/Mapeamento_B3-ANBIMA.xlsx") | |
deb_list <- mapping$TickerANBIMA %>% na.omit() %>% unique() | |
# ---- | |
lapply(deb_list, function(ticker) { | |
url_tpl <- 'http://www.debentures.com.br/exploreosnd/consultaadados/eventosfinanceiros/agenda_e.asp?ativo={ticker}&dt_ini=01/01/1970&dt_fim=31/12/2029' | |
url <- glue::glue(url_tpl, ticker = ticker) | |
res <- GET(url) | |
res_rw <- content(res, as = "raw") | |
writeChar(rawToChar(res_rw), file.path(DIRECTORY, glue("{ticker}.txt", ticker = ticker))) | |
futile.logger::flog.info("%s saved", ticker) | |
}) | |
# ---- | |
fnames <- list.files(DIRECTORY, full.names = TRUE) | |
cashflows <- lapply(fnames, function(fname) { | |
df <- readr::read_delim(fname, "\t", skip = 2, col_types = cols(), | |
locale = locale("pt", encoding = "latin1")) | |
coupons <- df %>% filter(df$Evento == "Juros") %>% | |
select(`Data do Evento`, `Data do Pagamento`, `Ativo`) %>% | |
rename(date = `Data do Evento`, date_adj = `Data do Pagamento`, ticker = `Ativo`) %>% | |
mutate(date = as.Date(date, "%d/%m/%Y"), date_adj = as.Date(date_adj, "%d/%m/%Y")) | |
amort <- df %>% filter(df$Evento != "Juros") %>% | |
select(`Data do Evento`, `Data do Pagamento`, `Ativo`) %>% | |
rename(date = `Data do Evento`, date_adj = `Data do Pagamento`, ticker = `Ativo`) %>% | |
mutate(date = as.Date(date, "%d/%m/%Y"), date_adj = as.Date(date_adj, "%d/%m/%Y")) | |
list(coupons = coupons$date, amort = amort$date, ticker = str_trim(amort$ticker[1])) | |
}) | |
# ---- | |
df <- readr::read_csv("VNRAMT_20170831202548587.csv", col_types = cols()) | |
cashflows_gps <- lapply(split(df, df$simbolo), function(df_) { | |
if (all(is.na(df_$AMTValor))) | |
return(NULL) | |
list( | |
coupons = as.Date(as.character(df_$VNRData), "%Y%m%d"), | |
amort = as.Date(as.character(df_$AMTData), "%Y%m%d"), | |
ticker = df_$simbolo[1] | |
) | |
}) | |
cashflows_gps <- Filter(function(x) !is.null(x), cashflows_gps) | |
# ---- | |
ls_ <- lapply(seq_along(cashflows), function (ix) { | |
cf <- cashflows[[ix]] | |
ticker <- cf$ticker | |
if (is.na(ticker)) return(NULL) | |
ix <- which(mapping$TickerANBIMA == ticker) | |
if (length(ix) == 0) return(NULL) | |
futile.logger::flog.info(mapping$TickerB3[ix]) | |
cf_gps <- cashflows_gps[[ mapping$TickerB3[ix] ]] | |
data.frame( | |
coupon_length_check = length(cf$coupons) == length(cf_gps$coupons), | |
coupon_length = length(cf$coupons), | |
coupon_length_gps = length(cf_gps$coupons), | |
amort_length_check = length(cf$amort) == length(cf_gps$amort), | |
amort_length = length(cf$amort), | |
amort_length_gps = length(cf_gps$amort) | |
# amort_content = sum(cf$amort == cf_gps$amort) | |
) | |
}) | |
do.call(rbind, ls_) %>% View() | |
library(transmute) | |
library(tibble) | |
library(dplyr) | |
library(xlsx) | |
DIRECTORY <- "data/ANBIMA/cadastro" | |
namesonly <- function(x) { | |
if (!is.list(x)) | |
return("") | |
nx <- names(x) | |
if (is.null(nx)) { | |
lx <- lapply(x, namesonly) | |
} else { | |
lx <- lapply(nx, function(.nx) namesonly(x[[.nx]])) | |
lx <- mapply(function(x, y) { | |
sx <- paste(x, y, sep = "::") | |
sub("::$", "", sx) | |
}, nx, lx, USE.NAMES = FALSE) | |
} | |
Reduce(c, lx) | |
} | |
.value <- function(m, k) { | |
if (is.list(m)) { | |
val <- if (is.null(m[[k]])) NA_character_ else m[[k]] | |
val <- if (!is.na(val) && val == "-") NA_character_ else val | |
val | |
} else { | |
if (is.null(m)) NA_character_ else m | |
} | |
} | |
.get <- function(m, k, r) { | |
if (length(r) == 0) | |
.value(m, k) | |
else { | |
.get(m[[k]], r[1], r[-1]) | |
} | |
} | |
getvalues <- function(.ls, .nx) { | |
val <- lapply(.nx, function(x, M) { | |
kx <- stringr::str_split(x, "::") | |
sapply(kx, function(.kx) .get(M, .kx[1], .kx[-1])) | |
}, .ls) | |
setNames(val, .nx) | |
} | |
fnames <- list.files(DIRECTORY, "*.json", full.names = TRUE) | |
debs_spec <- lapply(fnames, function(fname) { | |
jsonlite::fromJSON(readLines(fname), simplifyDataFrame = FALSE) | |
}) | |
keys_names <- lapply(debs_spec, function(spec) { | |
namesonly(spec) | |
}) | |
keys_names <- Reduce(function(x, y) unique(c(x, y)), keys_names) | |
debs_flat <- lapply(debs_spec, function(x) getvalues(x, keys_names)) | |
debs_tbl <- bind_rows(debs_flat) | |
# match_regex("^\\d+,?\\d*$", function(text, match) { | |
# as.numeric(sub(",", ".", text)) | |
# }, priority = 2), | |
tr <- transmuter( | |
match_regex("^(\\d+\\.)*\\d+,?\\d*$", function(text, match) { | |
as.numeric(sub(",", ".", gsub("\\.", "", text))) | |
}), | |
match_regex("^\\d\\d/\\d\\d/\\d\\d\\d\\d$", function(text, match) { | |
as.Date(text, "%d/%m/%Y") | |
}), | |
match_regex("^Sim|N.o$", function(text, match) { | |
text == "Sim" | |
}) | |
) | |
debs_tbl.x <- transmute::transmute(tr, as.data.frame(debs_tbl)) | |
# write.table(debs_tbl.x, "ANBIMA.csv", sep = ";", dec = ",", row.names = FALSE, na = "") | |
write.xlsx2(debs_tbl.x, file = file.path(DIRECTORY, "..", "ANBIMA_DEBENTURES.xlsx"), | |
row.names = FALSE, sheetName = "DEBENTURES") | |
x <- readxl::read_excel("scripts/mapeamento_colunas_debentures_ANBIMA.xlsx", col_names = FALSE) | |
df.x <- debs_tbl.x[,x$X__2 != "-"] | |
colnames(df.x) <- as.character(x$X__2[x$X__2 != "-"]) | |
df <- lapply(stringr::str_split(df.x$valor_nominal, "\\s+"), function(x) { | |
data.frame(moeda = x[1], valor_nominal = as.numeric(sub(",", ".", gsub("\\.", "", x[2])))) | |
}) | |
df <- do.call(rbind, df) | |
df.x$moeda <- df$moeda | |
df.x$valor_nominal <- df$valor_nominal | |
# write.table(df.x, "debentures_ANBIMA_simplificado.csv", sep = ";", dec = ",", row.names = FALSE, na = "") | |
write.xlsx2(df.x, file = file.path(DIRECTORY, "..", "ANBIMA_DEBENTURES.xlsx"), | |
row.names = FALSE, sheetName = "DEBENTURES_SIMPLIFICADO", append = TRUE) | |
library(xlsx) | |
# URL <- 'http://bvmf.bmfbovespa.com.br/rendafixa/DownloadResultadoResumoEmissoes.asp?intAnoEmiss=0&intMesEmiss=0&strRzSocial=&strCodEmiss=&strAgFiduc=&strBcoLider=&strGarantia=&strIndexador=&strClasse=&curVolume=0&intAnoPrazo=&intMesPrazo=&COD_TITU=1&strEmiDebPad=&strCodForMer=&strMercado=&Art1=&Art2=' | |
# intAnoEmiss=0 & | |
# intMesEmiss=0 & | |
# strRzSocial= & | |
# strCodEmiss= & | |
# strAgFiduc= & | |
# strBcoLider= & | |
# strGarantia= & | |
# strIndexador= & | |
# strClasse= & | |
# curVolume=0 & | |
# intAnoPrazo= & | |
# intMesPrazo= & | |
# COD_TITU=1 & | |
# strEmiDebPad= & | |
# strCodForMer= & | |
# strMercado= & | |
# Art1= & | |
# Art2= | |
# -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4,la;q=0.2' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cookie: idioma=pt-br; ASPSESSIONIDASSBCBRS=NMLJFLNCDFIHFBDMAJMJGKLK; ASPSESSIONIDQCBAQDTA=AJEPHDHCOFBCDPFLKFAODHGP; _ga=GA1.3.566248668.1421426041; _gid=GA1.3.1333907278.1504277354; ASPSESSIONIDQCBAQBTB=CBHALNGAIIMILMLJODJFDEDM; _ga=GA1.4.566248668.1421426041; _gid=GA1.4.1333907278.1504277354; TS01871345=011d592ce16f82deadeb997fb5df87baa997e23e316a7aff62052d81e4cb8f133996f53e57ff0c691a06d83fc4ee27b3b7d6492930625ad01ed651881f4040d49df0005f27' -H 'Connection: keep-alive' --compressed | |
# df <- readxl::read_xlsx("B3_debentures.xlsx") | |
library(XML) | |
library(glue) | |
library(stringr) | |
library(xlsx) | |
DIRECTORY <- "data/B3" | |
# read file | |
cadDoc <- xmlInternalTreeParse("W:/Compartilhados/Macros_Departamentais-246/DC-CMOA/APPS/SIP-DB/RawData/20170831/BVBG.028.02-2017-08-31.xml") | |
# Filter contracts by market code (Mkt) | |
# 1 is spot | |
# 2 is futures | |
# 3 is options on spot | |
# 4 is options on futures | |
# 5 is forward | |
contr_nodes <- getNodeSet(cadDoc, | |
"//d:SctyCtgy[text() = '71']/parent::*", c(d="urn:bvmf.100.02.xsd")) | |
contr_nodes <- getNodeSet(cadDoc, | |
"//d:SctyCtgy[text() = '38']/parent::*", c(d="urn:bvmf.100.02.xsd")) | |
contr_df <- lapply(contr_nodes, function(node) { | |
ticker <- xmlValue(node[['TckrSymb']]) | |
isin <- xmlValue(node[['ISIN']]) | |
trading_start_date <- xmlValue(node[['TradgStartDt']]) | |
trading_end_date <- xmlValue(node[['TradgEndDt']]) | |
risk_rating <- xmlValue(node[['RskRatg']]) | |
data.frame(Ticker = str_replace(ticker, "L1$", ""), | |
ISIN = isin, | |
TradingStartDate = as.Date(trading_start_date), | |
TradingEndDate = as.Date(trading_end_date), | |
RiskRating = risk_rating) | |
}) | |
contr_df <- do.call(rbind, contr_df) | |
write.xlsx(contr_df, file.path(DIRECTORY, "B3_DEBENTURES.xlsx"), | |
row.names = FALSE) | |
library(httr) | |
httr::set_config(httr::use_proxy(url="10.0.0.24",port=8080, username="wfreitas", password="Kkkmybb1")) | |
get_api_settings <- function() { | |
url <- parse_url("https://calculadorarendafixa.com.br") | |
r <- GET(url) | |
m <- stringr::str_match(content(r, as = "text"), "authorization = '([\\d\\w]*)'") | |
auth_code <- if (! any(is.na(m[1, 2]))) m[1,2] else NULL | |
m <- stringr::str_match(content(r, as = "text"), "apiurl = '(.*)'") | |
api_url <- if (! any(is.na(m[1, 2]))) m[1,2] else NULL | |
list(auth_code = auth_code, api_url = api_url) | |
} | |
settings <- get_api_settings() | |
connect_calculadora <- function(x, auth_code) { | |
httr::VERB( | |
verb = "GET", url = x, | |
httr::add_headers( | |
`Accept-Encoding` = "gzip, deflate, sdch, br", | |
`Accept-Language` = "pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4", | |
`Accept` = "text/javascript, text/html, application/xml, text/xml, */*", | |
`Authorization` = auth_code, | |
`Auth-Code` = auth_code, | |
`X-Requested-With` = "XMLHttpRequest", | |
`Connection` = "keep-alive", | |
`Referer` = "https://calculadorarendafixa.com.br/", | |
`User-Agent` = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" | |
) | |
) | |
} | |
list_bond_codes <- function() { | |
r <- connect_calculadora(modify_url("https://calculadorarendafixa.com.br", | |
path = c(settings$api_url, "listBondCodes")), | |
settings$auth_code) | |
if (status_code(r) == 200) { | |
x <- jsonlite::fromJSON(content(r, as = "text", encoding = "UTF-8")) | |
if (is.list(x) && exists("err", x) && x$err) | |
stop("Problems downloading bond codes. API HTTP status code = ", x$httpStatusCode, | |
"; API Message = ", x$message) | |
else | |
x | |
} else | |
stop("Unable to download bond codes. status = ", status_code(r)) | |
} | |
get_bond_details <- function(bond_code, as = c("json", "text")) { | |
as <- match.arg(as) | |
url_ <- modify_url("https://calculadorarendafixa.com.br", | |
path = c(settings$api_url, "getBondDetails", bond_code)) | |
r <- connect_calculadora(url_, settings$auth_code) | |
if (status_code(r) == 200) { | |
text <- content(r, as = "text", encoding = "UTF-8") | |
if (as == "text") | |
return(text) | |
x <- jsonlite::fromJSON(text) | |
if (is.list(x) && exists("err", x) && x$err) | |
stop("Problems downloading bond details. API HTTP status code = ", x$httpStatusCode, | |
"; API Message = ", x$message) | |
else | |
x | |
} else | |
stop("Unable to download bond details. status = ", status_code(r)) | |
} | |
calc_bond <- function(bond_code, refdate, yield) { | |
url_ <- modify_url("https://calculadorarendafixa.com.br", | |
path = c(settings$api_url, "calcPU", bond_code, refdate, yield)) | |
r <- connect_calculadora(url_, settings$auth_code) | |
if (status_code(r) == 200) { | |
x <- jsonlite::fromJSON(content(r, as = "text", encoding = "UTF-8")) | |
if (is.list(x) && exists("err", x) && x$err) | |
stop("Problems downloading bond results. API HTTP status code = ", x$httpStatusCode, | |
"; API Message = ", x$message) | |
else | |
x | |
} else | |
stop("Unable to download bond results. status = ", status_code(r)) | |
} | |
# $err | |
# [1] TRUE | |
# | |
# $httpStatusCode | |
# [1] 400 | |
# | |
# $httpStatusMessage | |
# [1] "Bad Request" | |
# | |
# $message | |
# [1] "Debenture AAAA11 não existente" | |
library(xlsx) | |
DIRECTORY <- "data/CETIP/cadastro" | |
details <- lapply(list.files(DIRECTORY, pattern = "[^_]+.json", full.names = TRUE), function(fname) { | |
jsonlite::fromJSON(readLines(fname)) | |
}) | |
details_df <- lapply(details, function(x) { | |
data.frame( | |
symbol = x$codbond, | |
anniversary_day = x$anniversaryday, | |
start_date = x$startingdate, | |
maturity_date = x$expiredate, | |
issue_date = x$issuedate, | |
index = x$method, | |
issuer = x$issuer, | |
notional_value = x$vne, | |
coupon_rate = x$yield, | |
stringsAsFactors = FALSE | |
) | |
}) | |
details_df <- do.call(rbind, details_df) | |
write.xlsx2(details_df, file = file.path(DIRECTORY, "..", "CETIP_DEBENTURES.xlsx"), | |
row.names = FALSE, sheetName = "DEBENTURES") | |
cashflow_df <- lapply(details, function(x) { | |
df <- x$events | |
df_A <- subset(df, eventType == "A") | |
df_J <- subset(df, eventType == "J") | |
df_A$eventType <- NULL | |
df_J$eventType <- NULL | |
if (nrow(df_A) > 0 && nrow(df_J) > 0) { | |
df_A$amort <- df_A$yield | |
df_J$coupon <- df_J$yield | |
df_J$yield <- NULL | |
df_A$yield <- NULL | |
df <- merge(df_A, df_J, by = "date", all = TRUE) | |
} else if (nrow(df_A) == 0) { | |
df_J$coupon <- df_J$yield | |
df_J$amort <- NA | |
df_J$yield <- NULL | |
df <- df_J | |
} else if (nrow(df_J) == 0) { | |
df_A$amort <- df_A$yield | |
df_A$coupon <- NA | |
df_A$yield <- NULL | |
df <- df_A | |
} | |
df$symbol <- x$codbond | |
ox <- order(df$date) | |
df[ox,colnames(df)[c(ncol(df),seq_len(ncol(df)-1))]] | |
}) | |
cashflow_df <- do.call(rbind, cashflow_df) | |
write.xlsx2(cashflow_df, file = file.path(DIRECTORY, "..", "CETIP_DEBENTURES.xlsx"), | |
row.names = FALSE, sheetName = "CASHFLOW", append = TRUE) | |
library(digest) | |
source("scripts/CETIP_calculadorarendafixa_functions.R") | |
OUT_DIRECTORY <- "data/CETIP/cadastro" | |
if (! dir.exists(OUT_DIRECTORY)) | |
stop(OUT_DIRECTORY, " does not exist!") | |
HASHES_FILE <- file.path(OUT_DIRECTORY, "hashes.rda") | |
strsort <- function(x) sapply(lapply(strsplit(x, NULL), sort), paste, collapse="") | |
save_file <- function(code) { | |
fname <- file.path(OUT_DIRECTORY, paste0(code, ".json")) | |
if (file.exists(fname)) { | |
fname <- file.path(OUT_DIRECTORY, paste0(code, "_", format(Sys.Date(), "%Y%m%d"), ".json")) | |
} | |
writeChar(x, fname) | |
} | |
codes <- list_bond_codes() | |
bonds_details <- lapply(codes, function(code) { | |
details <- try(get_bond_details(code, as = "text")) | |
if (is(details, "try-error")) { | |
futile.logger::flog.error("%s mensagem = %s", code, conditionMessage(attr(details, "condition"))) | |
NULL | |
} else { | |
futile.logger::flog.info("%s baixado", code) | |
details | |
} | |
}) | |
names(bonds_details) <- codes | |
hashes <- lapply(bonds_details, function(x) { | |
x <- strsort(stringr::str_trim(x)) | |
digest(x, algo = "sha1") | |
}) | |
STORE <- new.env() | |
load(HASHES_FILE, STORE) | |
checked <- sapply(names(STORE$hashes), function(nx) { | |
if (exists(nx, hashes)) { | |
hashes[[nx]] == STORE$hashes[[nx]] | |
} else NA | |
}) | |
not_checked <- names(STORE$hashes)[!checked] | |
not_checked <- Filter(function(x) !is.na(x), not_checked) | |
for (code in not_checked) { | |
futile.logger::flog.info("Atualizando %s debênture", code) | |
x <- bonds_details[[code]] | |
STORE$hashes[[code]] <- hashes[[code]] | |
save_file(code) | |
} | |
if ( any(! names(hashes) %in% names(STORE$hashes)) ) { | |
futile.logger::flog.info("%d debêntures novas", length(missing)) | |
ix <- ! names(hashes) %in% names(STORE$hashes) | |
for (code in names(hashes)[ix]) { | |
futile.logger::flog.info("Salvando %s debênture", code) | |
x <- bonds_details[[code]] | |
STORE$hashes[[code]] <- hashes[[code]] | |
save_file(code) | |
} | |
} | |
hashes <- STORE$hashes | |
save(hashes, file = HASHES_FILE) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment