Skip to content

Instantly share code, notes, and snippets.

@wilsonfreitas
Created January 26, 2018 22:01
Show Gist options
  • Save wilsonfreitas/bb77b7da02f96f83d3bff11aca7303b7 to your computer and use it in GitHub Desktop.
Save wilsonfreitas/bb77b7da02f96f83d3bff11aca7303b7 to your computer and use it in GitHub Desktop.
library(httr)
library(xml2)
get_deb_list <- function(.which = c("registrados", "excluidos", "todos")) {
.which <- match.arg(.which)
.which <- switch(.which,
registrados = "False",
excluidos = "True",
todos = "Nada")
# op_exc=False para debentures registrados
# op_exc=True para debentures excluidos
# op_exc=Nada para todas debentures
url <- sprintf("http://www.debentures.com.br/exploreosnd/consultaadados/emissoesdedebentures/caracteristicas_r.asp?tip_deb=publicas&op_exc=%s", .which)
r <- GET(url)
if (r$status_code != 200)
stop("Contract list unreacheable - status code = ", r$status_code)
x <- content(r, as = "raw")
parsed <- read_html(rawToChar(x))
tx <- xml_find_all(parsed, "//table[@class = 'Tab10333333']")
if (length(tx) > 1)
warning("More than one table.Tab10333333")
tx <- tx[[1]]
ax <- xml_find_all(tx, ".//tr/td/a")
href_ <- xml_attr(ax, "href")
str_match(href_, "selecao=(\\w+\\s?\\d+)\\W")[,2]
}
library(readr)
library(readxl)
library(stringr)
library(transmute)
library(tibble)
library(dplyr)
library(httr)
library(glue)
# ----
set_config(use_proxy(url="10.0.0.24",port=8080, username="wfreitas", password="Kkkmybb1"))
source("scripts/ANBIMA_functions.R")
DIRECTORY <- "data/ANBIMA/Agenda"
if (!dir.exists(DIRECTORY)) dir.create(DIRECTORY)
# ----
mapping <- readxl::read_xlsx("data/Mapeamento_B3-ANBIMA.xlsx")
deb_list <- mapping$TickerANBIMA %>% na.omit() %>% unique()
# ----
lapply(deb_list, function(ticker) {
url_tpl <- 'http://www.debentures.com.br/exploreosnd/consultaadados/eventosfinanceiros/agenda_e.asp?ativo={ticker}&dt_ini=01/01/1970&dt_fim=31/12/2029'
url <- glue::glue(url_tpl, ticker = ticker)
res <- GET(url)
res_rw <- content(res, as = "raw")
writeChar(rawToChar(res_rw), file.path(DIRECTORY, glue("{ticker}.txt", ticker = ticker)))
futile.logger::flog.info("%s saved", ticker)
})
# ----
fnames <- list.files(DIRECTORY, full.names = TRUE)
cashflows <- lapply(fnames, function(fname) {
df <- readr::read_delim(fname, "\t", skip = 2, col_types = cols(),
locale = locale("pt", encoding = "latin1"))
coupons <- df %>% filter(df$Evento == "Juros") %>%
select(`Data do Evento`, `Data do Pagamento`, `Ativo`) %>%
rename(date = `Data do Evento`, date_adj = `Data do Pagamento`, ticker = `Ativo`) %>%
mutate(date = as.Date(date, "%d/%m/%Y"), date_adj = as.Date(date_adj, "%d/%m/%Y"))
amort <- df %>% filter(df$Evento != "Juros") %>%
select(`Data do Evento`, `Data do Pagamento`, `Ativo`) %>%
rename(date = `Data do Evento`, date_adj = `Data do Pagamento`, ticker = `Ativo`) %>%
mutate(date = as.Date(date, "%d/%m/%Y"), date_adj = as.Date(date_adj, "%d/%m/%Y"))
list(coupons = coupons$date, amort = amort$date, ticker = str_trim(amort$ticker[1]))
})
# ----
df <- readr::read_csv("VNRAMT_20170831202548587.csv", col_types = cols())
cashflows_gps <- lapply(split(df, df$simbolo), function(df_) {
if (all(is.na(df_$AMTValor)))
return(NULL)
list(
coupons = as.Date(as.character(df_$VNRData), "%Y%m%d"),
amort = as.Date(as.character(df_$AMTData), "%Y%m%d"),
ticker = df_$simbolo[1]
)
})
cashflows_gps <- Filter(function(x) !is.null(x), cashflows_gps)
# ----
ls_ <- lapply(seq_along(cashflows), function (ix) {
cf <- cashflows[[ix]]
ticker <- cf$ticker
if (is.na(ticker)) return(NULL)
ix <- which(mapping$TickerANBIMA == ticker)
if (length(ix) == 0) return(NULL)
futile.logger::flog.info(mapping$TickerB3[ix])
cf_gps <- cashflows_gps[[ mapping$TickerB3[ix] ]]
data.frame(
coupon_length_check = length(cf$coupons) == length(cf_gps$coupons),
coupon_length = length(cf$coupons),
coupon_length_gps = length(cf_gps$coupons),
amort_length_check = length(cf$amort) == length(cf_gps$amort),
amort_length = length(cf$amort),
amort_length_gps = length(cf_gps$amort)
# amort_content = sum(cf$amort == cf_gps$amort)
)
})
do.call(rbind, ls_) %>% View()
library(transmute)
library(tibble)
library(dplyr)
library(xlsx)
DIRECTORY <- "data/ANBIMA/cadastro"
namesonly <- function(x) {
if (!is.list(x))
return("")
nx <- names(x)
if (is.null(nx)) {
lx <- lapply(x, namesonly)
} else {
lx <- lapply(nx, function(.nx) namesonly(x[[.nx]]))
lx <- mapply(function(x, y) {
sx <- paste(x, y, sep = "::")
sub("::$", "", sx)
}, nx, lx, USE.NAMES = FALSE)
}
Reduce(c, lx)
}
.value <- function(m, k) {
if (is.list(m)) {
val <- if (is.null(m[[k]])) NA_character_ else m[[k]]
val <- if (!is.na(val) && val == "-") NA_character_ else val
val
} else {
if (is.null(m)) NA_character_ else m
}
}
.get <- function(m, k, r) {
if (length(r) == 0)
.value(m, k)
else {
.get(m[[k]], r[1], r[-1])
}
}
getvalues <- function(.ls, .nx) {
val <- lapply(.nx, function(x, M) {
kx <- stringr::str_split(x, "::")
sapply(kx, function(.kx) .get(M, .kx[1], .kx[-1]))
}, .ls)
setNames(val, .nx)
}
fnames <- list.files(DIRECTORY, "*.json", full.names = TRUE)
debs_spec <- lapply(fnames, function(fname) {
jsonlite::fromJSON(readLines(fname), simplifyDataFrame = FALSE)
})
keys_names <- lapply(debs_spec, function(spec) {
namesonly(spec)
})
keys_names <- Reduce(function(x, y) unique(c(x, y)), keys_names)
debs_flat <- lapply(debs_spec, function(x) getvalues(x, keys_names))
debs_tbl <- bind_rows(debs_flat)
# match_regex("^\\d+,?\\d*$", function(text, match) {
# as.numeric(sub(",", ".", text))
# }, priority = 2),
tr <- transmuter(
match_regex("^(\\d+\\.)*\\d+,?\\d*$", function(text, match) {
as.numeric(sub(",", ".", gsub("\\.", "", text)))
}),
match_regex("^\\d\\d/\\d\\d/\\d\\d\\d\\d$", function(text, match) {
as.Date(text, "%d/%m/%Y")
}),
match_regex("^Sim|N.o$", function(text, match) {
text == "Sim"
})
)
debs_tbl.x <- transmute::transmute(tr, as.data.frame(debs_tbl))
# write.table(debs_tbl.x, "ANBIMA.csv", sep = ";", dec = ",", row.names = FALSE, na = "")
write.xlsx2(debs_tbl.x, file = file.path(DIRECTORY, "..", "ANBIMA_DEBENTURES.xlsx"),
row.names = FALSE, sheetName = "DEBENTURES")
x <- readxl::read_excel("scripts/mapeamento_colunas_debentures_ANBIMA.xlsx", col_names = FALSE)
df.x <- debs_tbl.x[,x$X__2 != "-"]
colnames(df.x) <- as.character(x$X__2[x$X__2 != "-"])
df <- lapply(stringr::str_split(df.x$valor_nominal, "\\s+"), function(x) {
data.frame(moeda = x[1], valor_nominal = as.numeric(sub(",", ".", gsub("\\.", "", x[2]))))
})
df <- do.call(rbind, df)
df.x$moeda <- df$moeda
df.x$valor_nominal <- df$valor_nominal
# write.table(df.x, "debentures_ANBIMA_simplificado.csv", sep = ";", dec = ",", row.names = FALSE, na = "")
write.xlsx2(df.x, file = file.path(DIRECTORY, "..", "ANBIMA_DEBENTURES.xlsx"),
row.names = FALSE, sheetName = "DEBENTURES_SIMPLIFICADO", append = TRUE)
library(xlsx)
# URL <- 'http://bvmf.bmfbovespa.com.br/rendafixa/DownloadResultadoResumoEmissoes.asp?intAnoEmiss=0&intMesEmiss=0&strRzSocial=&strCodEmiss=&strAgFiduc=&strBcoLider=&strGarantia=&strIndexador=&strClasse=&curVolume=0&intAnoPrazo=&intMesPrazo=&COD_TITU=1&strEmiDebPad=&strCodForMer=&strMercado=&Art1=&Art2='
# intAnoEmiss=0 &
# intMesEmiss=0 &
# strRzSocial= &
# strCodEmiss= &
# strAgFiduc= &
# strBcoLider= &
# strGarantia= &
# strIndexador= &
# strClasse= &
# curVolume=0 &
# intAnoPrazo= &
# intMesPrazo= &
# COD_TITU=1 &
# strEmiDebPad= &
# strCodForMer= &
# strMercado= &
# Art1= &
# Art2=
# -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4,la;q=0.2' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cookie: idioma=pt-br; ASPSESSIONIDASSBCBRS=NMLJFLNCDFIHFBDMAJMJGKLK; ASPSESSIONIDQCBAQDTA=AJEPHDHCOFBCDPFLKFAODHGP; _ga=GA1.3.566248668.1421426041; _gid=GA1.3.1333907278.1504277354; ASPSESSIONIDQCBAQBTB=CBHALNGAIIMILMLJODJFDEDM; _ga=GA1.4.566248668.1421426041; _gid=GA1.4.1333907278.1504277354; TS01871345=011d592ce16f82deadeb997fb5df87baa997e23e316a7aff62052d81e4cb8f133996f53e57ff0c691a06d83fc4ee27b3b7d6492930625ad01ed651881f4040d49df0005f27' -H 'Connection: keep-alive' --compressed
# df <- readxl::read_xlsx("B3_debentures.xlsx")
library(XML)
library(glue)
library(stringr)
library(xlsx)
DIRECTORY <- "data/B3"
# read file
cadDoc <- xmlInternalTreeParse("W:/Compartilhados/Macros_Departamentais-246/DC-CMOA/APPS/SIP-DB/RawData/20170831/BVBG.028.02-2017-08-31.xml")
# Filter contracts by market code (Mkt)
# 1 is spot
# 2 is futures
# 3 is options on spot
# 4 is options on futures
# 5 is forward
contr_nodes <- getNodeSet(cadDoc,
"//d:SctyCtgy[text() = '71']/parent::*", c(d="urn:bvmf.100.02.xsd"))
contr_nodes <- getNodeSet(cadDoc,
"//d:SctyCtgy[text() = '38']/parent::*", c(d="urn:bvmf.100.02.xsd"))
contr_df <- lapply(contr_nodes, function(node) {
ticker <- xmlValue(node[['TckrSymb']])
isin <- xmlValue(node[['ISIN']])
trading_start_date <- xmlValue(node[['TradgStartDt']])
trading_end_date <- xmlValue(node[['TradgEndDt']])
risk_rating <- xmlValue(node[['RskRatg']])
data.frame(Ticker = str_replace(ticker, "L1$", ""),
ISIN = isin,
TradingStartDate = as.Date(trading_start_date),
TradingEndDate = as.Date(trading_end_date),
RiskRating = risk_rating)
})
contr_df <- do.call(rbind, contr_df)
write.xlsx(contr_df, file.path(DIRECTORY, "B3_DEBENTURES.xlsx"),
row.names = FALSE)
library(httr)
httr::set_config(httr::use_proxy(url="10.0.0.24",port=8080, username="wfreitas", password="Kkkmybb1"))
get_api_settings <- function() {
url <- parse_url("https://calculadorarendafixa.com.br")
r <- GET(url)
m <- stringr::str_match(content(r, as = "text"), "authorization = '([\\d\\w]*)'")
auth_code <- if (! any(is.na(m[1, 2]))) m[1,2] else NULL
m <- stringr::str_match(content(r, as = "text"), "apiurl = '(.*)'")
api_url <- if (! any(is.na(m[1, 2]))) m[1,2] else NULL
list(auth_code = auth_code, api_url = api_url)
}
settings <- get_api_settings()
connect_calculadora <- function(x, auth_code) {
httr::VERB(
verb = "GET", url = x,
httr::add_headers(
`Accept-Encoding` = "gzip, deflate, sdch, br",
`Accept-Language` = "pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4",
`Accept` = "text/javascript, text/html, application/xml, text/xml, */*",
`Authorization` = auth_code,
`Auth-Code` = auth_code,
`X-Requested-With` = "XMLHttpRequest",
`Connection` = "keep-alive",
`Referer` = "https://calculadorarendafixa.com.br/",
`User-Agent` = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
)
)
}
list_bond_codes <- function() {
r <- connect_calculadora(modify_url("https://calculadorarendafixa.com.br",
path = c(settings$api_url, "listBondCodes")),
settings$auth_code)
if (status_code(r) == 200) {
x <- jsonlite::fromJSON(content(r, as = "text", encoding = "UTF-8"))
if (is.list(x) && exists("err", x) && x$err)
stop("Problems downloading bond codes. API HTTP status code = ", x$httpStatusCode,
"; API Message = ", x$message)
else
x
} else
stop("Unable to download bond codes. status = ", status_code(r))
}
get_bond_details <- function(bond_code, as = c("json", "text")) {
as <- match.arg(as)
url_ <- modify_url("https://calculadorarendafixa.com.br",
path = c(settings$api_url, "getBondDetails", bond_code))
r <- connect_calculadora(url_, settings$auth_code)
if (status_code(r) == 200) {
text <- content(r, as = "text", encoding = "UTF-8")
if (as == "text")
return(text)
x <- jsonlite::fromJSON(text)
if (is.list(x) && exists("err", x) && x$err)
stop("Problems downloading bond details. API HTTP status code = ", x$httpStatusCode,
"; API Message = ", x$message)
else
x
} else
stop("Unable to download bond details. status = ", status_code(r))
}
calc_bond <- function(bond_code, refdate, yield) {
url_ <- modify_url("https://calculadorarendafixa.com.br",
path = c(settings$api_url, "calcPU", bond_code, refdate, yield))
r <- connect_calculadora(url_, settings$auth_code)
if (status_code(r) == 200) {
x <- jsonlite::fromJSON(content(r, as = "text", encoding = "UTF-8"))
if (is.list(x) && exists("err", x) && x$err)
stop("Problems downloading bond results. API HTTP status code = ", x$httpStatusCode,
"; API Message = ", x$message)
else
x
} else
stop("Unable to download bond results. status = ", status_code(r))
}
# $err
# [1] TRUE
#
# $httpStatusCode
# [1] 400
#
# $httpStatusMessage
# [1] "Bad Request"
#
# $message
# [1] "Debenture AAAA11 não existente"
library(xlsx)
DIRECTORY <- "data/CETIP/cadastro"
details <- lapply(list.files(DIRECTORY, pattern = "[^_]+.json", full.names = TRUE), function(fname) {
jsonlite::fromJSON(readLines(fname))
})
details_df <- lapply(details, function(x) {
data.frame(
symbol = x$codbond,
anniversary_day = x$anniversaryday,
start_date = x$startingdate,
maturity_date = x$expiredate,
issue_date = x$issuedate,
index = x$method,
issuer = x$issuer,
notional_value = x$vne,
coupon_rate = x$yield,
stringsAsFactors = FALSE
)
})
details_df <- do.call(rbind, details_df)
write.xlsx2(details_df, file = file.path(DIRECTORY, "..", "CETIP_DEBENTURES.xlsx"),
row.names = FALSE, sheetName = "DEBENTURES")
cashflow_df <- lapply(details, function(x) {
df <- x$events
df_A <- subset(df, eventType == "A")
df_J <- subset(df, eventType == "J")
df_A$eventType <- NULL
df_J$eventType <- NULL
if (nrow(df_A) > 0 && nrow(df_J) > 0) {
df_A$amort <- df_A$yield
df_J$coupon <- df_J$yield
df_J$yield <- NULL
df_A$yield <- NULL
df <- merge(df_A, df_J, by = "date", all = TRUE)
} else if (nrow(df_A) == 0) {
df_J$coupon <- df_J$yield
df_J$amort <- NA
df_J$yield <- NULL
df <- df_J
} else if (nrow(df_J) == 0) {
df_A$amort <- df_A$yield
df_A$coupon <- NA
df_A$yield <- NULL
df <- df_A
}
df$symbol <- x$codbond
ox <- order(df$date)
df[ox,colnames(df)[c(ncol(df),seq_len(ncol(df)-1))]]
})
cashflow_df <- do.call(rbind, cashflow_df)
write.xlsx2(cashflow_df, file = file.path(DIRECTORY, "..", "CETIP_DEBENTURES.xlsx"),
row.names = FALSE, sheetName = "CASHFLOW", append = TRUE)
library(digest)
source("scripts/CETIP_calculadorarendafixa_functions.R")
OUT_DIRECTORY <- "data/CETIP/cadastro"
if (! dir.exists(OUT_DIRECTORY))
stop(OUT_DIRECTORY, " does not exist!")
HASHES_FILE <- file.path(OUT_DIRECTORY, "hashes.rda")
strsort <- function(x) sapply(lapply(strsplit(x, NULL), sort), paste, collapse="")
save_file <- function(code) {
fname <- file.path(OUT_DIRECTORY, paste0(code, ".json"))
if (file.exists(fname)) {
fname <- file.path(OUT_DIRECTORY, paste0(code, "_", format(Sys.Date(), "%Y%m%d"), ".json"))
}
writeChar(x, fname)
}
codes <- list_bond_codes()
bonds_details <- lapply(codes, function(code) {
details <- try(get_bond_details(code, as = "text"))
if (is(details, "try-error")) {
futile.logger::flog.error("%s mensagem = %s", code, conditionMessage(attr(details, "condition")))
NULL
} else {
futile.logger::flog.info("%s baixado", code)
details
}
})
names(bonds_details) <- codes
hashes <- lapply(bonds_details, function(x) {
x <- strsort(stringr::str_trim(x))
digest(x, algo = "sha1")
})
STORE <- new.env()
load(HASHES_FILE, STORE)
checked <- sapply(names(STORE$hashes), function(nx) {
if (exists(nx, hashes)) {
hashes[[nx]] == STORE$hashes[[nx]]
} else NA
})
not_checked <- names(STORE$hashes)[!checked]
not_checked <- Filter(function(x) !is.na(x), not_checked)
for (code in not_checked) {
futile.logger::flog.info("Atualizando %s debênture", code)
x <- bonds_details[[code]]
STORE$hashes[[code]] <- hashes[[code]]
save_file(code)
}
if ( any(! names(hashes) %in% names(STORE$hashes)) ) {
futile.logger::flog.info("%d debêntures novas", length(missing))
ix <- ! names(hashes) %in% names(STORE$hashes)
for (code in names(hashes)[ix]) {
futile.logger::flog.info("Salvando %s debênture", code)
x <- bonds_details[[code]]
STORE$hashes[[code]] <- hashes[[code]]
save_file(code)
}
}
hashes <- STORE$hashes
save(hashes, file = HASHES_FILE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment