Skip to content

Instantly share code, notes, and snippets.

@sillasgonzaga
Created April 26, 2018 17:52
Show Gist options
  • Save sillasgonzaga/9d67c23044d963f82fc0c5e7d311335a to your computer and use it in GitHub Desktop.
Save sillasgonzaga/9d67c23044d963f82fc0c5e7d311335a to your computer and use it in GitHub Desktop.
library(tabulizer)
library(tidyverse)
# Path of the PDF to parse.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
arquivo <- "C:/Users/sillas.gonzaga/Downloads/gsa.pdf"

# Extract every table tabulizer detects with the "lattice" method.
# The result is treated below as a list with one matrix per detected table
# (presumably tabulizer's default matrix output -- confirm).
tab <- tabulizer::extract_tables(
  arquivo,
  encoding = "UTF-8",
  method = "lattice"
)

# Print the dimensions of each extracted table for inspection.
map(tab, dim)

# Keep only the tables that have exactly 8 columns.
tabelas_8_col <- keep(tab, ~ ncol(.x) == 8)
if (length(tabelas_8_col) == 0) {
  # Fail here with a clear message instead of later with a missing-column
  # error when V5:V8 are referenced.
  stop("No 8-column tables were extracted from ", arquivo, call. = FALSE)
}

# Convert each kept table to a tibble and stack them.
# Column names V1..V8 are set explicitly because later steps refer to them;
# this avoids depending on tibble's deprecated auto-naming of unnamed
# matrix columns (and on the deprecated `as.tibble()` alias).
# NOTE(review): `pagina` is the position of the table among the kept
# 8-column tables, which is not necessarily the PDF page number.
df_operacoes <- tabelas_8_col %>%
  imap(function(tabela, indice) {
    colnames(tabela) <- paste0("V", seq_len(ncol(tabela)))
    as_tibble(tabela) %>%
      mutate(pagina = indice)
  }) %>%
  bind_rows()

glimpse(df_operacoes)
# Columns 5 to 8 are numeric.

# Convert text such as "R$ 1.234,56" into a number.
#
# Steps: drop every "R" and "$" character, drop carriage returns, drop
# periods (used as thousands separators), turn the decimal comma into a
# period, then coerce with as.numeric(). Values that still are not numeric
# after cleaning become NA (as.numeric() emits its usual coercion warning).
#
# x: character vector to clean.
# Returns a numeric vector the same length as `x`.
limpar_col_numerica <- function(x) {
  sem_moeda <- stringr::str_remove_all(x, "[R$]")
  sem_quebra <- stringr::str_remove_all(sem_moeda, "[\r]")
  sem_milhar <- stringr::str_remove_all(sem_quebra, "[.]")
  com_ponto_decimal <- stringr::str_replace_all(sem_milhar, ",", ".")
  as.numeric(com_ponto_decimal)
}
# Normalise a text column.
#
# Carriage returns become single spaces, the listed spellings of
# "Preferencial" (including misread variants) are replaced by "PN", and the
# result is upper-cased. The replacement runs before upper-casing, so it
# only matches the exact capitalisation written in the pattern.
#
# x: character vector to clean.
# Returns a character vector the same length as `x`.
limpar_col_char <- function(x) {
  stringr::str_to_upper(
    stringr::str_replace_all(
      stringr::str_replace_all(x, "[\r]", " "),
      "Preferemcal|Preferenciall|Preferenciais|Preferencial",
      "PN"
    )
  )
}
# Clean the stacked tables:
#   1. parse V5..V8 as numbers;
#   2. drop rows whose V7 could not be parsed (NA after cleaning);
#   3. store V5 as integer (as.integer() truncates any fractional part);
#   4. normalise the text columns V1..V4;
#   5. number the remaining rows within each `pagina`.
# `across()` replaces the superseded `mutate_at(vars(...))` form.
df_operacoes <- df_operacoes %>%
  mutate(across(V5:V8, limpar_col_numerica)) %>%
  filter(!is.na(V7)) %>%
  mutate(V5 = as.integer(V5)) %>%
  mutate(across(V1:V4, limpar_col_char)) %>%
  group_by(pagina) %>%
  mutate(linha_pagina = row_number()) %>%
  ungroup()
# Frequency of each distinct value found in column V4.
table(df_operacoes[["V4"]])

# Sum of V8 for each value of V4.
df_operacoes %>%
  group_by(V4) %>%
  summarise(v = sum(V8))

# Sum of V8 per `pagina`, restricted to rows whose V4 is "VENDA".
df_operacoes %>%
  filter(V4 == "VENDA") %>%
  group_by(pagina) %>%
  summarise(s = sum(V8))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment