Skip to content

Instantly share code, notes, and snippets.

@jtrecenti
Created September 23, 2024 21:51
Show Gist options
  • Save jtrecenti/61cb2a55fcb9f16fab0df1a1f570b38d to your computer and use it in GitHub Desktop.
Save jtrecenti/61cb2a55fcb9f16fab0df1a1f570b38d to your computer and use it in GitHub Desktop.
# Exploratory request: fetch the first page of results and keep its cards.
u <- "https://consumidor.gov.br/pages/indicador/relatos/consultar"

# Form fields posted with the search. Every filter except `segmentoMercado`
# is sent as an empty string.
# NOTE(review): segment 4 is presumably air transport, judging by the output
# file names further down -- confirm against the site.
parametros <- list(
  indicePrimeiroResultado = 0,
  palavrasChave = "",
  segmentoMercado = 4,
  fornecedor = "",
  regiao = "",
  area = "",
  assunto = "",
  problema = "",
  dataInicio = "",
  dataTermino = "",
  avaliacao = "",
  nota = ""
)

r <- httr::POST(u, body = parametros, encode = "form")

# One node per <div> whose class attribute contains "cartao-relato".
cards <- xml2::xml_find_all(
  httr::content(r),
  "//div[contains(@class,'cartao-relato')]"
)
#' Turn one card node into a one-row tibble.
#'
#' Text is collected in two passes -- first from elements whose class
#' contains one of three `relatos-*` names, then from every `<p>`
#' descendant -- and assigned to columns purely by position.
#'
#' NOTE(review): the positional mapping assumes the `relatos-data` class
#' matches two elements (feeding `data_local` and `data_resposta`) and that
#' the `<p>` elements appear in the order reclamacao, resposta, nota,
#' feedback -- confirm against the page markup.
#'
#' @param card An xml2 node for a single card.
#' @return A one-row tibble with columns `empresa`, `status`, `data_local`,
#'   `data_resposta`, `reclamacao`, `resposta`, `nota` and `feedback`. A
#'   column is `NA` when no text exists at its position.
parse_card <- function(card) {
  # Whitespace-squished text of every node under `card` matching each XPath,
  # flattened into a single character vector in XPath order.
  squished_text <- function(xpaths) {
    matches <- lapply(
      xpaths,
      function(xp) xml2::xml_text(xml2::xml_find_all(card, xp))
    )
    stringr::str_squish(unlist(matches))
  }

  header_classes <- c(
    "relatos-nome-empresa",
    "relatos-status",
    "relatos-data"
  )
  header <- squished_text(
    sprintf(".//*[contains(@class,'%s')]", header_classes)
  )
  paragraphs <- squished_text(".//p")

  tibble::tibble(
    empresa = header[1],
    status = header[2],
    data_local = header[3],
    data_resposta = header[4],
    reclamacao = paragraphs[1],
    resposta = paragraphs[2],
    nota = paragraphs[3],
    feedback = paragraphs[4]
  )
}
# Preview: parse every card of the first page and stack the rows; `id` holds
# each card's position within the page.
cards |>
  purrr::map(parse_card) |>
  purrr::list_rbind(names_to = "id")
#' Fetch one page of results and parse its cards.
#'
#' Posts the search form with a result offset derived from `pag` and parses
#' every `cartao-relato` card in the response with `parse_card()`.
#'
#' @param pag 1-based page number. It is converted to the
#'   `indicePrimeiroResultado` offset as `(pag - 1) * 10` (10 is presumably
#'   the number of cards the site returns per request -- confirm).
#' @return A tibble with one row per card found, with an `id` column giving
#'   the card's position within the page. When the response contains no
#'   cards the result has no rows.
get_page <- function(pag) {
  u <- "https://consumidor.gov.br/pages/indicador/relatos/consultar"
  parametros <- list(
    # Sent as an integer: a double offset such as 1e5 would be converted to
    # the string "1e+05" when the form body is built.
    indicePrimeiroResultado = as.integer((pag - 1) * 10),
    palavrasChave = "",
    segmentoMercado = 4,
    fornecedor = "",
    regiao = "",
    area = "",
    assunto = "",
    problema = "",
    dataInicio = "",
    dataTermino = "",
    avaliacao = "",
    nota = ""
  )
  r <- httr::POST(u, body = parametros, encode = "form")
  # Surface HTTP failures instead of silently treating an error page as a
  # page with no cards. A warning (not an error) keeps a long multi-page
  # crawl running.
  httr::warn_for_status(r)
  cards <- r |>
    httr::content() |>
    xml2::xml_find_all("//div[contains(@class,'cartao-relato')]")
  purrr::map(cards, parse_card) |>
    purrr::list_rbind(names_to = "id")
}
# Crawl pages 1 to 400 (with a progress bar) and stack the per-page tibbles;
# `pag` records which page each row came from.
res <- seq_len(400) |>
  purrr::map(get_page, .progress = TRUE) |>
  purrr::list_rbind(names_to = "pag")
# Clean the raw scrape:
#   * split `data_local` at the first ", " into `data` and `local`;
#   * parse `data` as a day-month-year date;
#   * turn the `data_resposta` text into a date: `data` plus the first run of
#     digits found in the text, or plus 0 when it contains "no mesmo dia";
#   * parse `nota` as a number.
da_reclamacoes <- res |>
  tidyr::separate(
    data_local,
    into = c("data", "local"),
    sep = ", ",
    extra = "merge"
  ) |>
  dplyr::mutate(
    data = lubridate::dmy(data),
    data_resposta = data + readr::parse_number(
      dplyr::if_else(
        stringr::str_detect(data_resposta, "no mesmo dia"),
        "0",
        stringr::str_extract(data_resposta, "\\d+")
      )
    ),
    nota = readr::parse_number(nota)
  )
# Quick interactive checks: structure, date span, and rows per company
# (most frequent first).
da_reclamacoes |>
  dplyr::glimpse()
range(da_reclamacoes[["data"]])
dplyr::count(da_reclamacoes, empresa, sort = TRUE)
# Keep only the rows for the three listed airlines.
da_reclamacoes_filtrada <- dplyr::filter(
  da_reclamacoes,
  is.element(
    empresa,
    c("Latam Airlines (Tam)", "Azul Linhas Aéreas", "Gol Linhas Aéreas")
  )
)
# Export the full and the filtered tables as Excel workbooks.
# NOTE(review): both destinations are absolute paths under /Users/julio, so
# they need adjusting before running on another machine.
da_reclamacoes |>
  writexl::write_xlsx("/Users/julio/Downloads/reclamacoes_transp_aereo.xlsx")
da_reclamacoes_filtrada |>
  writexl::write_xlsx(
    "/Users/julio/Downloads/reclamacoes_transp_aereo_filtrada.xlsx"
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment