Last active
November 17, 2022 13:26
-
-
Save gabrielzanlorenssi/3206189891d666b57bd43522b7a99bd9 to your computer and use it in GitHub Desktop.
bolsonaro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest)
library(tidyverse)

#-- links ----
# Search-results page of the Chamber of Deputies speech database. The query
# string selects speaker "JAIR BOLSONARO", page 19, 50 results per page,
# ordered by session date (descending).
url <- "https://www.camara.leg.br/internet/sitaqweb/resultadoPesquisaDiscursos.asp?txIndexacao=&CurrentPage=19&BasePesq=plenario&txOrador=JAIR%20BOLSONARO&txPartido=&dtInicio=&dtFim=&txUF=&txSessao=&listaTipoSessao=&listaTipoInterv=&inFalaPres=&listaTipoFala=&listaFaseSessao=&txAparteante=&listaEtapa=&CampoOrdenacao=dtSessao&TipoOrdenacao=DESC&PageSize=50&txTexto=&txSumario="

# Prefix that turns the relative hrefs found on the results page into
# absolute URLs.
base_url <- "https://www.camara.leg.br/internet/sitaqweb/"

# UTF-8 percent-encodings for the characters handled by this script;
# str_replace_all() applies the pairs in order.
# NOTE(review): any other non-ASCII character in an href is left unescaped.
url_escapes <- c(
  " " = "%20",
  "á" = "%C3%A1",
  "ç" = "%C3%A7",
  "õ" = "%C3%B5",
  "ó" = "%C3%B3",
  "ã" = "%C3%A3"
)

# Every href on the results page. The URL is passed to read_html() only once;
# the original pipe also supplied it as the `encoding` argument.
hrefs <- read_html(url) %>%
  html_nodes("a") %>%
  html_attr("href")

# `b` is a one-column tibble (`x`) holding the absolute, percent-encoded URL
# of each link whose href contains "TextoHTML". Anchors without an href give
# NA, which filter() drops.
b <- tibble(x = hrefs) %>%
  filter(str_detect(x, "TextoHTML")) %>%
  mutate(
    # Strip line breaks and tabs embedded in the href before prefixing it.
    x = paste0(base_url, str_replace_all(x, "[\\r\\n\\t]", "")),
    x = str_replace_all(x, url_escapes)
  )
#-- get text ----
# Download every page listed in `b$x` and extract the text of its <p> nodes.
# `disc` is a character vector with one element per URL, in the order of `b$x`.
disc <- imap_chr(b$x, function(x, y) {
  # x: page URL; y: its position in `b$x` (used only for progress output).
  paragraphs <- x %>%
    read_html() %>%
    html_nodes("p") %>%
    html_text()

  # Turn embedded line breaks and tabs into spaces, then trim each paragraph.
  paragraphs <- trimws(str_replace_all(paragraphs, "[\\r\\n\\t]", " "))

  print(y) # progress indicator

  # imap_chr() requires exactly one string per element. A page with zero or
  # several <p> nodes would otherwise abort the whole run, so the paragraphs
  # are joined into a single string (an empty string when there are none).
  paste(paragraphs, collapse = " ")
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
O link que ele acessa somente inclui falas do Jair. Se quer de outro político, é só o link incluir outras falas