Skip to content

Instantly share code, notes, and snippets.

@jjesusfilho
Last active June 30, 2017 15:58
Show Gist options
  • Save jjesusfilho/693c78957d569b4bacb32f61aea7cfd9 to your computer and use it in GitHub Desktop.
Save jjesusfilho/693c78957d569b4bacb32f61aea7cfd9 to your computer and use it in GitHub Desktop.
Função para extrair metadados e inteiro teor do Tribunal de Justiça de Santa Catarina
library(httr)
library(xml2)
library(stringr)
library(purrr)
library(tibble)
#' Scrape decision metadata and full text from the TJSC case-law search
#'
#' Submits a search to the jurisprudence search form of the Tribunal de
#' Justiça de Santa Catarina (busca.tjsc.jus.br), walks every result page,
#' downloads each linked decision and returns one row per decision.
#'
#' There are two ways to search: by a word (`palavra`) or by a phrase
#' (`frase`).
#'
#' @param palavra Search term sent in the `q` form field.
#' @param frase Phrase sent in the `frase` form field. When non-empty it is
#'   wrapped in double quotes before being sent; when empty the field is
#'   sent empty.
#' @return A tibble with character columns `processo`, `relator`, `origem`,
#'   `camara`, `data`, `acao` and `inteiro` (the full text of the decision).
#'   Zero rows are returned when the search produces no results or no links.
tjscSG <- function(palavra = "", frase = "") {
  url <- "http://busca.tjsc.jus.br/jurisprudencia/buscaForm.do"
  url2 <- "http://busca.tjsc.jus.br/jurisprudencia/buscaajax.do?&categoria=acordaos"

  # Typed empty result so callers always get the same columns back.
  vazio <- tibble(
    processo = character(), relator = character(), origem = character(),
    camara = character(), data = character(), acao = character(),
    inteiro = character()
  )

  # Open the form once to obtain the session cookies. httr exposes them as a
  # data frame (one row per cookie), so they must be turned into a
  # name = value vector; unlist() on the data frame would send the column
  # names ("domain", "flag", ...) as if they were cookies.
  a <- GET(url)
  ck <- httr::cookies(a)
  sessao <- if (NROW(ck) > 0) {
    httr::set_cookies(
      .cookies = stats::setNames(as.character(ck$value), as.character(ck$name))
    )
  } else {
    httr::config()
  }

  # Form fields as captured from the site's advanced-search form.
  # NOTE(review): the `NA` entry looks like an artefact of that capture; it is
  # kept because the server's handling of its absence is not known from here.
  body <- list(
    q = "", only_ementa = "", frase = "", excluir = "",
    qualquer = "", `NA` = NA_character_, prox1 = "", prox2 = "",
    proxc = "", sort = "dtJulgamento desc", ps = "50", busca = "avancada",
    pg = "1", flapto = "1", radio_campo = "integra",
    `categoria[]` = "acordaos", faceta = "false"
  )
  body$q <- palavra
  # Quote the phrase only when one was supplied. paste0() is used instead of
  # deparse() so accented characters are never turned into escape sequences.
  if (nzchar(frase)) {
    frase <- paste0("\"", frase, "\"")
  }
  body$frase <- frase

  # Fetch and parse one page of search results; the page number is set by
  # field name rather than by its position in the list.
  buscar_pagina <- function(pg) {
    body$pg <- as.character(pg)
    resposta <- POST(url2, body = body, encode = "form", sessao)
    httr::stop_for_status(resposta)
    httr::content(resposta)
  }

  # Absolute URLs of the decisions linked from one result page. The empty
  # case is handled explicitly because paste0(prefix, character(0)) would
  # return the bare prefix as a bogus link.
  extrair_links <- function(pagina) {
    hrefs <- pagina %>%
      xml_find_all("//*[@id='coluna_principal']/div/div[3]/a/@href") %>%
      xml_text()
    if (length(hrefs) == 0) {
      return(character(0))
    }
    paste0("http://busca.tjsc.jus.br/jurisprudencia/", hrefs)
  }

  primeira <- buscar_pagina(1L)

  # NOTE(review): this takes the first run of digits in the results header; if
  # the site prints counts with a thousands separator (e.g. "1.234") the total
  # would be under-counted — confirm against the live page.
  total <- primeira %>%
    xml_find_all("//*[@class='texto_resultados']") %>%
    xml_text() %>%
    str_extract("\\d+") %>%
    as.numeric()
  total <- total[!is.na(total)]
  if (length(total) == 0 || total[1] == 0) {
    return(vazio)
  }
  num <- ceiling(total[1] / as.numeric(body$ps))

  # Page 1 is already in hand; request only pages 2..num.
  paginas <- c(list(primeira), map(seq_len(num)[-1], buscar_pagina))
  links <- unlist(map(paginas, extrair_links))
  if (length(links) == 0) {
    return(vazio)
  }
  links <- map_chr(links, URLencode)

  # Download and parse every decision page.
  resultado <- map(links, function(x) {
    httr::content(GET(x))
  })

  # Full text: one string per decision, NA when the node is missing and the
  # pieces joined when the XPath matches more than once.
  inteiroTeor <- map_chr(resultado, function(x) {
    trechos <- x %>%
      xml_find_all("//*[@id='coluna_principal']/div/div[3]") %>%
      xml_text(trim = TRUE)
    if (length(trechos) == 0) {
      NA_character_
    } else {
      paste(trechos, collapse = "\n")
    }
  })

  # First eight trimmed text values matched by an XPath (padded with NA).
  primeiros_oito <- function(doc, xpath) {
    texto <- xml_text(xml_find_all(doc, xpath), trim = TRUE)
    texto[1:8]
  }

  # Pair each <strong> label with the text that follows it: "label value".
  rotulados <- map(resultado, function(x) {
    meta <- primeiros_oito(x, "//*[@class='resultados']/strong")
    dados <- primeiros_oito(
      x, "//*[@class='resultados']/strong/following-sibling::text()"
    )
    paste(meta, dados)
  })

  # One row per decision, eight labelled fields per row. Column names are set
  # explicitly so as_tibble() does not have to repair missing names.
  campos <- matrix(
    unlist(rotulados),
    ncol = 8, byrow = TRUE,
    dimnames = list(NULL, paste0("V", 1:8))
  )
  df3 <- as_tibble(campos)

  # When the seventh field carries the "Classe" label, use it in place of the
  # sixth.
  df3$V6 <- ifelse(str_detect(df3$V7, "Classe"), df3$V7, df3$V6)
  df3 <- df3[1:6]
  names(df3) <- c("processo", "relator", "origem", "camara", "data", "acao")

  # Strip the leading "label: " from every field, keeping only the value.
  df3 <- as_tibble(map(df3, ~ str_replace(.x, "(.*:\\s)?", "")))
  df3$inteiro <- inteiroTeor
  df3
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment