Last active
November 26, 2021 20:34
-
-
Save sientifiko/e391f051e7d7831e7ab16168233a3213 to your computer and use it in GitHub Desktop.
Código para scrapear el Servel, sacando la votación por comuna
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(rvest) | |
library(RSelenium) | |
# funciones pa limpiar | |
#' Clean one results table scraped from the results page.
#'
#' Columns are addressed by position: column 1 holds the row label, column 2
#' is blank on rows that get discarded at the end, and column 6 is searched
#' for the "*" mark.
#'
#' @param objtabla A data.frame with at least six columns (in this script, the
#'   output of `html_table()` passed through `as.data.frame()`).
#' @return The cleaned data.frame, with the extra columns `Electo` and `pacto`.
limpieza <- function(objtabla) {
  # Drop rows with a missing label. A logical mask is used instead of
  # `-which(...)`, which selects zero rows when nothing matches.
  objtabla <- objtabla[!is.na(objtabla[, 1]), , drop = FALSE]

  # Discard the last five rows.
  # NOTE(review): presumably these are summary/total rows -- confirm.
  # head() with a negative n also copes with tables of five rows or fewer,
  # where `1:(nrow(objtabla) - 5)` would count backwards.
  objtabla <- head(objtabla, -5)

  # Flag the rows whose sixth column contains a literal "*".
  objtabla$Electo <- str_detect(objtabla[, 6], "[*]")

  # Remove the column named "NA.".
  objtabla["NA."] <- NULL

  # `pacto` is TRUE on rows whose label does not start with a digit and NA on
  # the others. Testing membership in "0".."9" gives the same result as
  # coercing with as.numeric(), without the coercion warnings.
  primer_caracter <- str_sub(objtabla[, 1], 1, 1)
  objtabla$pacto <- if_else(primer_caracter %in% as.character(0:9), NA, TRUE)

  # Where column 6 is TRUE, overwrite it with that row's label (column 1).
  # NOTE(review): column 6 is addressed by position and must be logical at
  # this point; presumably it is meant to be the `pacto` flag created above,
  # so that the fill below propagates the label -- confirm against the layout
  # of the scraped table.
  filas_marcadas <- which(objtabla[, 6])
  objtabla[filas_marcadas, 6] <- objtabla[filas_marcadas, 1]

  # Carry the last non-missing `pacto` value down to the rows below it.
  objtabla <- objtabla %>% fill(pacto, .direction = "down")

  # Drop the rows whose second column is an empty string; rows where it is NA
  # are kept, as before. Again a mask, so "no match" keeps every row.
  objtabla[!(objtabla[, 2] %in% ""), , drop = FALSE]
}
# Entry page of the site to scrape.
url <- "https://www.servelelecciones.cl/"

# Initial configuration: start a Selenium server together with a Firefox
# client. rsDriver() already opens the client's browser session, so open() is
# not called again here (a second call would launch another browser window).
driver <- rsDriver(browser = "firefox")
remote_driver <- driver[["client"]]

# Go to the site.
remote_driver$navigate(url)
# XPath of the first comuna option:
#   //*[@id="selComunas"]/option[2]
# XPath of the last comuna option:
#   //*[@id="selComunas"]/option[346]

# One cleaned table per comuna (options 2 to 346), preallocated.
lista <- vector("list", 345)

for (i in 2:346) {
  # Build the XPath of the i-th option of the comuna selector.
  path <- paste0('//*[@id="selComunas"]/option[', i, ']')

  # Select that comuna by clicking its option.
  tempRD <- remote_driver$findElement(using = "id", value = "selComunas")
  tempRD$findChildElement(using = "xpath", value = path)$clickElement()

  # Pause before reading anything back. This keeps the request rate low so the
  # site does not block us, and it gives the page time to react to the click.
  # NOTE(review): presumably the results panel is refreshed asynchronously
  # after the click; reading it with no delay risks picking up stale content.
  Sys.sleep(3)

  # Look the selector up again and read the chosen comuna's name from its HTML.
  tempRD <- remote_driver$findElement(using = "id", value = "selComunas")
  comuna <- tempRD$getElementAttribute("outerHTML")[[1]] %>%
    read_html() %>%
    html_node(xpath = path) %>%
    html_text()

  # Grab the container that holds the current results.
  tabla <- remote_driver$findElement("id", "divVotacion")

  # Parse the results table out of that container into a data.frame.
  objtabla <- tabla$getElementAttribute("outerHTML")[[1]] %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="basic-table"]/table') %>%
    html_table(fill = TRUE) %>%
    as.data.frame()

  # Clean it.
  temptable <- limpieza(objtabla)

  # Report which comuna is being processed.
  print(paste("Comuna: ", comuna))

  # Tag every row with the comuna it belongs to.
  temptable$comuna <- comuna

  # Store the table; option i goes into slot i - 1.
  lista[[i - 1]] <- temptable
}
# Remove the loop's temporaries from the workspace.
rm(i, objtabla, tempRD, tabla, temptable, path, comuna)

# Stack the per-comuna tables into one data.frame and save it as CSV.
consolidado <- do.call("rbind", lista)
write.csv(consolidado, "votacion_comuna.csv", row.names = FALSE)

# Shut everything down. Browser sessions are closed first, while the server is
# still running to receive the requests; the Selenium server started by
# rsDriver() is then stopped through its own handle.
remote_driver$close()
remote_driver$closeall()
driver[["server"]]$stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment