Created
September 21, 2019 13:33
-
-
Save jagedn/e73e606ebcbd6c3994116a0f5b73588b to your computer and use it in GitHub Desktop.
Busca, extrae y guarda en Google Sheet las noticias principales sobre un tema
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package scrapping.google | |
import geb.Page | |
import geb.spock.GebSpec | |
import org.openqa.selenium.By | |
import org.openqa.selenium.OutputType | |
import org.openqa.selenium.Point | |
import org.openqa.selenium.TakesScreenshot | |
import org.openqa.selenium.WebElement | |
import spock.lang.Specification | |
import com.puravida.groogle.* | |
/**
 * Geb/Spock specification that searches Google for {@code query}, collects the
 * result cards of up to {@code pages} result pages and appends them as rows to
 * the 'News' sheet of the spreadsheet identified by {@code sheetId}.
 */
class AppTest extends GebSpec {

    /** Search terms; interpolated verbatim into the {@code q} parameter of the search URL. */
    String query = "experto+educacion"

    /** Identifier of the spreadsheet that receives the scraped rows. */
    String sheetId = 'thesheetId'

    /** Maximum number of result pages to scrape. */
    int pages = 3

    /**
     * Scrapes the result cards page by page, then appends the collected rows to the sheet.
     * The first {@code then:} block fails the feature when nothing was scraped, so the
     * upload is never attempted with an empty result.
     */
    def "scrapeemos google"() {
        given:
        // One row per result card: [timestamp, card text, href of the card's first link].
        List<List<String>> rows = []
        String today = new Date().toString()

        when:
        go "https://www.google.com/search?q=$query&tbs=qdr:d,lr:lang_1es&tbm=nws&source=lnt&lr=lang_es&X=sa&ved=0ahUKEwis_7yak9_kAhVJQEEAHb3pDW8QpwUIHw&biw=1920&bih=1008&dpr=1"
        for (int npage = 1; npage <= pages; npage++) {
            println "Page ${npage}"

            // Block until at least one result card is present on the page.
            waitFor { $('g-card').size() }
            $('g-card').each { card ->
                def anchors = card.find('a')
                rows << [today, card.text(), anchors[0].@href]
            }

            // The last requested page has been scraped: do not navigate any further.
            if (npage == pages) {
                break
            }

            // Pagination links are located through their aria-label ("Page 2", "Page 3", ...).
            def nextLink = $("a", "aria-label": "Page ${npage + 1}")
            if (!nextLink.size()) {
                break
            }
            nextLink.first().click()
            // Fixed pause before the next iteration waits for cards again.
            // NOTE(review): presumably gives the next page time to replace the current
            // cards; an explicit wait on a page-specific element would be more robust.
            sleep 1000
        }
        println rows

        then:
        !rows.isEmpty()

        when:
        def groogle = GroogleBuilder.build {
            withServiceCredentials {
                withScopes "https://www.googleapis.com/auth/spreadsheets" // SheetsScopes.SPREADSHEETS
                // NOTE(review): presumably a service-account key file - confirm where it is resolved from.
                usingCredentials 'groogle-news.json'
            }
            service(SheetServiceBuilder.build(), SheetService)
        }
        groogle.service(SheetService.class).withSpreadSheet sheetId, {
            withSheet 'News', {
                append {
                    rows.each { row ->
                        insert row
                    }
                }
            }
        }

        then:
        // The upload returns nothing to inspect; the expectation is that it completes without throwing.
        noExceptionThrown()
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment