Skip to content

Instantly share code, notes, and snippets.

@jagedn
Created September 21, 2019 13:33
Show Gist options
  • Save jagedn/e73e606ebcbd6c3994116a0f5b73588b to your computer and use it in GitHub Desktop.
Save jagedn/e73e606ebcbd6c3994116a0f5b73588b to your computer and use it in GitHub Desktop.
Busca, extrae y guarda en Google Sheet las noticias principales sobre un tema
package scrapping.google
import geb.Page
import geb.spock.GebSpec
import org.openqa.selenium.By
import org.openqa.selenium.OutputType
import org.openqa.selenium.Point
import org.openqa.selenium.TakesScreenshot
import org.openqa.selenium.WebElement
import spock.lang.Specification
import com.puravida.groogle.*
/**
 * Geb/Spock specification that searches Google News for {@link #query},
 * collects the result cards from up to {@link #pages} result pages and
 * appends one row per card to the 'News' sheet of the spreadsheet
 * identified by {@link #sheetId}.
 */
class AppTest extends GebSpec {

    /** Search terms, already in URL form (words joined with '+'). */
    String query = "experto+educacion"

    /** Identifier of the target Google spreadsheet. */
    String sheetId = 'thesheetId'

    /** Maximum number of result pages to scrape. */
    int pages = 3

    def "scrapeemos google"() {
        given:
        // Each row is [timestamp, card text, href of the card's first link].
        List<List<String>> list = []
        String hoy = new Date().toString()

        when:
        go "https://www.google.com/search?q=$query&tbs=qdr:d,lr:lang_1es&tbm=nws&source=lnt&lr=lang_es&X=sa&ved=0ahUKEwis_7yak9_kAhVJQEEAHb3pDW8QpwUIHw&biw=1920&bih=1008&dpr=1"
        for (int npage = 1; npage <= pages; npage++) {
            println "Page ${npage}"
            // Debugging aid: dumps the full HTML of the current page.
            println driver.pageSource

            // Block until at least one result card is present.
            waitFor { $('g-card').size() }
            $('g-card').each { card ->
                def anchors = card.find('a')
                list.add([hoy, card.text(), anchors[0].@href])
            }

            // Do not navigate past the last page we intend to scrape.
            if (npage == pages) {
                break
            }

            // Look the pagination link up once; stop when there is no next page.
            def nextLink = $("a", "aria-label": "Page ${npage + 1}")
            if (!nextLink.size()) {
                break
            }
            nextLink.first().click()
            // NOTE(review): fixed pause to let the next page start loading;
            // the waitFor at the top of the loop then waits for the cards.
            sleep 1000
        }
        println list

        then:
        // At least one news card must have been collected.
        list.size() > 0

        when:
        def groogle = GroogleBuilder.build {
            withServiceCredentials {
                withScopes "https://www.googleapis.com/auth/spreadsheets" //SheetsScopes.SPREADSHEETS
                usingCredentials 'groogle-news.json'
            }
            service(SheetServiceBuilder.build(), SheetService)
        }
        groogle.service(SheetService.class).withSpreadSheet sheetId, {
            withSheet 'News', {
                append {
                    list.each {
                        insert it
                    }
                }
            }
        }

        then:
        // The upload has no observable result here; success means it did not throw.
        noExceptionThrown()
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment