Skip to content

Instantly share code, notes, and snippets.

@nenodias
Last active April 27, 2017 18:04
Show Gist options
  • Select an option

  • Save nenodias/9818e31d5542557e0f91392f50f708a4 to your computer and use it in GitHub Desktop.

Select an option

Save nenodias/9818e31d5542557e0f91392f50f708a4 to your computer and use it in GitHub Desktop.
Exemplo de crawler Selenium com Python 3
# -*- coding: utf-8 -*-
import selenium
from pdb import set_trace
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
# Search YouTube for each term in `lista` and print the text and href of every
# result link found on up to MAX_PAGES result pages per term.
#
# NOTE(review): the element IDs, the "yt-card" class, the "spf-prefetch" rel
# value and the "Próximo »" button label are tied to YouTube's page markup and
# UI language -- verify they still match the live site.
MAX_PAGES = 5

# Open the browser
browser = webdriver.Firefox()
browser.wait = WebDriverWait(browser, 5)
lista = ['bauru', 'taquaitinga', 'matao']
try:
    for item in lista:
        browser.get("https://www.youtube.com")
        # find_element(By.…) is used instead of the find_element_by_* helpers,
        # which were removed in Selenium 4.
        caixa_pesquisa = browser.find_element(By.ID, 'masthead-search-terms')
        botao_pesquisa = browser.find_element(By.ID, 'search-btn')
        # Type the search term into the input
        caixa_pesquisa.send_keys(Keys.DELETE)
        caixa_pesquisa.send_keys(item)
        botao_pesquisa.click()
        for page in range(1, MAX_PAGES + 1):
            # The first page is already displayed after the search; every
            # later page requires clicking the "next" button.
            if page > 1:
                btn = browser.find_elements(
                    By.XPATH, '//span[text()="Próximo »"]')
                if not btn:
                    # No further pages: stop instead of re-scraping (and
                    # re-printing) the page that is already loaded.
                    break
                btn[0].click()
                try:
                    # Wait for the "progress" element to appear and then go
                    # stale before reading the page source.
                    progress = browser.wait.until(
                        EC.presence_of_element_located((By.ID, "progress"))
                    )
                    browser.wait.until(EC.staleness_of(progress))
                except TimeoutException:
                    # Best effort: if the element never shows up or never goes
                    # stale within the timeout, scrape whatever is loaded.
                    pass
            # An explicit parser keeps the parse result independent of which
            # optional parsers happen to be installed.
            soap = BeautifulSoup(browser.page_source, 'html.parser')
            table_res = soap.find('div', {"class": "yt-card"})
            if table_res is None:
                # No results container on this page; nothing to print.
                continue
            for link in table_res.find_all('a', {'rel': 'spf-prefetch'}):
                print(link.text)
                print("\n")
                print(link['href'])
                print("\n")
                print('-' * 80)
                print("\n")
finally:
    # Close the browser. quit() ends the whole WebDriver session, and the
    # finally clause guarantees it runs even when scraping fails.
    browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment