This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import scrapy | |
from scrapy import FormRequest | |
class NeuvooSpider(scrapy.Spider): | |
name = 'neuvoo' | |
allowed_domains = ['neuvoo.com'] | |
def start_requests(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
class MetrocuadradoSpider(scrapy.Spider): | |
name = 'metroCuadrado' | |
allowed_domains = ['metrocuadrado.com'] | |
def start_requests(self): | |
yield scrapy.Request( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make sure to add the proxy address, username and password
# Make sure to append the ProxyMiddleware class to the DOWNLOADER_MIDDLEWARES dict in the settings.py file
from w3lib.http import basic_auth_header | |
class ProxyMiddleware:
    """Middleware that points every request at one authenticated proxy.

    The proxy address and the credentials are placeholder strings and must
    be replaced with real values before the middleware is enabled.
    """

    def process_request(self, request, spider):
        """Attach the proxy address and its credentials to ``request``.

        Args:
            request: The outgoing request; its ``meta`` and ``headers``
                mappings are modified in place.
            spider: The spider that issued the request (not used here).

        Returns:
            None (there is no explicit return statement).
        """
        # Record which proxy this request should be sent through.
        proxy_address = "COMPANY PROXY URL OR ADDRESS"
        request.meta['proxy'] = proxy_address

        # Build the authorization header value from the username/password pair.
        proxy_credentials = basic_auth_header('USERNAME', 'PASSWORD')
        request.headers['Proxy-Authorization'] = proxy_credentials
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MySpider(scrapy.Spider): | |
name = 'example' | |
current_page = 1 | |
script = ''' | |
function main(splash, args) | |
splash.private_mode_enabled = false | |
url = args.url | |
assert(splash:go(url)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Define here the models for your scraped items | |
# | |
# See documentation in: | |
# https://doc.scrapy.org/en/latest/topics/items.html | |
# | |
# -*- coding: utf-8 -*- | |
# Define here the models for your scraped items |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
from scrapy.selector import Selector | |
from scrapy_selenium import SeleniumRequest | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
import datetime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.exceptions import DropItem | |
class DuplicatesPipeline(object): | |
def __init__(self): | |
self.emails_seen = set() | |
def process_item(self, item, spider): | |
if item['email'] in self.emails_seen: | |
raise DropItem("Duplicate item found: %s" % item) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
from scrapy.http import FormRequest | |
class SaeedSpider(scrapy.Spider): | |
name = 'saeed' | |
start_urls = [ | |
'https://seffaflik.epias.com.tr/transparency/uretim/gerceklesen-uretim/gercek-zamanli-uretim.xhtml'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def strip_output(self, input):
    """Return ``input`` without surrounding whitespace, or ``""`` for ``None``.

    The previous version used ``catch NoneType:``, which is not valid
    Python: the keyword is ``except``, and calling ``.strip()`` on ``None``
    raises ``AttributeError`` rather than anything named ``NoneType``. The
    missing-value case is now checked explicitly instead.

    Args:
        input: The text to clean, or ``None`` when no value is available.
            The name shadows the builtin but is kept so that keyword
            callers keep working.

    Returns:
        The stripped text, or an empty string when ``input`` is ``None``.

    Raises:
        AttributeError: If ``input`` is not ``None`` and has no ``strip``
            method.
    """
    # Guard the missing-value case up front so real type errors stay visible.
    if input is None:
        return ""
    return input.strip()
def parse(self, response):
    """Yield one item holding the text that follows the "Founded:" label.

    Args:
        response: The page response; it is queried with ``response.xpath``.

    Yields:
        A dict with the single key ``"target_field"`` whose value is the
        extracted text after it has been passed through
        ``self.strip_output``.
    """
    # Text of the <p> sibling(s) following the label that contains "Founded:".
    founded_query = '//label[contains(text(),"Founded:")]/following-sibling::p/text()'
    # NOTE(review): presumably .get() yields None when nothing matches,
    # which is why the value goes through strip_output — confirm.
    raw_value = response.xpath(founded_query).get()
    cleaned_value = self.strip_output(raw_value)
    yield {"target_field": cleaned_value}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
class BlockchainSpider(scrapy.Spider): | |
name = 'blockchain' | |
allowed_domains = ['www.blockchain.com'] | |
start_urls = ['https://www.blockchain.com/explorer'] |
Newer | Older