This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import html | |
import requests | |
import json | |
import argparse | |
from collections import OrderedDict | |
def get_headers(): | |
return {"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", | |
"accept-encoding": "gzip, deflate, br", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import csv | |
import requests | |
from selectorlib import Extractor | |
from formatter_classes import formatters | |
def write_to_file(response): | |
# writes HTML response to a file for debugging purpose |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_id": "h_and_m", | |
"startUrl": [ | |
"https://www2.hm.com/en_us/women/products/shoes.html?product-type=ladies_shoes&sort=stock&productTypes=shoes&sizes=15_6_6_footwear&colorWithNames=black_000000&image-size=small&image=model&offset=0&page-size=36" | |
], | |
"selectors": [ | |
{ | |
"id": "listing", | |
"type": "SelectorElementClick", | |
"parentSelectors": [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_id": "wayfair", | |
"startUrl": [ | |
"https://www.wayfair.com/outdoor/sb0/hammocks-with-stands-c1864031.html" | |
], | |
"selectors": [ | |
{ | |
"id": "links", | |
"type": "SelectorLink", | |
"parentSelectors": [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
from csv import DictReader | |
from os import path | |
from tripadvisor_restaurants.items import TripadvisorRestaurantsItem | |
from urllib.parse import urljoin | |
class TripadvisorRestaurantsSpiderSpider(scrapy.Spider): | |
name = 'tripadvisor_restaurants_spider' | |
allowed_domains = ['tripadvisor.com'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_id":"overstock_new", | |
"startUrl":[ | |
"https://www.overstock.com/Home-Garden/Casual-Dinnerware/Gibson,/brand,/6451/subcat.html" | |
], | |
"selectors":[ | |
{ | |
"id":"product", | |
"type":"SelectorLink", | |
"parentSelectors":[ |