This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two-letter region codes, in a fixed order: the nationwide 'US' entry first,
# then the states plus 'DC', then the remaining territory/outlying-area codes
# ('AS' through 'PW').  The u'' prefixes are kept so the literals stay text
# strings on both Python 2 and Python 3.
states = [
    u'US', u'AL', u'AK', u'AZ', u'AR', u'CA', u'CO', u'CT', u'DE', u'DC', u'FL', u'GA', u'HI', u'ID',
    u'IL', u'IN', u'IA', u'KS', u'KY', u'LA', u'ME', u'MD', u'MA', u'MI', u'MN', u'MS', u'MO', u'MT',
    u'NE', u'NV', u'NH', u'NJ', u'NM', u'NY', u'NC', u'ND', u'OH', u'OK', u'OR', u'PA', u'RI', u'SC',
    u'SD', u'TN', u'TX', u'UT', u'VT', u'VA', u'WA', u'WV', u'WI', u'WY', u'AS', u'GU', u'MP', u'PR',
    u'VI', u'UM', u'FM', u'MH', u'PW',
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#unicode | |
STATE_UNICODE = {u'WA': u'Washington', u'DE': u'Delaware', u'DC': u'District of Columbia', u'WI': u'Wisconsin', | |
u'WV': u'West Virginia', u'HI': u'Hawaii', u'FL': u'Florida', u'FM': u'Baker Island', | |
u'WY': u'Wyoming', u'NH': u'New Hampshire', u'UM': u'U.S. Minor Outlying Islands', u'NJ': u'New Jersey', | |
u'NM': u'New Mexico', u'TX': u'Texas', u'LA': u'Louisiana', u'NC': u'North Carolina', | |
u'ND': u'North Dakota', u'NE': u'Nebraska', u'TN': u'Tennessee', u'NY': u'New York', | |
u'PA': u'Pennsylvania', u'AK': u'Alaska', u'NV': u'Nevada', u'VA': u'Virginia', u'GU': u'Guam', | |
u'CO': u'Colorado', u'PW': u'Jarvis Island', u'VI': u'Virgin Islands', u'CA': u'California', | |
u'AL': u'Alabama', u'AS': u'American Samoa', u'AR': u'Arkansas', u'VT': u'Vermont', u'IL': u'Illinois', | |
u'GA': u'Georgia', u'IN': u'Indiana', u'IA': u'Iowa', u'OK': u'Oklahoma', u'AZ': u'Arizona', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"gec": "AF", | |
"internet": ".af", | |
"iso2": "AF", | |
"iso3": "AFG", | |
"iso_num": "004", | |
"name": "Afghanistan", | |
"stanag": "AFG" | |
}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
class PoetsSpider(scrapy.Spider): | |
"""scrapy spider to scrape poems from poets.org website""" | |
name = 'poets.org' | |
start_urls = ['https://www.poets.org/poetsorg/poems'] | |
allowed_domains = ['poets.org'] | |
def parse(self, response): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from parsel import Selector | |
def scrape(): | |
data = """ | |
7 Grand Steps: What Ancients Begat (DRM Free + Steam) | |
2064: Read Only Memories (DRM Free + Steam) | |
A Virues Named TOM (DRM Free + Steam) | |
AI War: Fleet Command (DRM Free + Steam) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import scrapy | |
class MyipSpider(scrapy.Spider):
    """Scrapy spider whose only start URL is http://httpbin.org/ip."""

    name = "myip"
    # allowed_domains must hold bare domain names, not URLs.  A URL entry
    # (scheme and path included) never matches a request's host, so Scrapy
    # warns about it and the offsite filter cannot work as intended.
    allowed_domains = ["httpbin.org"]
    start_urls = (
        'http://httpbin.org/ip',
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import re | |
import requests | |
from parsel import Selector | |
def scrape(): | |
data = requests.get('https://www.bundlestars.com/api/promotions/mega-pick-mix-bundle-2') | |
products = json.loads(data.text)[0]['products'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from urllib.parse import quote, unquote | |
# requires click, parsel, requests_futures from pip | |
# requires python3.6 | |
import click | |
from requests_futures.sessions import FuturesSession | |
from parsel import Selector |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from requests.exceptions import ProxyError, ReadTimeout | |
from requests_futures.sessions import FuturesSession | |
def check_proxies(proxies, max_workers=5, timeout=5): | |
""" | |
Check whether proxies are functional and whether authentication matches. | |
This function will filter out any proxies that: | |
* return 407 credential mismatch |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# instead of | |
with open('./data/avail_urls.txt', 'w') as f: | |
for item in items: | |
if 'archived_snapshots' in item: | |
if 'closest' in item['archived_snapshots']: | |
f.write(item['archived_snapshots']['closest']['url'] + '\n') | |
# write | |
with open('./data/avail_urls.txt', 'w') as f: | |
for item in items: | |
if 'closest' not in item.get('archived_snapshots', []): |
OlderNewer