Bernardas Ališauskas (Granitosaurus) — GitHub Gists
states = [u'US', u'AL', u'AK', u'AZ', u'AR', u'CA', u'CO', u'CT', u'DE', u'DC', u'FL', u'GA', u'HI', u'ID',
          u'IL', u'IN', u'IA', u'KS', u'KY', u'LA', u'ME', u'MD', u'MA', u'MI', u'MN', u'MS', u'MO', u'MT',
          u'NE', u'NV', u'NH', u'NJ', u'NM', u'NY', u'NC', u'ND', u'OH', u'OK', u'OR', u'PA', u'RI', u'SC',
          u'SD', u'TN', u'TX', u'UT', u'VT', u'VA', u'WA', u'WV', u'WI', u'WY', u'AS', u'GU', u'MP', u'PR',
          u'VI', u'UM', u'FM', u'MH', u'PW']
# unicode: state/territory code -> full name
STATE_UNICODE = {u'WA': u'Washington', u'DE': u'Delaware', u'DC': u'District of Columbia', u'WI': u'Wisconsin',
                 u'WV': u'West Virginia', u'HI': u'Hawaii', u'FL': u'Florida', u'FM': u'Federated States of Micronesia',
                 u'WY': u'Wyoming', u'NH': u'New Hampshire', u'UM': u'U.S. Minor Outlying Islands', u'NJ': u'New Jersey',
                 u'NM': u'New Mexico', u'TX': u'Texas', u'LA': u'Louisiana', u'NC': u'North Carolina',
                 u'ND': u'North Dakota', u'NE': u'Nebraska', u'TN': u'Tennessee', u'NY': u'New York',
                 u'PA': u'Pennsylvania', u'AK': u'Alaska', u'NV': u'Nevada', u'VA': u'Virginia', u'GU': u'Guam',
                 u'CO': u'Colorado', u'PW': u'Palau', u'VI': u'Virgin Islands', u'CA': u'California',
                 u'AL': u'Alabama', u'AS': u'American Samoa', u'AR': u'Arkansas', u'VT': u'Vermont', u'IL': u'Illinois',
                 u'GA': u'Georgia', u'IN': u'Indiana', u'IA': u'Iowa', u'OK': u'Oklahoma', u'AZ': u'Arizona',
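The preview cuts off mid-dict; a minimal usage sketch, assuming the full STATE_UNICODE mapping:

# usage sketch -- assumes the (truncated above) STATE_UNICODE mapping is complete
def state_name(code, default=u'Unknown'):
    """Return the full name for a two-letter state/territory code."""
    return STATE_UNICODE.get(code.upper(), default)

print(state_name('wa'))  # -> Washington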
Granitosaurus / countries
Created September 30, 2014 16:25
World country data: name, gec, iso2, iso3, numeric iso, stanag and internet domain
[
    {
        "gec": "AF",
        "internet": ".af",
        "iso2": "AF",
        "iso3": "AFG",
        "iso_num": "004",
        "name": "Afghanistan",
        "stanag": "AFG"
    },
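A hedged sketch of one way to consume the data in Python: index the records by iso2 code for quick lookups (the countries.json filename here is an assumption).

import json

# load the gist's country records and index them by iso2 code;
# the 'countries.json' filename is an assumption
with open('countries.json') as f:
    countries = {c['iso2']: c for c in json.load(f)}

print(countries['AF']['name'])      # -> Afghanistan
print(countries['AF']['internet'])  # -> .af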
Granitosaurus / poets.py
Last active October 5, 2016 08:33
poets.org spider
import scrapy


class PoetsSpider(scrapy.Spider):
    """scrapy spider to scrape poems from poets.org website"""
    name = 'poets.org'
    start_urls = ['https://www.poets.org/poetsorg/poems']
    allowed_domains = ['poets.org']

    def parse(self, response):
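        # NOTE: sketch continuation -- the gist preview ends at parse(), so the
        # selectors and the parse_poem helper below are assumptions, not the
        # gist's actual code
        for href in response.css('td.views-field-title a::attr(href)').extract():
            yield scrapy.Request(response.urljoin(href), callback=self.parse_poem)
        # follow pagination, if present
        next_page = response.css('li.pager-next a::attr(href)').extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)

    def parse_poem(self, response):
        # hypothetical helper: pull the poem title and text from a detail page
        yield {
            'title': response.css('h1::text').extract_first('').strip(),
            'text': '\n'.join(response.css('div.poem *::text').extract()),
        }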
Granitosaurus / steam_game_linkinator
Last active September 29, 2017 22:26
Converts a list of games into linked items with reviews.
import requests
from parsel import Selector


def scrape():
    data = """
7 Grand Steps: What Ancients Begat (DRM Free + Steam)
2064: Read Only Memories (DRM Free + Steam)
A Virus Named TOM (DRM Free + Steam)
AI War: Fleet Command (DRM Free + Steam)
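"""  # (game list truncated in the gist preview)
    # hedged continuation sketch -- everything below is an assumption, not the
    # gist's actual code: build a steam search link per game and pull the
    # review summary off the first search result
    for line in data.strip().splitlines():
        name = line.split(' (')[0].strip()
        resp = requests.get('https://store.steampowered.com/search/',
                            params={'term': name})
        sel = Selector(text=resp.text)
        row = sel.css('a.search_result_row')
        link = row.css('::attr(href)').extract_first('')
        summary = row.css('.search_review_summary::attr(data-tooltip-html)').extract_first('')
        review = summary.split('<br>')[0] if summary else 'no reviews'
        print('[{}]({}) - {}'.format(name, link or 'no steam link', review))


if __name__ == '__main__':
    scrape()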
# -*- coding: utf-8 -*-
import scrapy


class MyipSpider(scrapy.Spider):
    name = "myip"
    allowed_domains = ["httpbin.org"]  # domains only, not full urls
    start_urls = (
        'http://httpbin.org/ip',
    )
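    def parse(self, response):
        # sketch -- the gist preview ends before parse(), so this body is an
        # assumption. httpbin.org/ip returns JSON like {"origin": "1.2.3.4"}
        import json
        ip = json.loads(response.text)['origin']
        self.logger.info('current ip: %s', ip)
        yield {'ip': ip}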
Granitosaurus / bundlestars.py
Last active April 21, 2017 06:45
Scraper for bundlestars bundles, outputs links and steam reviews in reddit comment format.
import json
import re

import requests
from parsel import Selector


def scrape():
    data = requests.get('https://www.bundlestars.com/api/promotions/mega-pick-mix-bundle-2')
    products = json.loads(data.text)[0]['products']
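    # hedged continuation sketch -- the reddit-comment formatting and the
    # product fields below are assumptions, not the gist's actual code; the
    # steam-review lookup implied by the description (and the re/Selector
    # imports) is omitted here for brevity
    lines = []
    for product in products:
        name = product.get('name', '')
        url = 'https://www.bundlestars.com/en/game/{}'.format(product.get('slug', ''))
        lines.append('* [{}]({})'.format(name, url))
    return '\n'.join(lines)


if __name__ == '__main__':
    print(scrape())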
Granitosaurus / vr_check
Created May 5, 2017 11:33
Python script for checking whether a video game on steam supports VR devices.
#!/usr/bin/env python3
# requires python3.6
# requires click, parsel, requests_futures from pip
from urllib.parse import quote, unquote

import click
from requests_futures.sessions import FuturesSession
from parsel import Selector
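# hedged sketch of the rest of the script -- the gist preview shows only the
# imports; the CLI shape, selectors, and VR heuristic below are assumptions,
# not the gist's actual code
@click.command()
@click.argument('games', nargs=-1)
def cli(games):
    """Check whether the steam store pages for GAMES list VR support."""
    session = FuturesSession()
    # fire off all search requests concurrently, then resolve them in order
    searches = [(game, session.get('https://store.steampowered.com/search/?term=' + quote(game)))
                for game in games]
    for game, future in searches:
        sel = Selector(text=future.result().text)
        store_url = sel.css('a.search_result_row::attr(href)').extract_first()
        if not store_url:
            click.echo('{}: not found'.format(game))
            continue
        store = Selector(text=session.get(store_url).result().text)
        specs = store.css('.game_area_details_specs a.name::text').extract()
        vr = [s for s in specs if 'VR' in s]
        click.echo('{}: {}'.format(game, ', '.join(vr) or 'no VR support listed'))


if __name__ == '__main__':
    cli()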
Granitosaurus / check_proxies.py
Created March 6, 2018 05:53
Check proxies with async python
import os

from requests.exceptions import ProxyError, ReadTimeout
from requests_futures.sessions import FuturesSession


def check_proxies(proxies, max_workers=5, timeout=5):
    """
    Check whether proxies are functional and whether authentication matches.
    This function will filter out any proxies that:
    * return 407 credential mismatch
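    * raise a proxy error or time out
    (the gist preview cuts off mid-docstring; the continuation from here is a
    hedged sketch of the implementation, not the gist's actual code)
    """
    session = FuturesSession(max_workers=max_workers)
    # issue all checks concurrently through the thread-backed session
    futures = {proxy: session.get('http://httpbin.org/ip',
                                  proxies={'http': proxy, 'https': proxy},
                                  timeout=timeout)
               for proxy in proxies}
    good = []
    for proxy, future in futures.items():
        try:
            response = future.result()
        except (ProxyError, ReadTimeout):
            continue
        if response.status_code == 407:  # proxy authentication required
            continue
        good.append(proxy)
    return good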
# instead of
with open('./data/avail_urls.txt', 'w') as f:
    for item in items:
        if 'archived_snapshots' in item:
            if 'closest' in item['archived_snapshots']:
                f.write(item['archived_snapshots']['closest']['url'] + '\n')

# write
with open('./data/avail_urls.txt', 'w') as f:
    for item in items:
        if 'closest' not in item.get('archived_snapshots', {}):
            continue
        f.write(item['archived_snapshots']['closest']['url'] + '\n')