Skip to content

Instantly share code, notes, and snippets.

@RyanKung
Created August 17, 2017 14:42
Show Gist options
  • Select an option

  • Save RyanKung/52a2f34b6e0f6ee71d67d64502f9fca3 to your computer and use it in GitHub Desktop.

Select an option

Save RyanKung/52a2f34b6e0f6ee71d67d64502f9fca3 to your computer and use it in GitHub Desktop.
import logging
from pulsar.apps import http
import requests
from bs4 import BeautifulSoup
import random
class Request:
    """HTTP helper that routes traffic through a randomly chosen public proxy.

    On construction the proxy list is scraped from ``31f.cn``. Each access to
    :attr:`proxy` picks a random entry, verifies it against ``httpbin.org/ip``
    and discards entries that fail the check.
    """

    def __init__(self, https=True, timeout=3):
        """Fetch the proxy list for the requested scheme.

        Args:
            https: use HTTPS proxies when true, plain HTTP proxies otherwise.
            timeout: seconds to wait for the proxy health-check request.
        """
        # Honour the caller's choice; the flag used to be hard-coded to True,
        # which made ``Request(https=False)`` silently behave like HTTPS.
        self.https = https
        self.schema = 'https' if self.https else 'http'
        self.init_proxy_list(https)
        self.timeout = timeout

    def init_proxy_list(self, https):
        """Scrape the proxy table and store ``"host:port"`` strings.

        Args:
            https: unused; the scheme is taken from ``self.schema``. Kept so
                existing callers that pass it keep working.

        Returns:
            ``self``, so the call can be chained.
        """
        url = 'http://31f.cn/%s-proxy/' % self.schema
        path = 'body > div.container > table.table.table-striped > tbody > tr'
        resp = requests.get(url)
        data = BeautifulSoup(resp.content, 'html5lib')
        proxies = []
        # The first matched row is skipped, as in the original slice [1:].
        for row in data.select(path)[1:]:
            cells = row.text.split('\n')
            # Ignore rows too short to carry both fields instead of letting
            # one malformed row abort the whole load with an IndexError.
            if len(cells) > 3:
                proxies.append("%s:%s" % (cells[2], cells[3]))
        self.proxy_list = proxies
        return self

    def check_proxy(self, proxies):
        """Return True if a request through ``proxies`` answers with HTTP 200.

        Args:
            proxies: mapping in the format accepted by ``requests``'
                ``proxies=`` argument.

        Returns:
            ``True`` on a 200 response, ``False`` on any other status or on
            any error raised while making the request.
        """
        try:
            resp = requests.get('%s://httpbin.org/ip' % self.schema,
                                proxies=proxies, timeout=self.timeout)
        except Exception:
            # Deliberately broad: scraped proxies fail in many different ways
            # and any failure simply means "do not use this proxy".
            return False
        # Explicit comparison rather than ``assert``, which is removed when
        # Python runs with -O and would make every responding proxy "valid".
        return resp.status_code == 200

    @property
    def proxy(self):
        """Pick a random working proxy, pruning dead ones along the way.

        Returns:
            A single-entry dict ``{schema: "host:port"}``.

        Raises:
            ValueError: when no proxy in ``proxy_list`` passes the check.
        """
        # Iterative retry: the recursive form could exhaust the interpreter
        # stack when many consecutive proxies are dead.
        while self.proxy_list:
            index = random.randrange(len(self.proxy_list))
            candidate = {self.schema: self.proxy_list[index]}
            logging.info('Checking proxy %s', candidate)
            if self.check_proxy(candidate):
                logging.info('Using proxy %s', candidate)
                return candidate
            logging.error(
                'Failed to check proxy %s, Pop and Suffle Proxies!', candidate)
            self.proxy_list.pop(index)
        raise ValueError('proxy list is empty: no working proxy available')

    @property
    def session(self):
        """Create a pulsar ``HttpClient`` bound to a freshly checked proxy.

        Every access evaluates :attr:`proxy`, so a new proxy is selected and
        verified each time.
        """
        address = self.proxy[self.schema]
        return http.HttpClient(proxies={
            self.schema: '%s://%s' % (self.schema, address)
        })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment