Last active
January 20, 2020 13:06
-
-
Save Cguilliman/f171e76939baa350c4dbc2d97140bf2d to your computer and use it in GitHub Desktop.
Proxy module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup as bs | |
from apps.proxy.models import Proxy | |
# Page whose table rows are scraped for candidate proxy hosts.
PROXY_URL = "https://www.sslproxies.org/"
# URL requested through a candidate proxy to check that the proxy works.
TEST_URL = "https://www.google.com/"
class ProxyGetter:
    """Scrape, validate and hand out proxy hosts.

    A *host* is the string built from the first two cells of a listing row
    joined by ``":"`` (presumably ``ip:port`` -- confirm against the page
    served at ``PROXY_URL``). Hosts are persisted through the ``Proxy``
    model, whose ``is_valid`` flag records whether the host is still
    considered usable.
    """

    # Seconds to wait on any outbound HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    def remote_proxies(self):
        """Yield proxy hosts scraped from the ``PROXY_URL`` listing page.

        Only table rows 1..20 are considered (row 0 is skipped as the
        header). Rows with fewer than two ``<td>`` cells are ignored instead
        of raising ``IndexError``.

        Raises:
            requests.RequestException: if the listing page cannot be fetched.
        """
        response = requests.get(PROXY_URL, timeout=self.REQUEST_TIMEOUT)
        soup = bs(response.content, "lxml")
        # ``select`` takes a CSS selector only; the attrs dict the original
        # passed as a second argument was interpreted as ``namespaces`` and
        # never filtered anything, so it is dropped.
        rows = soup.select("tr")
        for row in rows[1:21]:
            cells = row.select("td")
            if len(cells) < 2:
                continue
            yield f"{cells[0].text.strip()}:{cells[1].text.strip()}"

    def proxies(self):
        """Yield usable proxy hosts: stored valid ones first, then new ones.

        First every host stored as valid is yielded. Then the remote listing
        is walked: unknown hosts are stored (with the model's default
        validity) and yielded; hosts already stored as invalid are skipped;
        hosts already yielded are not repeated.
        """
        seen = set()
        for proxy in Proxy.objects.valid():
            seen.add(proxy.host)
            yield proxy.host

        for remote_host in self.remote_proxies():
            if remote_host in seen:
                continue
            seen.add(remote_host)
            existing = Proxy.objects.filter(host=remote_host).first()
            if existing is None:
                existing = Proxy.objects.create(host=remote_host)
            elif not existing.is_valid:
                # Known-bad proxy: do not hand it out again.
                continue
            # ``yield`` (not ``return``): returning here would silently end
            # the generator at the first already-known host.
            yield existing.host

    def test_request(self, host):
        """Return ``True`` if ``TEST_URL`` can be fetched through ``host``.

        Any response counts as success; the status code is not inspected.
        Network/proxy failures and malformed hosts yield ``False``.
        """
        try:
            requests.get(
                TEST_URL,
                timeout=self.REQUEST_TIMEOUT,
                proxies={
                    'http': f"http://{host}",
                    # NOTE(review): an ``https://`` proxy URL makes requests
                    # speak TLS to the proxy itself -- confirm this is
                    # intended rather than ``http://`` with CONNECT.
                    'https': f"https://{host}",
                },
            )
        except (requests.RequestException, ValueError):
            # ValueError covers URL-parsing errors for malformed scraped
            # hosts; KeyboardInterrupt/SystemExit are no longer swallowed.
            return False
        return True

    def update_proxies(self):
        """Refresh the stored proxy list.

        1. Re-test every proxy stored as valid and mark the ones whose test
           request FAILS as invalid.
        2. Fetch the remote listing and store every host not yet in the
           database, with ``is_valid`` set from a test request.
        """
        for proxy in Proxy.objects.valid():
            if not self.test_request(proxy.host):
                proxy.is_valid = False
                proxy.save()

        for remote_host in self.remote_proxies():
            if not Proxy.objects.filter(host=remote_host).exists():
                Proxy.objects.create(
                    host=remote_host,
                    is_valid=self.test_request(remote_host),
                )

    def write_as_invalid(self, host):
        """Record ``host`` as invalid, creating its row if necessary."""
        # Create the row directly as invalid to avoid an insert followed by
        # an immediate update.
        proxy, created = Proxy.objects.get_or_create(
            host=host, defaults={'is_valid': False}
        )
        if not created and proxy.is_valid:
            proxy.is_valid = False
            proxy.save()
# Module-level ProxyGetter instance, created once at import time.
proxy_getter = ProxyGetter()
# ================= Models (separate module; imported above as apps.proxy.models) =================
from django.db import models | |
from django.utils.translation import ugettext_lazy as _ | |
class ProxyQuerySet(models.QuerySet):
    """QuerySet for ``Proxy`` with a shortcut for usable proxies."""

    def valid(self):
        """Return only the rows whose ``is_valid`` flag is ``True``."""
        return self.filter(is_valid=True)
class Proxy(models.Model):
    """A proxy host together with its last known validity."""

    # Host string of the proxy (up to 255 characters).
    # NOTE(review): not declared unique although callers look rows up by
    # host -- confirm whether duplicates are acceptable.
    host = models.CharField(
        verbose_name=_('Host'),
        max_length=255,
    )
    # Whether the proxy is considered usable; new rows default to True.
    is_valid = models.BooleanField(
        verbose_name=_('Is valid'),
        default=True,
    )
    # Set automatically when the row is first created.
    created_at = models.DateTimeField(
        verbose_name=_("Created at"),
        auto_now_add=True,
    )

    # Manager built from ProxyQuerySet, so ``Proxy.objects.valid()`` works.
    objects = ProxyQuerySet.as_manager()

    class Meta:
        verbose_name = _("Proxy")
        verbose_name_plural = _("Proxies")

    def __str__(self):
        """Return the host as the human-readable representation."""
        return self.host
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment