Last active
November 7, 2017 10:59
-
-
Save iKlotho/e32e52b408bedda0a4bb8b2763859b9a to your computer and use it in GitHub Desktop.
hidemyass-proxy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#hidemyarse | |
import requests | |
import BeautifulSoup as BS | |
# Request headers carrying a desktop-Firefox User-agent string.
headers = {
    'User-agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0',
}
def find_inline_words(checkraw_word):
    """Split a sequence of rule strings into two lists of 4-character names.

    The split is purely positional (the text of each entry is not inspected):

    * with exactly two entries, entry 0 goes to the second result list and
      entry 1 to the first;
    * otherwise the entries at even indexes go to the second list and the
      entries at odd indexes to the first, with the final entry always
      ignored.

    From every selected entry only characters 1-4 (``entry[1:5]``) are kept.
    NOTE(review): presumably this skips a leading ``.`` and keeps a
    four-letter CSS class name -- confirm against the page markup.

    Returns:
        A tuple ``(in_words, none_words)`` of two lists of strings.
    """
    total = len(checkraw_word)
    if total == 2:
        hidden_rules = checkraw_word[:1]
        shown_rules = checkraw_word[1:]
    else:
        # The stop index deliberately excludes the last entry.
        stop = total - 1
        hidden_rules = checkraw_word[0:stop:2]
        shown_rules = checkraw_word[1:stop:2]

    in_words = [rule[1:5] for rule in shown_rules]
    none_words = [rule[1:5] for rule in hidden_rules]
    return in_words, none_words
def _node_attr(node, name):
    """Return ``node[name]``, or None when the lookup is not possible."""
    try:
        return node[name]
    except (KeyError, TypeError):
        # KeyError: the attribute is absent; TypeError: the node does not
        # support string-key indexing (e.g. a plain string).
        return None


def get_ip(spans, inw, outw):
    """Concatenate the text of the nodes in *spans* that count as visible.

    Each node is classified by the first rule that applies:

    * it has a ``style`` attribute: kept only when the text between the
      first and second ``:`` of that attribute, stripped, is ``inline``
      (a style value without any ``:`` is treated as not visible);
    * it has a ``class`` attribute: kept when the class is in *inw* or is
      not in *outw*;
    * otherwise: kept only when it is exactly a ``BS.NavigableString``.

    Before the walk, the first newline entry and the first single-space
    entry are dropped, if present.

    Args:
        spans: list of child nodes to walk. It is copied, not modified.
        inw: collection of class names whose nodes are always kept.
        outw: collection of class names whose nodes are dropped (unless
            also present in *inw*).

    Returns:
        The concatenated text of all kept nodes (not stripped).
    """
    # Work on a copy so the caller's list is left untouched.
    nodes = list(spans)
    for filler in ('\n', ' '):
        if filler in nodes:
            nodes.remove(filler)

    pieces = []
    for node in nodes:
        style = _node_attr(node, 'style')
        css_class = _node_attr(node, 'class')
        if style is not None:
            declaration = style.split(':')
            # Guard the index: a style value with no ':' has a single part.
            if len(declaration) > 1 and declaration[1].strip() == "inline":
                pieces.append(node.text)
        elif css_class is not None:
            if css_class in inw or css_class not in outw:
                pieces.append(node.text)
        # Exact type match on purpose: isinstance() would also accept
        # subclasses of NavigableString.
        elif type(node) is BS.NavigableString:
            pieces.append(node)
    return "".join(pieces)
if __name__ == "__main__":
    # Download the proxy list page and parse it.
    url = "http://proxylist.hidemyass.com"
    r = requests.get(url, headers=headers)
    soup = BS.BeautifulSoup(r.content)

    ip_list = []
    ipraw = soup.findAll('tr', {'class': 'altshade'})  # raw tr list
    for row in ipraw:
        # Text of the row's first <style> element, one entry per line.
        checkraw_word = row.style.text.split('\n')
        inw, outw = find_inline_words(checkraw_word)  # get inline and none words

        # The second <span> of the row holds the address fragments. Its
        # <style> child is detached first so it does not appear in .contents.
        ip_span = row.findAll('span')[1]
        ip_span.find('style').extract()
        spans = ip_span.contents

        # Text of the row's third cell, written after the colon.
        host = row.findAll('td')[2].text
        ip_list.append(get_ip(spans, inw, outw).strip() + ":" + host)

    # The with-block closes the file; no explicit close is needed.
    with open('hidemyarse.txt', 'w') as f:
        f.write("\n".join(ip_list))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment