Created
May 16, 2017 22:46
-
-
Save smartm13/435a404d2a6415c8b1a4b0302560527e to your computer and use it in GitHub Desktop.
A SERP script to find the rank of a keyword for a particular website; `beauty` is the final callable.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getval(s, f, t):
    """Return the substring of *s* between marker *f* and terminator *t*.

    The scan for *t* starts just after the first occurrence of *f*.
    """
    begin = s.find(f) + len(f)
    end = s.find(t, begin)
    return s[begin:end]
def dsp(zrc):
    """Debug hook for a raw SERP response; currently a no-op.

    The author's original dump-to-file line
    (write *zrc* to C:\\Users/smartm13/Desktop/op.txt.html) is disabled.
    """
    pass
proxies=[]  # module-level list of "scheme://ip:port" strings, refreshed by hidemyass()
def hidemyass(config='1649468'):# and ('1870962' or '1870954')):
    """Scrape proxylist.hidemyass.com and refresh the module-level `proxies` list.

    config: numeric search id appended to the hidemyass search URL.
    Returns the list of "scheme://ip:port" proxy strings (also stored in
    the global `proxies`).
    """
    import requests
    rg=requests.get
    # Raw HTML of the proxy search-results page.
    s=rg('http://proxylist.hidemyass.com/search-'+config+'#listable').content
    from bs4 import BeautifulSoup as bsp
    z=bsp(s,'html.parser')
    tbody=z.find_all('td')
    # The results table appears to use 8 <td> cells per row:
    # index 1 = obfuscated IP cell, 2 = port, 6 = protocol (http/https).
    tb=[tbody[x] for x in range(1,len(tbody),8)]
    tip=[]  # decoded IP strings, one per row
    tp=[tbody[x].text.strip() for x in range(2,len(tbody),8)]
    tpr=[tbody[x].text.strip().lower() for x in range(6,len(tbody),8)]
    for t in tb:
        # The site hides decoy digits via CSS. Collect the class names that
        # are styled display:none ("gayab") and skip elements carrying them.
        ip,gayab="",[]
        for x in t.span.contents:
            if not x.name:ip+=str(x).strip()  # bare text node: real digits
            if x.name=='style':
                # Inline <style> lists the classes rendered invisible.
                gayab+=[getval(y,'.','{') for y in x.text.strip().split('\n') if 'none' in y]
                continue
            if x.name in ['div','span']:
                # Skip elements hidden inline or via a hidden class.
                if 'style' in x.attrs and 'none' in str(x['style']):continue
                if 'class' in x.attrs and x['class'][0] in gayab:continue
                ip+=x.text.strip()
        tip.append(ip)
    global proxies
    proxies=[str(tpr[x])+"://"+str(tip[x])+":"+str(tp[x]) for x in range(len(tip))]
    return proxies
cache={}   # last proxy dict returned by getprxy()
prxoff=0   # proxy mode set by beauty(): 0=off, 1=on, 2+=hidemyass row number
def getprxy(new=0,tm=500,hidemyassth=1):
    """Return a ``{'http': proxy_url}`` dict, memoized in the module global `cache`.

    new         : when falsy, return the cached proxy (fetching one only if
                  the cache is empty and `prxoff` requests a proxy).
    tm          : max check period (seconds) passed to gimmeproxy.com.
    hidemyassth : 1-based index into the hidemyass() list; 0 selects the
                  gimmeproxy.com API instead.
    Returns {} when no proxy could be obtained.
    """
    global cache
    if not new:
        # Serve the cache; on a miss, fetch only when prxoff enables proxies.
        return cache or (getprxy(1, tm, abs(prxoff - 1)) if prxoff else {})
    import requests,json
    try:
        if hidemyassth:
            cache = {"http": hidemyass()[hidemyassth - 1]}
        else:
            api = ('http://gimmeproxy.com/api/getProxy?anonymityLevel=1'
                   '&supportsHttps=true&protocol=http&get=true&cookies=true'
                   '&maxCheckPeriod={}'.format(tm))
            cache = {"http": json.loads(requests.get(api).content)['curl']}
    except Exception:  # narrowed from bare except: network/parse failure -> no proxy
        cache = {}
    return cache
def gs(q,india=1,num=10,start=0):
    """Fetch the raw Google search-result HTML for query *q*.

    india selects google.co.in vs google.com (the .com path first hits /ncr
    so the result country is not redirected); num/start control paging.
    """
    import requests,urllib
    domain = 'www.google.co.in' if india else 'www.google.com'
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'}
    if india:
        session_cookies = {}
    else:
        # /ncr = "no country redirect": pins google.com results.
        session_cookies = requests.get('http://'+domain+'/ncr', headers=headers).cookies.get_dict()
    query_url = ('http://'+domain+'/search?q='+urllib.quote_plus(q)
                 +'&num='+str(num)+'&start='+str(start))
    return requests.get(query_url, headers=headers, cookies=session_cookies).content
def gl(q,india=1,num=10,start=0):
    """Return the destination URLs from one Google result page for query *q*.

    Returns the string "CAPTCHA#FAILED" when Google blocks the request,
    otherwise a (possibly empty) list of result URLs.
    """
    from bs4 import BeautifulSoup as bsp
    s = gs(q, india, num, start)
    dsp(s)  # optional debug dump of the raw page
    check = 'detected unusual traffic from your computer network'
    if check in s:
        return "CAPTCHA#FAILED"
    z = bsp(s, 'html.parser')
    zz = z.find_all('div', id='ires')  # the organic-results container
    # BUGFIX: initialize mm so a parse failure returns [] instead of raising
    # NameError on the final return (the original left mm unbound there).
    mm = []
    try:
        m = zz[0].find_all('cite')
        # Walk up from each <cite> to its result block and pull the <a href>.
        mm = [x.findParent('div', attrs={'class': 'g'}).find_all('h3', attrs={'class': 'r'})[0].find_all('a')[0]['href']
              for x in m if x.findParent('div', attrs={'class': 'g'})]
    except Exception:  # narrowed from bare except: unexpected page layout
        print('dlocha: q={},num={},start={},india={}'.format(q, num, start, india))
    # hrefs look like "/url?q=<dest>&sa=..."; strip the wrapper.
    return [getval(xm, 'url?q=', '&') for xm in mm]
def pup(keyword,domain,india=1,num=10,start=0):
    """Scan one result page and return rank tuples for *domain*.

    Returns a list of (rank_on_page, page_no, url) for every result whose
    netloc contains *domain*, or the CAPTCHA/error string from gl() unchanged.
    """
    r = gl(keyword, india, num, start)
    if isinstance(r, str):  # idiomatic form of type(r)==type('zxc'): error sentinel
        return r
    from urlparse import urlparse as urp
    # 1-based position on the page, 1-based page number, ascii-safe URL.
    rank = [(pos + 1, 1 + int(start / num), url.encode('ascii', 'ignore'))
            for pos, url in enumerate(r) if domain in urp(url).netloc]
    return rank
def rank1st(keyword,domain,india=1,gap=10,pgnostop=15):
    """Scan successive result pages until *domain* is found.

    Fetches pages of size *gap*, at most *pgnostop* of them, and returns the
    first non-empty result from pup() (rank tuples, or its error string);
    returns [] if nothing was found within the page budget.
    """
    found = []
    page_start = 0
    pages_left = pgnostop
    while pages_left and not found:
        found = pup(keyword, domain, india, gap, page_start)
        page_start += gap
        pages_left -= 1
    return found
def beauty(keyword,domain,india=1,gap=10,pgnostop=15,sep='\t',fast=0,prxy=0,isbeauty=1):
    """Top-level entry point: report *domain*'s Google rank for *keyword*.

    Scans up to `pgnostop` pages of 100 results each.  When `isbeauty` is
    truthy, returns the string "AbsRank<sep>RankOnPage on PageNo<sep>url";
    otherwise returns a dict {'rank': ..., 'url': ...}.
    """
    global prxoff#prxy=0off,1on,2...hidemyassno
    prxoff=prxy  # stash the proxy mode where getprxy() can read it
    try:
        amt=100*pgnostop  # total results scanned, used in the failure message
        # Request 100 results per page to minimise the number of fetches.
        absrank=rank1st(keyword,domain,india,100,pgnostop)
        # A str result is CAPTCHA/error text bubbled up from gl().
        if type('str')==type(absrank):return "{0}{sep}{0} on {0}{sep}{1}".format(0,absrank,sep=sep)
        # Deliberate ZeroDivisionError: jump to the "not found" handler below.
        if not len(absrank):0/0
        absrank=absrank[0]
        # Absolute 1-based rank across all pages (page size was 100).
        absr=absrank[0]-100+absrank[1]*100
        # Re-express as (rank-on-page, page-no, url) for page size `gap`.
        # NOTE(review): absr%gap yields 0 (not gap) for the last slot of a
        # page, and the page number looks shifted there too — confirm intended.
        rank=(absr%gap),1+int(absr/gap),absrank[2]
        if 0 and not fast:  # dead code: disabled re-scan at page size `gap`
            amt=gap*pgnostop
            rank=rank1st(keyword,domain,india,gap,pgnostop)
            if type('str')==type(rank):return "{0}{sep}{0} on {0}{sep}{1}".format(0,rank,sep=sep)
            if not len(rank):0/0
            rank=rank[0]
    except ZeroDivisionError:return "{0}{sep}{0} on {0}{sep}{1}".format(0,'Locha: No result in first {} pages ({} results scanned)'.format(pgnostop,amt),sep=sep) if isbeauty else {'rank':'-2','url':'not found in 1st {} results'.format(amt)}
    rrank=rank[0]     # rank within its gap-sized page
    pgno=rank[1]      # page number at page size `gap`
    url=rank[2]       # matched URL (gap-sized paging)
    apgno=absrank[1]  # page number at page size 100
    aurl=absrank[2]   # matched URL (100-sized paging)
    # Absolute rank; prefixed '~' (approximate) past the first 100-result page.
    arank=absrank[0] if str(apgno)=='1' else ('~'+str(absrank[0]+gap*(apgno-1)))
    furl=url if (url==aurl) else url+' OR '+aurl
    if not isbeauty:return {'rank':str(absr),'url':url[2:]}
    return "{0}{sep}{1} on {2}{sep}{3}".format(arank,rrank,pgno,furl,sep=sep)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment