olsososo · June 18, 2014 05:33
diff --git a/block.py b/block.py
 import requests
 from lxml import etree

 def main(keyword, pages):
 	"""Crawl Baidu result pages and append the hosts found to ./domain.txt.

 	keyword -- iterable of search terms; each term is placed into the query
 	           URL exactly as given (no escaping is applied here).
 	pages   -- number of result pages requested per term; the ``pn`` offset
 	           advances by 10 for every page.

 	Every distinct host is appended as a ``127.0.0.2 <host>`` line terminated
 	by ``\\r\\n``, in first-seen order. A page that cannot be fetched or parsed
 	is reported and skipped, and whatever has been collected is still written
 	if the crawl is interrupted part-way.
 	"""
 	domains = []
 	seen = set()  # O(1) duplicate check; `domains` keeps first-seen order

 	try:
 		for page in range(pages):
 			pn = page * 10
 			for k in keyword:
 				url = "http://www.baidu.com/s?wd="+k+"&pn="+str(pn)+"&oq="+k+"&tn=monline_dg&ie=utf-8&usm=1"
 				print(url)
 				for domain in _page_domains(url):
 					if domain not in seen:
 						seen.add(domain)
 						domains.append(domain)
 	finally:
 		# Written in `finally` so an interrupted crawl keeps its partial results,
 		# and the context manager guarantees the handle is closed.
 		with open('./domain.txt', 'a') as f:
 			for domain in domains:
 				f.write('127.0.0.2 '+domain+'\r\n')


 def _page_domains(url):
 	"""Return the hosts shown in the ``span.g`` elements of one result page."""
 	try:
 		r = requests.get(url, timeout=15)
 	except requests.RequestException as exc:
 		# One slow or failing page must not abort the whole crawl.
 		print('request failed for %s: %s' % (url, exc))
 		return []

 	found = []
 	try:
 		htmlElement = etree.HTML(r.content)
 		if htmlElement is None:
 			# lxml yields no tree for an empty/unparseable body.
 			return found
 		for i in htmlElement.xpath('.//span[@class="g"]'):
 			href = etree.tostring(i)
 			if not isinstance(href, str):
 				# Python 3: tostring() returns bytes, which str.replace() rejects.
 				href = href.decode('utf-8')
 			href = href.replace('<b>','').replace('</b>','').replace('<span class="g">','').replace('</span>','')
 			if '/' in href:
 				found.append(href.split('/')[0])
 	except Exception as exc:
 		# Parsing stays best-effort, but the failure is no longer silent.
 		print('could not parse %s: %s' % (url, exc))
 	return found


 if __name__ == '__main__':
 	# Search terms to crawl; the list is empty here, so the loop over terms
 	# makes no request until it is filled in.
 	keyword = list()
 	main(keyword, pages=100)
	import requests
	from lxml import etree

	def main(keyword, pages):
		"""Crawl Baidu result pages and append the hosts found to ./domain.txt.

		keyword -- iterable of search terms; each term is placed into the query
		           URL exactly as given (no escaping is applied here).
		pages   -- number of result pages requested per term; the ``pn`` offset
		           advances by 10 for every page.

		Every distinct host is appended as a ``127.0.0.2 <host>`` line terminated
		by ``\\r\\n``, in first-seen order. A page that cannot be fetched or parsed
		is reported and skipped, and whatever has been collected is still written
		if the crawl is interrupted part-way.
		"""
		domains = []
		seen = set()  # O(1) duplicate check; `domains` keeps first-seen order

		try:
			for page in range(pages):
				pn = page * 10
				for k in keyword:
					url = "http://www.baidu.com/s?wd="+k+"&pn="+str(pn)+"&oq="+k+"&tn=monline_dg&ie=utf-8&usm=1"
					print(url)
					for domain in _page_domains(url):
						if domain not in seen:
							seen.add(domain)
							domains.append(domain)
		finally:
			# Written in `finally` so an interrupted crawl keeps its partial results,
			# and the context manager guarantees the handle is closed.
			with open('./domain.txt', 'a') as f:
				for domain in domains:
					f.write('127.0.0.2 '+domain+'\r\n')


	def _page_domains(url):
		"""Return the hosts shown in the ``span.g`` elements of one result page."""
		try:
			r = requests.get(url, timeout=15)
		except requests.RequestException as exc:
			# One slow or failing page must not abort the whole crawl.
			print('request failed for %s: %s' % (url, exc))
			return []

		found = []
		try:
			htmlElement = etree.HTML(r.content)
			if htmlElement is None:
				# lxml yields no tree for an empty/unparseable body.
				return found
			for i in htmlElement.xpath('.//span[@class="g"]'):
				href = etree.tostring(i)
				if not isinstance(href, str):
					# Python 3: tostring() returns bytes, which str.replace() rejects.
					href = href.decode('utf-8')
				href = href.replace('<b>','').replace('</b>','').replace('<span class="g">','').replace('</span>','')
				if '/' in href:
					found.append(href.split('/')[0])
		except Exception as exc:
			# Parsing stays best-effort, but the failure is no longer silent.
			print('could not parse %s: %s' % (url, exc))
		return found


	if __name__ == '__main__':
		# Search terms to crawl; the list is empty here, so the loop over terms
		# makes no request until it is filled in.
		keyword = list()
		main(keyword, pages=100)
No results found