Skip to content

Instantly share code, notes, and snippets.

@komljen
Created December 3, 2013 08:24
Show Gist options
  • Save komljen/7765800 to your computer and use it in GitHub Desktop.
Save komljen/7765800 to your computer and use it in GitHub Desktop.
USAGE: python search_domains.py -r 500
#!/usr/bin/env python
__author__ = 'Alen Komljen'
import urllib.request, re, time, argparse, os, platform
from socket import timeout
# Collect .ba domain names from Google "site:.ba" result pages and write the
# sorted, de-duplicated list to domains_google.txt in the current directory.

PAGE_SIZE = 100        # results requested per Google page (num=100 in the URL)
PAUSE_SECONDS = 30     # delay between page requests
REQUEST_TIMEOUT = 10   # seconds before a single page request is abandoned
GOOGLE_URL = "http://www.google.com/search?q=site%3A.ba&num=100&start="
OUTPUT_NAME = "domains_google.txt"

# Host part of a Google redirect link ("url?q=http://host/...") ending in ".ba".
# The dot before "ba" is escaped, and the look-ahead rejects hosts that merely
# contain ".ba" (e.g. "www.bank.com" must not yield "www.ba").
DOMAIN_RE = re.compile(r"url\?q=https?://([a-z0-9.-]+\.ba)(?![a-z0-9.-])")


def parse_args(argv=None):
    """Parse the command line and return the number of results to check.

    argv: argument list to parse; None means sys.argv[1:].

    Exits through argparse (status 2) unless -r is a positive multiple of
    PAGE_SIZE, because results are fetched in whole pages.
    NOTE(review): the original validation line was unreadable; the lower
    bound used here is a reconstruction -- confirm.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', action='store', dest='results_number',
                        required=True, type=int,
                        help='number of google results to check')
    args = parser.parse_args(argv)
    if args.results_number <= 0 or args.results_number % PAGE_SIZE != 0:
        parser.error("number of results must be a positive multiple of "
                     + str(PAGE_SIZE))
    return args.results_number


def output_path():
    """Return the full path of the result file in the current directory."""
    # os.path.join picks the right separator on every platform, so no
    # per-OS branching is needed.
    return os.path.join(os.getcwd(), OUTPUT_NAME)


def extract_domains(html):
    """Return every .ba host found in html, in order of appearance.

    Duplicates are kept; the caller de-duplicates.
    """
    return DOMAIN_RE.findall(html)


def fetch_page(start):
    """Download one result page beginning at offset start.

    Returns the page text, or None when the request fails.
    """
    import http.client  # local import: only needed for the except clause

    request = urllib.request.Request(GOOGLE_URL + str(start))
    request.add_header("User-Agent", "Mozilla/5.0")
    try:
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
            return response.read().decode("utf-8", errors="replace")
    except (OSError, http.client.HTTPException) as err:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        print("Request for results from " + str(start) + " failed: " + str(err))
        return None


def main(argv=None):
    """Fetch every result page, then write the sorted unique domains."""
    max_results = parse_args(argv)
    domains = set()
    for start in range(0, max_results, PAGE_SIZE):
        html = fetch_page(start)
        # A failed page is skipped rather than retried, so a persistent
        # error cannot turn into an endless request loop.
        if html is not None:
            domains.update(extract_domains(html))
            print("Results from: " + str(start) + " - "
                  + str(PAGE_SIZE + start) + " finished")
        if start + PAGE_SIZE < max_results:
            print("Waiting " + str(PAUSE_SECONDS) + " seconds...")
            time.sleep(PAUSE_SECONDS)

    path = output_path()
    with open(path, "w") as ba_domains:
        ba_domains.writelines(domain + "\n" for domain in sorted(domains))
    print("Completed! Results added to file: " + path)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment