Created
December 3, 2013 08:24
-
-
Save komljen/7765800 to your computer and use it in GitHub Desktop.
USAGE: python search_domains.py -r 500
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Collect .ba domain names from Google search results.

Queries Google for ``site:.ba`` in pages of 100 results, extracts the host
names of the result links and writes the sorted, de-duplicated list to
``domains_google.txt`` in the current working directory.

USAGE: python search_domains.py -r 500
"""
__author__ = 'Alen Komljen'

import argparse
import os
import platform  # noqa: F401  (unused; kept from the original import list)
import re
import time
import urllib.error
import urllib.request
from socket import timeout

GOOGLE_URL = "http://www.google.com/search?q=site%3A.ba&num=100&start="
PAGE_SIZE = 100        # results per request; matches "num=100" in GOOGLE_URL
REQUEST_TIMEOUT = 10   # seconds to wait for a single HTTP response
DELAY_SECONDS = 30     # pause between two consecutive requests
OUTPUT_NAME = "domains_google.txt"

# Host part of a Google redirect link ("url?q=http://<host>...") ending in
# ".ba". The dot is escaped and a negative lookahead requires the host to end
# right after ".ba", so "cuba.com" or "foo.bank.com" are not reported.
DOMAIN_RE = re.compile(
    r"url\?q=https?://([a-z0-9][a-z0-9.-]*\.ba)(?![a-z0-9.-])")


def parse_args(argv=None):
    """Parse the command line and return the requested number of results.

    ``argv`` is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    Exits through ``parser.error`` (SystemExit) when ``-r`` is missing or is
    not a positive multiple of PAGE_SIZE.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', action='store', dest='results_number',
                        required=True, type=int,
                        help='number of google results to check')
    args = parser.parse_args(argv)
    if args.results_number <= 0 or args.results_number % PAGE_SIZE != 0:
        parser.error("-r must be a positive multiple of {0}".format(PAGE_SIZE))
    return args.results_number


def page_offsets(max_results):
    """Return the ``start`` offsets of every result page to request.

    Equivalent to stepping ``start`` from 0 to ``max_results - 100`` in
    increments of 100.
    """
    return list(range(0, max_results, PAGE_SIZE))


def extract_domains(html):
    """Return every .ba host name linked from ``html``.

    The result keeps page order and may contain duplicates.
    """
    return DOMAIN_RE.findall(html)


def fetch_page(start):
    """Download the result page beginning at offset ``start`` as text.

    Raises whatever ``urllib.request.urlopen`` or ``read`` raise, for example
    ``urllib.error.URLError`` or ``socket.timeout``.
    """
    request = urllib.request.Request(GOOGLE_URL + str(start))
    request.add_header("User-Agent", "Mozilla/5.0")
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        # Decode instead of str(bytes), which would yield the "b'...'" repr.
        return response.read().decode("utf-8", errors="replace")


def main(argv=None):
    """Fetch all requested result pages and write the domains to disk."""
    max_results = parse_args(argv)
    offsets = page_offsets(max_results)
    url_list = []

    for position, start in enumerate(offsets, 1):
        page_range = ("Results from: " + str(start) + " - "
                      + str(PAGE_SIZE + start))
        try:
            html = fetch_page(start)
        except (urllib.error.URLError, timeout, ConnectionError) as err:
            # Skip the page rather than retrying it: an immediate retry of
            # the same offset could loop forever without any delay.
            status = page_range + " failed (" + str(err) + ")"
        else:
            url_list.extend(extract_domains(html))
            status = page_range + " finished"

        if position < len(offsets):
            print(status + ", wait {0} seconds...".format(DELAY_SECONDS))
            time.sleep(DELAY_SECONDS)
        else:
            # Nothing left to request, so there is no reason to wait.
            print(status)

    # os.path.join picks the right separator on every platform.
    output_path = os.path.join(os.getcwd(), OUTPUT_NAME)
    with open(output_path, "w") as ba_domains:
        ba_domains.writelines(name + "\n" for name in sorted(set(url_list)))
    print("Completed! Results added to file: " + output_path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment