Last active
August 8, 2018 05:03
-
-
Save manasmbellani/6c4e2d230fb6b9c12a74eb1e51c42b81 to your computer and use it in GitHub Desktop.
recon_info_grepassets.py - script to parse domains, emails, and IP addresses from a file/directory. Useful for the information-gathering phase of pentesting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import os | |
import re | |
import subprocess | |
import shlex | |
import requests | |
from argparse import ArgumentParser, RawTextHelpFormatter | |
# Human-readable summary of what this script does.
DEFINITION = (
    "Greps the assets, IPs, and emails from a given input file, "
    "and writes it to an output file"
)
def write_output_to_file(outputfile, output):
    """Write *output* to *outputfile* as UTF-8, replacing any existing content.

    Args:
        outputfile: Path of the file to create or overwrite.
        output: Text to write.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    print("[+] Writing output to {}".format(outputfile))
    # Binary write-only mode: the file is never read back here, and writing
    # pre-encoded bytes keeps "\n" line endings untouched on every platform.
    with open(outputfile, "wb") as f:
        f.write(output.encode("utf-8"))
def is_ip(content):
    """Return True when *content* has the shape of a dotted-quad IPv4 address.

    Leading and trailing whitespace is ignored. Only the shape is checked:
    four dot-separated groups of one to three digits. Octets are not
    range-checked, so ``999.1.1.1`` is accepted as well.

    Args:
        content: Candidate string.

    Returns:
        bool: True if the whole stripped string is a dotted quad.
    """
    # fullmatch anchors both ends of the entire string, so multi-line input
    # such as "1.2.3.4\nexample.com" is not mistaken for an IP address.
    pat = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
    return re.fullmatch(pat, content.strip()) is not None
def extract_domains(content, num=None):
    """Find domain-like strings in *content*, skipping dotted-quad IPs.

    A candidate is one or more dot-terminated labels of letters, digits or
    hyphens, followed by a final label of 2 to 6 letters or digits.
    Candidates that ``is_ip`` recognises as IP addresses are dropped.

    Args:
        content: Text to search.
        num: Optional maximum number of results; ``None`` or ``0`` means
            no limit.

    Returns:
        list: Matches in order of appearance. An empty list is returned when
        nothing is found, the same as ``extract_emails`` and ``extract_ips``.
    """
    pat = r"(?:[a-zA-Z0-9\-]+\.)+(?:[0-9A-Za-z]{2,6})"
    domains = [domain for domain in re.findall(pat, content) if not is_ip(domain)]
    return domains[:num] if num else domains
def extract_emails(content, num=None):
    """Find e-mail addresses in *content*.

    The local part may contain letters, digits and ``. _ % + -``; the domain
    part is letters, digits, dots and hyphens ending in a final label of
    2 to 6 letters or digits.

    Args:
        content: Text to search.
        num: Optional maximum number of results; ``None`` or ``0`` means
            no limit.

    Returns:
        list: Matches in order of appearance (empty when nothing is found).
    """
    # "_", "+" and "%" are common in real local parts; without them an
    # address like "john_doe@example.com" is truncated to "doe@example.com".
    pat = r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z0-9]{2,6}"
    emails = re.findall(pat, content)
    return emails[:num] if num else emails
def extract_ips(content, num=None):
    """Find dotted-quad IPv4-shaped strings in *content*.

    Only the shape is checked (four dot-separated groups of one to three
    digits); octets are not range-checked.

    Args:
        content: Text to search.
        num: Optional maximum number of results; ``None`` or ``0`` means
            no limit.

    Returns:
        list: Matches in order of appearance (empty when nothing is found).
    """
    # The lookarounds stop a match from starting or ending in the middle of
    # a longer digit run, e.g. "1234.5.6.7" must not yield "234.5.6.7".
    pat = r"(?<![0-9])(?:[0-9]{1,3}\.){3}[0-9]{1,3}(?![0-9])"
    ips = re.findall(pat, content)
    return ips[:num] if num else ips
def extract_assets_from_file(filename):
    """Read *filename* and extract e-mails, IPs and domains from its text.

    Args:
        filename: Path of the file to scan.

    Returns:
        tuple: ``(emails, ips, domains)`` as returned by ``extract_emails``,
        ``extract_ips`` and ``extract_domains`` respectively.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read-only mode: the file is never modified, so write permission on the
    # input must not be required.
    with open(filename, "rb") as f:
        # Undecodable bytes are replaced rather than raising, so a single
        # binary or non-UTF-8 file does not abort a whole directory scan.
        text = f.read().decode("utf-8", errors="replace")
    emails = extract_emails(text)
    ips = extract_ips(text)
    domains = extract_domains(text)
    return emails, ips, domains
def create_output_from_assets(all_emails, all_ips, all_domains):
    """Render the collected assets as a plain-text report.

    Each non-empty collection becomes a section: a title line, an underline
    line, one asset per line, then a blank line. Empty or ``None``
    collections produce no section at all.

    Args:
        all_emails: Iterable of e-mail address strings, or ``None``.
        all_ips: Iterable of IP address strings, or ``None``.
        all_domains: Iterable of domain strings, or ``None``.

    Returns:
        str: The report text (an empty string when there is nothing to show).
    """
    sections = (
        ("Emails\n", "======\n", all_emails),
        ("IPs\n", "====\n", all_ips),
        ("Domains\n", "=======\n", all_domains),
    )
    parts = []
    for title, underline, assets in sections:
        if not assets:
            continue
        parts.append(title)
        parts.append(underline)
        # Sets have no stable iteration order; sorting keeps the report
        # identical from run to run for the same input.
        parts.extend(asset + "\n" for asset in sorted(assets))
        parts.append("\n")
    return "".join(parts)
def combine_assets(emails, ips, domains, all_emails, all_ips, all_domains):
    """Merge newly found assets into the running collections.

    Any argument may be ``None`` (treated as empty) or any iterable of
    strings; the accumulators do not have to be sets. The inputs are not
    modified.

    Args:
        emails: Newly found e-mail addresses.
        ips: Newly found IP addresses.
        domains: Newly found domains.
        all_emails: E-mail addresses collected so far.
        all_ips: IP addresses collected so far.
        all_domains: Domains collected so far.

    Returns:
        tuple: ``(all_emails, all_ips, all_domains)`` as three new sets.
    """

    def _merge(collected, found):
        # Copy into a fresh set so the caller's accumulator is left untouched.
        merged = set() if collected is None else set(collected)
        if found is not None:
            merged.update(found)
        return merged

    return (
        _merge(all_emails, emails),
        _merge(all_ips, ips),
        _merge(all_domains, domains),
    )
def extract_combine_assets_from_file(filename, all_emails, all_ips, all_domains):
    """Extract assets from *filename* and merge them into the running totals.

    Args:
        filename: Path of the file to scan.
        all_emails: E-mail addresses collected so far.
        all_ips: IP addresses collected so far.
        all_domains: Domains collected so far.

    Returns:
        The result of ``combine_assets`` for the assets found in the file
        and the collections passed in.
    """
    found = extract_assets_from_file(filename)
    return combine_assets(*found, all_emails, all_ips, all_domains)
def main():
    """Command-line entry point.

    Parses ``--infile``, ``--outputfile`` and ``--exclude-files``, extracts
    e-mails, IPs and domains from the input file (or from every file directly
    inside the input directory), and writes the report to the output file.

    Exits through ``parser.error`` when ``--infile`` is neither an existing
    file nor an existing directory, instead of writing an empty report.
    """
    parser = ArgumentParser(description=DEFINITION)

    ### Define the arguments that the script takes
    parser.add_argument("--infile", dest="infile", action="store", required=True,
                        help="file, or directory of files, to parse")
    parser.add_argument("--outputfile", dest="outputfile", action="store", required=True,
                        help="file to write the extracted assets to")
    parser.add_argument("--exclude-files", dest="excludefiles", action="store", default="",
                        required=False,
                        help="comma-separated file names to skip when --infile is a directory")

    ### Read the arguments
    args = parser.parse_args()
    config = vars(args)
    infile = config["infile"]

    ### Fail loudly on a bad input path rather than silently writing nothing
    if not (os.path.isdir(infile) or os.path.isfile(infile)):
        parser.error("--infile {!r} is not an existing file or directory".format(infile))

    ### Split the exclusion list once, not once per file
    excluded = set(config["excludefiles"].split(","))

    ### Store all emails, domains, ips found here
    all_emails = set()
    all_domains = set()
    all_ips = set()

    if os.path.isdir(infile):
        ### Only parse the files directly inside the specified directory, not
        ### those in sub-directories: the first os.walk() entry is the top level
        _, _, filenames = next(os.walk(infile), (infile, [], []))
        for filename in filenames:
            if filename in excluded:
                continue
            filepath = os.path.join(infile, filename)
            print("[+] Parsing file {}".format(filepath))
            all_emails, all_ips, all_domains = extract_combine_assets_from_file(
                filepath, all_emails, all_ips, all_domains)
    else:
        print("[+] Parsing file {}".format(infile))
        all_emails, all_ips, all_domains = extract_combine_assets_from_file(
            infile, all_emails, all_ips, all_domains)

    ### Create output from the assets determined
    output = create_output_from_assets(all_emails, all_ips, all_domains)

    ### --outputfile is a required argument, so it is always present here
    write_output_to_file(config["outputfile"], output)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment