Last active
January 20, 2021 15:11
-
-
Save X-C3LL/d2eba167ab15ad83d53df1e374564b77 to your computer and use it in GitHub Desktop.
Infer prefixes/suffixes/common substrings inside a list of subdomains and build a dictionary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from difflib import SequenceMatcher | |
import sys | |
def words(words_file):
    """Read a word list from disk, one entry per line.

    Args:
        words_file: Path of the text file to read.

    Returns:
        The non-empty lines of the file, in order and without line
        terminators. When the file cannot be read an error is printed and
        an empty list is returned, so callers always receive a list.
    """
    try:
        with open(words_file, "r") as file:
            # splitlines() keeps the final entry even when the file has no
            # trailing newline (split("\n")[:-1] silently dropped it) and
            # copes with "\r\n" line endings as well.
            return [line for line in file.read().splitlines() if line]
    except (OSError, UnicodeDecodeError):
        print("[!] Error: file could not be opened!")
        return []
# Based on https://stackoverflow.com/questions/58585052/find-most-common-substring-in-a-list-of-strings
def calculate_substrings(words):
    """Collect the longest common substring of every pair of words.

    Each unordered pair of entries in ``words`` is compared once with
    ``SequenceMatcher.find_longest_match``. The result maps every substring
    found to a dict with two keys:

    * ``"count"`` - number of pairs whose longest match was that substring.
    * ``"pos"`` - dict of ``str(offset) -> hits``. An offset is recorded
      when the match starts at the same index in both words, and again
      (as a negative number counted from the end of the word) when the
      last occurrence of the substring sits at the same distance from the
      end of both words.
    """
    stats = {}
    for idx, left in enumerate(words):
        for right in words[idx + 1:]:
            block = SequenceMatcher(None, left, right).find_longest_match(
                0, len(left), 0, len(right)
            )
            shared = left[block.a:block.a + block.size]

            entry = stats.setdefault(shared, {"count": 0, "pos": {}})
            entry["count"] += 1
            positions = entry["pos"]

            # Same start index in both words.
            if block.a == block.b:
                key = str(block.a)
                positions[key] = positions.get(key, 0) + 1

            # Same (negative) offset measured from the end of both words.
            left_tail = left.rindex(shared) - len(left)
            right_tail = right.rindex(shared) - len(right)
            if left_tail == right_tail:
                key = str(left_tail)
                positions[key] = positions.get(key, 0) + 1
    return stats
def get_prefixes(words):
    """Select substrings that frequently start at index 0.

    ``words`` is an iterable of ``(substring, stats)`` pairs where ``stats``
    holds the ``"count"`` and ``"pos"`` entries. A substring is kept when it
    was seen more than once and more than 30% of its occurrences were
    recorded under position ``"0"``. The selection is passed through
    ``cleaner`` before being returned.
    """
    candidates = []
    for item in words:
        substring, stats = item[0], item[1]
        if "0" not in stats["pos"] or stats["count"] <= 1:
            continue
        ratio = (100.0 * stats["pos"]["0"]) / stats["count"]
        if ratio > 30:
            candidates.append(substring)
    return cleaner(candidates)
def get_suffixes(words):
    """Select substrings that frequently sit at the very end of a word.

    ``words`` is an iterable of ``(substring, stats)`` pairs. The position
    key looked up is minus the substring's own length, i.e. the offset a
    substring has when it ends the word. A substring is kept when it was
    seen more than once and more than 30% of its occurrences were recorded
    under that key. The selection is passed through ``cleaner``.
    """
    candidates = []
    for item in words:
        substring, stats = item[0], item[1]
        end_key = str(-len(substring))
        if end_key not in stats["pos"] or stats["count"] <= 1:
            continue
        ratio = (100.0 * stats["pos"][end_key]) / stats["count"]
        if ratio > 30:
            candidates.append(substring)
    return cleaner(candidates)
def get_common_words(words):
    """Select substrings that often appear at an aligned position.

    ``words`` is an iterable of ``(substring, stats)`` pairs. All hit
    counters stored in ``stats["pos"]`` are added together; the substring is
    kept when that total is non-zero and exceeds 30% of ``stats["count"]``.
    The selection is passed through ``cleaner`` before being returned.
    """
    candidates = []
    for item in words:
        substring, stats = item[0], item[1]
        aligned_hits = sum(stats["pos"].values())
        if aligned_hits == 0:
            continue
        if (100.0 * aligned_hits) / stats["count"] > 30:
            candidates.append(substring)
    return cleaner(candidates)
def get_mutable_suffixes(words, suffixes):
    """Return the leading parts of words that end with a known suffix.

    For every word/suffix combination where the word ends with the suffix
    and is longer than it, the text in front of the suffix is collected.
    Each leading part appears once, in first-seen order.
    """
    stems = []
    for word in words:
        for suffix in suffixes:
            cut = -len(suffix)
            if word[cut:] != suffix:
                continue
            if len(word) == len(suffix):
                # The word is the suffix itself: nothing is left in front.
                continue
            stem = word[:cut]
            if stem not in stems:
                stems.append(stem)
    return stems
def get_mutable_prefixes(words, prefixes):
    """Return the trailing parts of words that start with a known prefix.

    Mirror image of ``get_mutable_suffixes``: for every word/prefix
    combination where the word starts with the prefix and is longer than
    it, the text *after* the prefix is collected. Each trailing part
    appears once, in first-seen order.

    The previous implementation appended ``x[:len(y)]`` - the prefix
    itself - so the variable part of the word was never extracted.

    Args:
        words: Iterable of words to inspect.
        prefixes: Iterable of prefix strings; empty strings are ignored.

    Returns:
        List of unique remainders (word minus prefix).
    """
    mutable_pref = []
    seen = set()
    for word in words:
        for prefix in prefixes:
            # An empty prefix matches everything and carries no information.
            if not prefix or len(prefix) == len(word):
                continue
            if not word.startswith(prefix):
                continue
            remainder = word[len(prefix):]
            if remainder not in seen:
                seen.add(remainder)
                mutable_pref.append(remainder)
    return mutable_pref
def cleaner(words):
    """Normalise a list of terms and drop the redundant ones.

    One leading and one trailing ``-`` are stripped from every term. Terms
    that end up empty are discarded and duplicates are collapsed. Finally,
    any term that contains another, different term as a substring is
    removed, so only the shortest building blocks remain.

    The previous implementation removed items from the list it was
    iterating over (skipping elements), matched every dash-free term
    against itself (so it deleted itself), and raised ``IndexError`` on
    ``""`` or ``"-"``.

    Args:
        words: Iterable of candidate term strings.

    Returns:
        New list of cleaned terms in first-seen order.
    """
    cleaned = []
    for term in words:
        # startswith/endswith are safe on empty strings, unlike term[0].
        if term.startswith("-"):
            term = term[1:]
        if term.endswith("-"):
            term = term[:-1]
        if term and term not in cleaned:
            cleaned.append(term)
    # Build a new list instead of mutating the one being iterated.
    return [
        term
        for term in cleaned
        if not any(other != term and other in term for other in cleaned)
    ]
def build_terms(suf, pref, common):
    """Merge suffixes, prefixes and common words into one list.

    The three inputs are walked in that order and every item is kept the
    first time it is seen, so the result has no duplicates.
    """
    merged = []
    for group in (suf, pref, common):
        for term in group:
            if term in merged:
                continue
            merged.append(term)
    return merged
def generate_dict(words, terms):
    """Build candidate names by recombining word fragments with terms.

    Two passes are made:

    1. Every entry returned by ``get_mutable_suffixes(words, terms)`` is
       followed by every term.
    2. Every entry returned by ``get_mutable_prefixes(words, terms)`` is
       preceded by every term; candidates already present in ``words`` are
       skipped in this pass.

    Args:
        words: The original word list (strings).
        terms: The terms to combine with the extracted fragments.

    Returns:
        List of unique candidates in generation order.
    """
    mut_suff = get_mutable_suffixes(words, terms)
    mut_pref = get_mutable_prefixes(words, terms)

    # Sets give O(1) membership tests; the original scanned the growing
    # result list (and the word list) for every candidate.
    known = set(words)
    seen = set()
    final = []

    for stem in mut_suff:
        for term in terms:
            candidate = stem + term
            # NOTE(review): unlike the second pass, this one does not skip
            # candidates already in `words`; kept as-is to preserve output.
            if candidate not in seen:
                seen.add(candidate)
                final.append(candidate)

    for fragment in mut_pref:
        for term in terms:
            candidate = term + fragment
            if candidate not in seen and candidate not in known:
                seen.add(candidate)
                final.append(candidate)
    return final
if __name__ == "__main__":
    # Script entry point: read a word list, infer prefixes / suffixes /
    # common substrings from it and print candidate sub-domains.
    #
    # The word list can be prepared with:
    # cat subdomains_found.txt | cut -d"." -f1 > wordlist.txt
    if len(sys.argv) < 3:
        print("[!] Error. Please provide word list and domain (python3 estigmergio.py words.txt whatever.com")
        # Stop here: falling through would raise IndexError on sys.argv.
        sys.exit(1)
    target = sys.argv[1]
    domain = sys.argv[2]

    original = words(target)
    if original is None:
        # words() has already reported that the file could not be read.
        sys.exit(1)

    calculated = calculate_substrings(original)
    # Ignore very short substrings (two characters or fewer).
    reduced = {k: v for k, v in calculated.items() if len(k) > 2}
    # Most frequent substrings first.
    ordered = sorted(reduced.items(), key=lambda x: x[1]["count"], reverse=True)

    pref = get_prefixes(ordered)
    suff = get_suffixes(ordered)
    common = get_common_words(ordered)
    terms = build_terms(suff, pref, common)

    subdomains = generate_dict(original, terms)
    for x in subdomains:
        print(x + "." + domain)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment