Skip to content

Instantly share code, notes, and snippets.

@X-C3LL
Last active January 20, 2021 15:11
Show Gist options
  • Save X-C3LL/d2eba167ab15ad83d53df1e374564b77 to your computer and use it in GitHub Desktop.
Save X-C3LL/d2eba167ab15ad83d53df1e374564b77 to your computer and use it in GitHub Desktop.
Infer prefixes/suffixes/common substrings inside a list of subdomains and build a dictionary
#!/usr/bin/env python3
from difflib import SequenceMatcher
import sys
def words(words_file):
    """Load the word list that feeds the substring analysis.

    Args:
        words_file: Path to a text file holding one word per line.

    Returns:
        A list with the non-empty lines of the file, in file order and
        without line terminators. If the file cannot be read, an error
        message is printed and an empty list is returned.
    """
    try:
        with open(words_file, "r") as handle:
            content = handle.read()
    except (OSError, UnicodeDecodeError):
        print("[!] Error: file could not be opened!")
        return []
    # splitlines() keeps the final entry even when the file does not end
    # with a newline; blank lines carry no word and are skipped.
    return [line for line in content.splitlines() if line]
#Based on https://stackoverflow.com/questions/58585052/find-most-common-substring-in-a-list-of-strings
def calculate_substrings(words):
    """Tally the longest common substring of every pair of words.

    Each unordered pair of entries is compared once with
    ``SequenceMatcher.find_longest_match``; the matched text becomes a key
    of the result.

    Args:
        words: Sequence of strings to compare pairwise.

    Returns:
        A dict mapping each matched substring to
        ``{"count": int, "pos": {str: int}}``. ``count`` is the number of
        pairs whose longest match was that substring. ``pos`` counts, per
        offset rendered as a string, how often the substring was aligned in
        both words: a non-negative key is the shared start index, a negative
        key is the shared offset of its last occurrence relative to the end
        of the word.
    """
    substrings = {}
    for index, first in enumerate(words):
        for second in words[index + 1:]:
            match = SequenceMatcher(None, first, second).find_longest_match(
                0, len(first), 0, len(second)
            )
            common = first[match.a:match.a + match.size]
            entry = substrings.setdefault(common, {"count": 0, "pos": {}})
            entry["count"] += 1
            positions = entry["pos"]

            # Same start index in both words: evidence of a prefix-like
            # position.
            if match.a == match.b:
                key = str(match.a)
                positions[key] = positions.get(key, 0) + 1

            # Same distance from the end in both words: evidence of a
            # suffix-like position. Computed once per word instead of on
            # every use.
            offset_first = first.rindex(common) - len(first)
            offset_second = second.rindex(common) - len(second)
            if offset_first == offset_second:
                key = str(offset_first)
                positions[key] = positions.get(key, 0) + 1
    return substrings
def get_prefixes(words):
    """Select the substrings that mostly occur at the start of words.

    Args:
        words: Iterable of ``(substring, stats)`` pairs, where ``stats`` has
            a ``"count"`` total and a ``"pos"`` mapping of offset -> hits.

    Returns:
        The result of ``cleaner`` applied to every substring seen more than
        once whose hits at offset ``"0"`` exceed 30% of its count.
    """
    selected = []
    for item in words:
        substring, stats = item[0], item[1]
        if "0" not in stats["pos"]:
            continue
        if stats["count"] <= 1:
            continue
        share = (100.0 * stats["pos"]["0"]) / stats["count"]
        if share > 30:
            selected.append(substring)
    return cleaner(selected)
def get_suffixes(words):
    """Select the substrings that mostly occur at the end of words.

    Args:
        words: Iterable of ``(substring, stats)`` pairs, where ``stats`` has
            a ``"count"`` total and a ``"pos"`` mapping of offset -> hits.

    Returns:
        The result of ``cleaner`` applied to every substring seen more than
        once whose hits at the end-aligned offset (minus its own length,
        as a string) exceed 30% of its count.
    """
    selected = []
    for item in words:
        substring, stats = item[0], item[1]
        # A substring that ends a word starts len(substring) before its end.
        end_key = str(0 - len(substring))
        if end_key not in stats["pos"]:
            continue
        if stats["count"] <= 1:
            continue
        share = (100.0 * stats["pos"][end_key]) / stats["count"]
        if share > 30:
            selected.append(substring)
    return cleaner(selected)
def get_common_words(words):
    """Select the substrings that usually sit at an aligned position.

    Args:
        words: Iterable of ``(substring, stats)`` pairs, where ``stats`` has
            a ``"count"`` total and a ``"pos"`` mapping of offset -> hits.

    Returns:
        The result of ``cleaner`` applied to every substring whose summed
        hits over all recorded offsets are non-zero and exceed 30% of its
        count.
    """
    selected = []
    for item in words:
        substring, stats = item[0], item[1]
        aligned_hits = sum(stats["pos"].values())
        if aligned_hits == 0:
            continue
        if (100.0 * aligned_hits) / stats["count"] > 30:
            selected.append(substring)
    return cleaner(selected)
def get_mutable_suffixes(words, suffixes):
    """Collect the stems of words that end with a known suffix.

    Args:
        words: Iterable of strings to inspect.
        suffixes: Iterable of candidate suffix strings.

    Returns:
        A list, in first-seen order and without duplicates, of the part of
        each word that precedes a matching suffix. Words equal to the suffix
        and empty suffixes contribute nothing.
    """
    stems = []
    seen = set()  # constant-time duplicate check; the list keeps the order
    for word in words:
        for suffix in suffixes:
            if not suffix or len(suffix) == len(word):
                continue
            if not word.endswith(suffix):
                continue
            stem = word[:-len(suffix)]
            if stem not in seen:
                seen.add(stem)
                stems.append(stem)
    return stems
def get_mutable_prefixes(words, prefixes):
    """Collect the remainders of words that start with a known prefix.

    This mirrors ``get_mutable_suffixes``: the returned pieces are the parts
    of the words that stay fixed while the prefix is swapped. The previous
    implementation returned the matched prefix itself, so callers that
    prepend other terms to the result only produced term+prefix strings.

    Args:
        words: Iterable of strings to inspect.
        prefixes: Iterable of candidate prefix strings.

    Returns:
        A list, in first-seen order and without duplicates, of the part of
        each word that follows a matching prefix. Words equal to the prefix
        and empty prefixes contribute nothing.
    """
    remainders = []
    seen = set()  # constant-time duplicate check; the list keeps the order
    for word in words:
        for prefix in prefixes:
            if not prefix or len(prefix) == len(word):
                continue
            if not word.startswith(prefix):
                continue
            remainder = word[len(prefix):]
            if remainder not in seen:
                seen.add(remainder)
                remainders.append(remainder)
    return remainders
def cleaner(words):
    """Normalise candidate terms and drop the redundant ones.

    One leading and one trailing ``-`` are stripped from every term. Terms
    that end up empty are discarded, duplicates are collapsed, and any term
    that contains a different, shorter term is removed so only the minimal
    building blocks remain.

    The previous implementation raised ``IndexError`` on ``"-"`` or ``""``,
    removed items from the list it was iterating (skipping entries), and
    matched each dash-free term against itself, deleting it.

    Args:
        words: Iterable of candidate strings.

    Returns:
        A new list of cleaned terms in first-seen order.
    """
    stripped = []
    for term in words:
        if term.startswith("-"):
            term = term[1:]
        if term.endswith("-"):
            term = term[:-1]
        # An empty term is a substring of everything and would wipe the list.
        if term and term not in stripped:
            stripped.append(term)
    # Build a fresh list instead of removing while iterating.
    return [
        term
        for term in stripped
        if not any(other != term and other in term for other in stripped)
    ]
def build_terms(suf, pref, common):
    """Merge suffixes, prefixes and common substrings into one term list.

    Args:
        suf: Iterable of suffix strings.
        pref: Iterable of prefix strings.
        common: Iterable of common-substring strings.

    Returns:
        A list with every distinct term, ordered by first appearance when
        the inputs are read as ``suf``, then ``pref``, then ``common``.
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication without repeated list scans.
    return list(dict.fromkeys([*suf, *pref, *common]))
def generate_dict(words, terms):
    """Build candidate names by recombining word pieces with known terms.

    Args:
        words: The original list of words.
        terms: The terms (prefixes, suffixes, common substrings) to combine.

    Returns:
        A list of distinct candidates in generation order: first every
        ``stem + term`` for the stems from ``get_mutable_suffixes``, then
        every ``term + piece`` for the pieces from ``get_mutable_prefixes``
        that is not already one of ``words``.
    """
    stems = get_mutable_suffixes(words, terms)
    pieces = get_mutable_prefixes(words, terms)
    known = set(words)
    final = []
    emitted = set()  # mirrors `final` for constant-time duplicate checks

    # NOTE(review): unlike the second loop, this one does not exclude
    # candidates that are already in `words`; kept as-is to preserve output.
    for stem in stems:
        for term in terms:
            candidate = stem + term
            if candidate not in emitted:
                emitted.add(candidate)
                final.append(candidate)

    for piece in pieces:
        for term in terms:
            candidate = term + piece
            if candidate not in emitted and candidate not in known:
                emitted.add(candidate)
                final.append(candidate)
    return final
if __name__ == "__main__":
    # Script entry point: read a word list, infer prefixes, suffixes and
    # common substrings, and print candidate subdomains for the domain.
    # Input preparation example:
    # cat subdomains_found.txt | cut -d"." -f1 > wordlist.txt
    if len(sys.argv) < 3:
        print("[!] Error. Please provide word list and domain (python3 estigmergio.py words.txt whatever.com")
        # Stop here: continuing would fail on the missing sys.argv entries.
        sys.exit(1)
    target = sys.argv[1]
    domain = sys.argv[2]
    original = words(target)
    if original is None:
        # A failed load has already been reported; do not run the pipeline
        # on a missing word list.
        sys.exit(1)
    calculated = calculate_substrings(original)
    # Substrings of one or two characters are discarded.
    reduced = {k: v for k, v in calculated.items() if len(k) > 2}
    ordered = sorted(reduced.items(), key=lambda x: x[1]["count"], reverse=True)
    pref = get_prefixes(ordered)
    suff = get_suffixes(ordered)
    common = get_common_words(ordered)
    terms = build_terms(suff, pref, common)
    subdomains = generate_dict(original, terms)
    for x in subdomains:
        print(x + "." + domain)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment