Last active
January 9, 2024 18:49
-
-
Save lanmaster53/a0d3523279f3d1efdfe6d9dfc4da0d4a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Based on the initial work of Digininja at https://github.com/digininja/CeWL. While CeWL is a script written
in Ruby that requires an independent crawl of a website in order to build a custom wordlist, Whey CeWLer
runs within Portswigger's Burp Suite and parses an already crawled sitemap to build a custom wordlist. It
does not have the metadata parsing capabilities that CeWL does, but it more than makes up for it in
convenience.
The name gets its origins from the CeWLer portion of the CO2 Burp extension by Jason Gillam, which is written | |
in Java and does something similar, but Whey CeWLer is a completely reimagined extension written in Python, | |
making it "way cooler". | |
Usage: | |
1. Point Burp Suite to Jython in the Extender > Options tab. | |
2. Install this extension manually in the Extender > Extensions tab. | |
3. Select an option for extension output (File, Console or UI). | |
4. Right-click on any element in the Target tab's hierarchical sitemap. | |
5. Select the Extensions > Create Wordlist context menu item.
The wordlist is written to wherever the extension's output is configured to go.
''' | |
from burp import IBurpExtender | |
from burp import IContextMenuFactory | |
from javax.swing import JMenuItem | |
from java.util import ArrayList, List | |
from HTMLParser import HTMLParser | |
from datetime import datetime | |
import re | |
# words the extension seeds its word list with before any page is scraped
COMMON_PASSWORDS = [
    'password',
]
# Content-Type values that get_words accepts; other declared types are skipped
TEXT_CONTENT_TYPES = [
    'text/html',
    'application/xml',
    'application/json',
    'text/plain',
]
# helpful resource
# https://github.com/laconicwolf/burp-extensions/blob/master/GenerateForcedBrowseWordlist.py
class TagStripper(HTMLParser):
    '''
    Attempts to strip all tags from an HTML page received in the HTTP response. The text found
    between tags and inside HTML comments is appended to a list and then joined with " " for
    regex parsing.
    '''

    def __init__(self):
        HTMLParser.__init__(self)
        # text fragments gathered by the parser callbacks, in document order
        self.page_text = []

    def handle_data(self, data):
        '''
        Parser callback. Keeps a run of text found outside of any tag.
        '''
        self.page_text.append(data)

    def handle_comment(self, data):
        '''
        Parser callback. Keeps the text of an HTML comment.
        '''
        self.page_text.append(data)

    def strip(self, html_page):
        '''
        Parses html_page and returns the gathered text fragments joined with " ".

        Parser state and previously gathered fragments are discarded first, so an instance can
        be reused without the text of one page leaking into the result for the next.
        '''
        # reset() drops any unparsed input still buffered from an earlier feed()
        self.reset()
        self.page_text = []
        self.feed(html_page)
        return " ".join(self.page_text)
class BurpExtender(IBurpExtender, IContextMenuFactory):
    '''
    BurpExtender Class as per Reference API. Adds a "Create Wordlist" context menu item to the
    Target site map tree and prints the words scraped from the stored responses.
    '''

    # alpha numerics and apostrophes, at least 3 characters in length
    _WORD_PATTERN = re.compile(r"[\w']{3,}")
    # minimum length a word must still have once its apostrophes are removed
    _MIN_WORD_LENGTH = 3

    def registerExtenderCallbacks(self, callbacks):
        '''
        Registers the extension and initializes the root URLs and word list sets.
        '''
        self._callbacks = callbacks
        self._helpers = callbacks.getHelpers()
        self.context = None
        self.roots = set()
        self.word_list = set(COMMON_PASSWORDS)
        callbacks.setExtensionName("Whey CeWLer")
        callbacks.registerContextMenuFactory(self)
        return

    def createMenuItems(self, context):
        '''
        Returns the "Create Wordlist" menu item when the menu is invoked on the Target site map
        tree, and None for every other invocation context.
        '''
        # remembered so that menu_action can ask for the selected messages later
        self.context = context
        if context.getInvocationContext() != context.CONTEXT_TARGET_SITE_MAP_TREE:
            return None
        menu_list = ArrayList()
        menu_list.add(JMenuItem("Create Wordlist", actionPerformed=self.menu_action))
        return menu_list

    def menu_action(self, event):
        '''
        Obtains the selected messages from the interface. Filters the sitemap for all messages
        whose URL falls within the selected messages' hierarchy and scrapes each stored response
        for words. The resulting word list is then displayed.
        '''
        # start from a clean slate so that earlier invocations do not leak into this word list
        self.roots = set()
        self.word_list = set(COMMON_PASSWORDS)
        # get all first-level selected messages and store the URLs as roots to filter the sitemap
        selected_messages = self.context.getSelectedMessages()
        if selected_messages is None:
            # the Burp API returns null when no messages apply to the invocation
            selected_messages = []
        for selected_message in selected_messages:
            self.roots.add(str(selected_message.getUrl()))
        # startswith accepts a tuple, so a URL below several roots is still scraped only once
        roots = tuple(self.roots)
        if roots:
            # get all sitemap entries associated with the selected messages and scrape them
            for http_message in self._callbacks.getSiteMap(None):
                url = http_message.getUrl().toString()
                if not url.startswith(roots):
                    continue
                # will scrape the same URL multiple times if the site map has stored multiple
                # instances; it stores them when it detects differences, so this is desirable
                http_response = http_message.getResponse()
                # only scrape if there is a response to scrape
                if http_response:
                    self.get_words(url, http_response)
        self.display_words()
        return

    def _is_text_response(self, headers):
        '''
        Returns False when a Content-Type header declares a type that is not listed in
        TEXT_CONTENT_TYPES, and True otherwise (including when no Content-Type is present).
        '''
        for header in headers:
            name, separator, value = header.partition(':')
            if not separator:
                # malformed header line without a colon; nothing to inspect
                continue
            if name.strip().lower() != 'content-type':
                continue
            # drop parameters such as "; charset=utf-8"
            content_type = value.split(';')[0].strip().lower()
            if content_type not in TEXT_CONTENT_TYPES:
                return False
        return True

    def get_words(self, url, http_response):
        '''
        Checks the headers for a text-based content type. If the content type is text-based (or
        not declared), uses the TagStripper class to parse out the text and runs a regex to find
        word candidates. Apostrophes are removed from each candidate, and candidates that are
        still at least 3 characters long are added to the word_list set.

        url is accepted for the caller's convenience and is not used here.
        '''
        response = self._helpers.analyzeResponse(http_response)
        # the first entry is skipped; presumably the status line rather than a real header
        if not self._is_text_response(response.getHeaders()[1:]):
            return
        # decode the body only once the response is known to be worth parsing
        body = self._helpers.bytesToString(http_response[response.getBodyOffset():])
        page_text = TagStripper().strip(body)
        for candidate in self._WORD_PATTERN.findall(page_text):
            # strip apostrophes
            word = candidate.replace("'", "")
            # the pattern counts apostrophes towards the length, so check again without them
            if len(word) >= self._MIN_WORD_LENGTH:
                self.word_list.add(word)
        return

    def display_words(self):
        '''
        Displays the word list, sorted, to whatever Burp is configured for stdout.
        '''
        for word in sorted(self.word_list):
            # parenthesised single argument prints identically on Python 2 (Jython)
            print(word)
        return
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment