Created
February 8, 2019 15:15
-
-
Save waqaraqeel/d05a28fac3f094adf55b2f8ed46f6e1e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
This is a quick and dirty script written out of pure annoyance at proofpoint | |
URLdefense making emails unreadable and unusable. Phishing is dangerous, we get | |
it. Maybe you code change the hyperlink instead of the actual text in the email? | |
This script will decode all URLs in stdin. | |
Adapted from https://help.proofpoint.com/Threat_Insight_Dashboard/Concepts/How_do_I_decode_a_rewritten_URL%3F. | |
Thank you to proofpoint for providing it. | |
Usage: ./urldefense_decoder.py < encoded_urls > decoded_urls | |
""" | |
__author__ = "Waqar Aqeel" | |
__version__ = "1.0" | |
__license__ = "MIT" | |
import sys | |
import re | |
import urllib.parse | |
import html.parser | |
# v1 rewritten links carry the percent-encoded original URL in the "u"
# query parameter, which is followed by the "k" parameter.
re_v1 = re.compile(r"u=(.+?)&k=")
# v2 rewritten links carry the specially encoded original URL in the "u"
# query parameter, which is followed by either the "d" or the "c" parameter.
re_v2 = re.compile(r"u=(.+?)&[dc]=")
# Locates a rewritten link inside arbitrary text and captures its version tag.
# The match stops at whitespace, quotes and angle brackets instead of running
# to the end of the line, so text (or further links) following a link on the
# same line is not swallowed into the match and lost when the link is replaced.
# Dots in the host name are escaped so they only match a literal ".".
re_url = re.compile(
    r"(https://urldefense\.proofpoint\.com/(?P<version>v[0-9])/[^\s<>\"']*)"
)
def main():
    """Copy stdin to stdout, replacing every rewritten link with its decoded URL.

    All of stdin is read at once. Text between links is written out unchanged,
    and each link matched by ``re_url`` is passed to the decoder for its
    version.

    Raises:
        ValueError: if a matched link has a version other than "v1" or "v2".
            Text preceding that link has already been written at that point.
    """
    input_str = sys.stdin.read()
    decoders = {"v1": decodev1, "v2": decodev2}
    last_stop = 0
    for match in re_url.finditer(input_str):
        rewrittenurl = match.group(0)
        # Emit the untouched text between the previous link and this one.
        print(input_str[last_stop : match.start()], end="")
        last_stop = match.end()
        decoder = decoders.get(match.group("version"))
        if decoder is None:
            raise ValueError("Unrecognized version in: " + rewrittenurl)
        print(decoder(rewrittenurl), end="")
    # end="" keeps the output faithful to the input: without it an extra
    # newline that was never in the input is appended to the result.
    print(input_str[last_stop:], end="")
def decodev1(rewrittenurl):
    """Decode a version 1 rewritten link.

    The original URL is taken from the part of the link captured by ``re_v1``,
    percent-decoded, and then HTML character references are unescaped.

    Args:
        rewrittenurl: the rewritten link text.

    Returns:
        The decoded URL, or ``rewrittenurl`` unchanged when ``re_v1`` does not
        match it.
    """
    match = re_v1.search(rewrittenurl)
    if match is None:
        return rewrittenurl
    htmlencodedurl = urllib.parse.unquote(match.group(1))
    # html.unescape replaces HTMLParser().unescape, which was removed in
    # Python 3.9 and raises AttributeError there.
    return html.unescape(htmlencodedurl)
def decodev2(rewrittenurl):
    """Decode a version 2 rewritten link.

    The part of the link captured by ``re_v2`` has "-" mapped back to "%" and
    "_" mapped back to "/". The result is then percent-decoded and HTML
    character references are unescaped.

    Args:
        rewrittenurl: the rewritten link text.

    Returns:
        The decoded URL, or ``rewrittenurl`` unchanged when ``re_v2`` does not
        match it.
    """
    match = re_v2.search(rewrittenurl)
    if match is None:
        return rewrittenurl
    # Undo the special encoding first so the result is ordinary
    # percent-encoding that unquote() understands.
    urlencodedurl = match.group(1).translate(str.maketrans("-_", "%/"))
    htmlencodedurl = urllib.parse.unquote(urlencodedurl)
    # html.unescape replaces HTMLParser().unescape, which was removed in
    # Python 3.9 and raises AttributeError there.
    return html.unescape(htmlencodedurl)
# Run the stdin-to-stdout filter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment