Forked from tillson/gist:620e8ef87bc057f25b0a27c423433fda
Created
October 9, 2022 20:08
-
-
Save sudosuraj/859b85505abbf4553a024340e9912b3e to your computer and use it in GitHub Desktop.
Decode Base64 strings in a git repo's commit history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk a git repository's commit history and print every distinct payload
# found inside <text encoding="base64">...</text> elements, Base64-decoded.
#
# Only the pre-change contents of each modified file
# (``source_code_before``) are searched.
#
# NOTE(review): RepositoryMining / commit.modifications is the PyDriller 1.x
# API; PyDriller 2.x appears to expose these as Repository /
# commit.modified_files -- confirm against the installed version.
import base64
import re

from pydriller import RepositoryMining

# Compiled once because it is applied to every modified file of every commit.
# The capture group is the Base64 payload between the opening and closing tag.
BASE64_TEXT_RE = re.compile(r"<text encoding=\"base64\">([^>]+)</text>")

# Decoded payloads that have already been printed.
foundSet = set()

for commit in RepositoryMining('./').traverse_commits():
    for mod in commit.modifications:
        if mod.source_code_before is None:
            # Nothing to search when there is no pre-change content.
            continue
        for payload in BASE64_TEXT_RE.findall(mod.source_code_before):
            try:
                decoded = base64.b64decode(payload)
            except ValueError:
                # binascii.Error (bad padding) is a ValueError subclass, and
                # non-ASCII input raises ValueError directly. Skip the
                # malformed payload instead of aborting the whole scan.
                continue
            # str() of bytes yields the b'...' repr; kept so arbitrary binary
            # payloads stay printable and the output format is unchanged.
            based = str(decoded)
            if based not in foundSet:
                print(based)
                # Store exactly the value that is tested above; storing a
                # variant (e.g. with a trailing newline) would defeat the
                # duplicate check.
                foundSet.add(based)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment