Created
June 3, 2025 18:43
-
-
Save robertdevore/1ea370580e3dedff82558342f0c396ad to your computer and use it in GitHub Desktop.
Scan PHP files for inline <style> and <script> tags.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import csv | |
from openpyxl import Workbook | |
def find_php_files(base_path):
    """
    Collect the paths of all .php files beneath a directory.

    The tree rooted at ``base_path`` is walked with ``os.walk``; every file
    whose name ends with ``.php`` is returned joined to the directory it was
    found in, in the order the walk yields them.

    Args:
        base_path (str): Directory to start searching from.

    Returns:
        list: Paths of the PHP files found (empty if none are found).
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_path)
        for name in names
        if name.endswith('.php')
    ]
def contains_tag(content, tag):
    """
    Check if the content contains an opening HTML tag with the given name.

    The match is case-insensitive and looks for ``<`` immediately followed by
    the tag name and a word boundary, so ``<script>`` and ``<SCRIPT src=...>``
    match ``'script'`` while ``<scripts>`` does not.

    Args:
        content (str): File content.
        tag (str): Tag name to search for, e.g. 'script', 'style'. It is
            treated as literal text, not as a regular expression.

    Returns:
        bool: True if the tag is found, False otherwise.
    """
    # Escape the tag so regex metacharacters in it (e.g. '.') are matched
    # literally instead of being interpreted as pattern syntax.
    pattern = re.compile(rf"<{re.escape(tag)}\b", re.IGNORECASE)
    return pattern.search(content) is not None
def scan_php_files(php_files):
    """
    Sort PHP files into those containing <script> and those containing <style>.

    Each file is read as UTF-8 text (undecodable bytes are ignored) and tested
    with ``contains_tag``. A file may land in both lists. Any exception raised
    while handling a file is reported on stdout and the scan continues with
    the next file.

    Args:
        php_files (list): List of PHP file paths.

    Returns:
        tuple: (list of files with <script>, list of files with <style>)
    """
    with_script = []
    with_style = []
    # Tag name -> result list, checked in this order for every file.
    buckets = (('script', with_script), ('style', with_style))

    for path in php_files:
        try:
            with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
                source = handle.read()
            for tag, bucket in buckets:
                if contains_tag(source, tag):
                    bucket.append(path)
        except Exception as e:
            print(f"Error reading {path}: {e}")

    return with_script, with_style
def save_to_excel(script_files, style_files, output_file='tagged_php_files.xlsx'):
    """
    Save the matched file paths into an Excel file with two tabs.

    The "script_tags" sheet lists the files with <script> tags and the
    "style_tags" sheet lists the files with <style> tags. Each sheet starts
    with a single header cell followed by one file path per row. A
    confirmation line is printed once the workbook has been written.

    Args:
        script_files (list): List of files with <script> tags.
        style_files (list): List of files with <style> tags.
        output_file (str): Path to save the Excel file.
    """
    wb = Workbook()

    # Script tag sheet: reuse the workbook's active sheet for the first tab.
    ws_script = wb.active
    ws_script.title = "script_tags"
    ws_script.append(["Files with <script> tag"])
    for file in script_files:
        ws_script.append([file])

    # Style tag sheet
    ws_style = wb.create_sheet(title="style_tags")
    ws_style.append(["Files with <style> tag"])
    for file in style_files:
        ws_style.append([file])

    wb.save(output_file)
    # Plain-ASCII status line: the message previously began with a
    # mis-encoded glyph that printed as garbage and could fail to encode on
    # consoles using a legacy code page.
    print(f"\nSaved results to {output_file}")
def main(base_path=None):
    """
    Scan a directory tree for PHP files with inline tags and export the results.

    Finds every .php file under ``base_path``, reports how many contain
    <script> and <style> tags, and writes both lists to the default Excel
    output file.

    Args:
        base_path (str | None): Directory to scan. When omitted, the current
            working directory is used.
    """
    if base_path is None:
        base_path = os.getcwd()

    php_files = find_php_files(base_path)
    # Status lines are plain ASCII: the leading glyphs they used to carry were
    # mis-encoded and printed as garbage characters.
    print(f"Scanning {len(php_files)} PHP files...")

    script_files, style_files = scan_php_files(php_files)
    print(f"Found {len(script_files)} files with <script> tags")
    print(f"Found {len(style_files)} files with <style> tags")

    save_to_excel(script_files, style_files)
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment