SoMaCoSF · April 30, 2025 22:32
diff --git a/readme.md b/readme.md
diff --git a/code-sanitizer.tsx b/code-sanitizer.tsx
 import React, { useState, useEffect } from 'react';
 import { Clipboard, Info, BarChart } from 'lucide-react';
 import _ from 'lodash';

 // Main component for the Code Sanitizer Tool
 const CodeSanitizer = () => {
  const [inputCode, setInputCode] = useState('');
  const [cleanedCode, setCleanedCode] = useState('');
  const [processing, setProcessing] = useState(false);
  const [copySuccess, setCopySuccess] = useState(false);
  const [showDetails, setShowDetails] = useState(false);
  const [detectedIssues, setDetectedIssues] = useState([]);
  const [codeStats, setCodeStats] = useState(null);
  const [showCodeAnalysis, setShowCodeAnalysis] = useState(false);

  // Process input code when it changes
  useEffect(() => {
    if (inputCode) {
      processCode(inputCode);
    } else {
      setCleanedCode('');
      setDetectedIssues([]);
      setCodeStats(null);
    }
  }, [inputCode]);

  // Process the code to remove invisible characters and normalize it
  const processCode = (code) => {
    setProcessing(true);
    
    // Create a list to store detected issues
    const issues = [];
    const issueDetails = {};

    // Step 1: Detect and replace zero-width characters
    const zeroWidthPattern = /[\u200B-\u200D\uFEFF\u2060]/g;
    let zeroWidthMatches = code.match(zeroWidthPattern) || [];
    const hasZeroWidth = zeroWidthMatches.length > 0;
    if (hasZeroWidth) {
      issues.push('Zero-width characters detected and removed');
      issueDetails['zeroWidth'] = {
        count: zeroWidthMatches.length,
        description: 'Invisible characters that can hide malicious code'
      };
    }
    let processed = code.replace(zeroWidthPattern, '');

    // Step 2: Handle non-standard whitespace characters
    const nonStandardWhitespace = /[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g;
    let whitespaceMatches = processed.match(nonStandardWhitespace) || [];
    const hasNonStandardWhitespace = whitespaceMatches.length > 0;
    if (hasNonStandardWhitespace) {
      issues.push('Non-standard whitespace characters normalized');
      issueDetails['whitespace'] = {
        count: whitespaceMatches.length,
        description: 'Unusual whitespace characters that look like spaces but have different code points'
      };
    }
    processed = processed.replace(nonStandardWhitespace, ' ');

    // Step 3: Handle homoglyphs (characters that look similar but are different)
    const homoglyphMap = {
      'ｎ': 'n', 'ｏ': 'o', 'ｐ': 'p', 'ｑ': 'q', 'ｒ': 'r', 'ｓ': 's', 'ｔ': 't',
      'ｕ': 'u', 'ｖ': 'v', 'ｗ': 'w', 'ｘ': 'x', 'ｙ': 'y', 'ｚ': 'z', 'ａ': 'a',
      'ｂ': 'b', 'ｃ': 'c', 'ｄ': 'd', 'ｅ': 'e', 'ｆ': 'f', 'ｇ': 'g', 'ｈ': 'h',
      'ｉ': 'i', 'ｊ': 'j', 'ｋ': 'k', 'ｌ': 'l', 'ｍ': 'm', '０': '0', '１': '1',
      '２': '2', '３': '3', '４': '4', '５': '5', '６': '6', '７': '7', '８': '8',
      '９': '9', '；': ';', '（': '(', '）': ')', '［': '[', '］': ']', '｛': '{',
      '｝': '}', '：': ':', '，': ',', '．': '.', '＋': '+', '－': '-', '＝': '=',
      '＊': '*', '／': '/', '＼': '\\', '｜': '|', '＆': '&', '＾': '^', '％': '%',
      '＄': '$', '＃': '#', '＠': '@', '！': '!', '？': '?', '『': '"', '』': '"',
      '«': '"', '»': '"', '′': "'", '″': '"', '‹': '<', '›': '>'
    };

    let homoglyphCount = 0;
    let homoglyphInstances = {};
    
    for (let char in homoglyphMap) {
      const regex = new RegExp(char, 'g');
      const matches = processed.match(regex) || [];
      const count = matches.length;
      
      if (count > 0) {
        homoglyphCount += count;
        homoglyphInstances[char] = {
          replacement: homoglyphMap[char],
          count: count
        };
        processed = processed.split(char).join(homoglyphMap[char]);
      }
    }
    
    if (homoglyphCount > 0) {
      issues.push('Homoglyph characters replaced with standard ASCII equivalents');
      issueDetails['homoglyphs'] = {
        count: homoglyphCount,
        instances: homoglyphInstances,
        description: 'Characters that visually resemble standard ASCII but use different Unicode code points'
      };
    }

    // Step 4: Normalize quotation marks and other punctuation
    const smartQuotes = /[\u2018\u2019\u201C\u201D\u2032\u2033\u2035\u2036]/g;
    let smartQuotesMatches = processed.match(smartQuotes) || [];
    const hasSmartQuotes = smartQuotesMatches.length > 0;
    if (hasSmartQuotes) {
      issues.push('Smart quotes normalized to standard quotes');
      issueDetails['smartQuotes'] = {
        count: smartQuotesMatches.length,
        description: 'Curly or smart quotes that can cause parsing errors in code'
      };
    }
    processed = processed.replace(/[\u2018\u2019\u2032\u2035]/g, "'")
                          .replace(/[\u201C\u201D\u2033\u2036]/g, '"');

    // Step 5: Normalize line endings
    const crlfMatches = processed.match(/\r\n/g) || [];
    const crMatches = processed.match(/\r(?!\n)/g) || [];
    const totalLineEndingIssues = crlfMatches.length + crMatches.length;
    
    if (totalLineEndingIssues > 0) {
      issues.push('Mixed line endings normalized to LF (\\n)');
      issueDetails['lineEndings'] = {
        count: totalLineEndingIssues,
        crlfCount: crlfMatches.length,
        crCount: crMatches.length,
        description: 'Inconsistent line endings that can cause issues in version control and parsing'
      };
    }
    processed = processed.replace(/\r\n?/g, '\n');

    // Step 6: Check for potential control characters
    const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
    let controlMatches = processed.match(controlChars) || [];
    const hasControlChars = controlMatches.length > 0;
    if (hasControlChars) {
      issues.push('Control characters detected and removed');
      issueDetails['controlChars'] = {
        count: controlMatches.length,
        description: 'Non-printable control characters that can affect execution or be used for obfuscation'
      };
      processed = processed.replace(controlChars, '');
    }

    // Gather code statistics for the analysis panel
    const stats = analyzeCode(processed);
    
    // Update state with processed code and detected issues
    setCleanedCode(processed);
    setDetectedIssues(issues);
    setCodeStats({
      ...stats,
      issueDetails: issueDetails,
      totalIssuesFixed: Object.values(issueDetails).reduce((sum, detail) => sum + detail.count, 0),
      issueCategories: issues.length,
      originalLength: code.length,
      cleanedLength: processed.length,
      charactersRemoved: code.length - processed.length
    });
    setProcessing(false);
  };

  // Analyze the cleaned code to gather statistics
  const analyzeCode = (code) => {
    // Skip if no code
    if (!code) return null;
    
    // Line count
    const lines = code.split('\n');
    const lineCount = lines.length;
    
    // Character counts
    const alphaCount = (code.match(/[a-zA-Z]/g) || []).length;
    const digitCount = (code.match(/\d/g) || []).length;
    const spaceCount = (code.match(/\s/g) || []).length;
    const symbolCount = code.length - alphaCount - digitCount - spaceCount;
    
    // Detect language patterns
    const languagePatterns = {
      javascript: {
        regex: /function\s|\bconst\b|\blet\b|\bvar\b|\=\>|\bimport\b|\brequire\b|\bexport\b/g,
        count: 0,
        name: "JavaScript"
      },
      python: {
        regex: /\bdef\b|\bimport\b|\bif\s+__name__\s*==\s*('|")__main__\1:|\bclass\b\s+\w+\s*:|:\s*$/g,
        count: 0,
        name: "Python"
      },
      html: {
        regex: /<\/?[a-z][\s\S]*>/i,
        count: 0,
        name: "HTML"
      },
      css: {
        regex: /[\.\#][\w\-]+\s*\{|\@media|\@import|[\w\-]+\s*:\s*[\w\-]+/g,
        count: 0,
        name: "CSS"
      },
      sql: {
        regex: /\bSELECT\b|\bFROM\b|\bWHERE\b|\bJOIN\b|\bGROUP BY\b|\bORDER BY\b/gi,
        count: 0,
        name: "SQL"
      }
    };
    
    // Count pattern matches for each language
    for (const lang in languagePatterns) {
      const matches = code.match(languagePatterns[lang].regex) || [];
      languagePatterns[lang].count = matches.length;
    }
    
    // Determine likely language based on pattern counts
    let likelyLanguage = "Unknown";
    let maxCount = 0;
    
    for (const lang in languagePatterns) {
      if (languagePatterns[lang].count > maxCount) {
        maxCount = languagePatterns[lang].count;
        likelyLanguage = languagePatterns[lang].name;
      }
    }
    
    // Check for indentation style (spaces vs tabs)
    const leadingSpaces = (code.match(/^\s+/gm) || []);
    const tabCount = leadingSpaces.filter(s => s.includes('\t')).length;
    const spaceIndentCount = leadingSpaces.length - tabCount;
    
    // Determine most common indentation level (for spaces)
    const indentSizes = {};
    let mostCommonIndentSize = 0;
    let mostCommonIndentCount = 0;
    
    leadingSpaces.forEach(space => {
      if (!space.includes('\t')) {
        const size = space.length;
        indentSizes[size] = (indentSizes[size] || 0) + 1;
        
        if (indentSizes[size] > mostCommonIndentCount) {
          mostCommonIndentCount = indentSizes[size];
          mostCommonIndentSize = size;
        }
      }
    });
    
    return {
      lineCount,
      charactersTotal: code.length,
      charactersAlpha: alphaCount,
      charactersDigit: digitCount,
      charactersSpace: spaceCount,
      charactersSymbol: symbolCount,
      likelyLanguage,
      indentation: {
        tabs: tabCount,
        spaces: spaceIndentCount,
        mostCommonIndentSize
      }
    };
  };

  // Handle copy to clipboard
  const handleCopy = () => {
    navigator.clipboard.writeText(cleanedCode).then(() => {
      setCopySuccess(true);
      setTimeout(() => setCopySuccess(false), 2000);
    });
  };

  // Toggle details section
  const toggleDetails = () => {
    setShowDetails(!showDetails);
  };

  // Toggle code analysis section
  const toggleCodeAnalysis = () => {
    setShowCodeAnalysis(!showCodeAnalysis);
  };

  return (
    <div className="flex flex-col w-full max-w-6xl mx-auto p-4 space-y-6">
      <div className="text-center">
        <h1 className="text-2xl font-bold mb-2">Code Sanitizer Tool</h1>
        <p className="text-gray-600">Detects and removes invisible characters, homoglyphs, and other potential issues in code</p>
      </div>
      
      <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
        <div className="flex flex-col">
          <label className="font-medium mb-2">Paste Code with Potential Issues:</label>
          <textarea 
            className="w-full h-64 p-2 border border-gray-300 rounded font-mono text-sm resize-none"
            value={inputCode}
            onChange={(e) => setInputCode(e.target.value)}
            placeholder="Paste your code here..."
          />
        </div>
        
        <div className="flex flex-col">
          <div className="flex justify-between items-center mb-2">
            <label className="font-medium">Sanitized Code:</label>
            <button 
              onClick={handleCopy}
              disabled={!cleanedCode}
              className="flex items-center space-x-1 px-3 py-1 bg-blue-600 text-white rounded hover:bg-blue-700 disabled:bg-gray-400"
            >
              <Clipboard size={16} />
              <span>{copySuccess ? 'Copied!' : 'Copy'}</span>
            </button>
          </div>
          <textarea 
            className="w-full h-64 p-2 border border-gray-300 rounded font-mono text-sm resize-none bg-gray-50"
            value={cleanedCode}
            readOnly
            placeholder="Sanitized code will appear here..."
          />
        </div>
      </div>
      
      {detectedIssues.length > 0 && (
        <div className="mt-4">
          <button 
            onClick={toggleDetails}
            className="flex items-center text-blue-600 hover:text-blue-800 font-medium"
          >
            {showDetails ? 'Hide' : 'Show'} Details 
            ({detectedIssues.length} issue{detectedIssues.length !== 1 ? 's' : ''} detected)
          </button>
          
          {showDetails && (
            <div className="mt-2 p-3 bg-yellow-50 border border-yellow-200 rounded">
              <h3 className="font-medium mb-2">Detected Issues:</h3>
              <ul className="list-disc pl-5 space-y-1">
                {detectedIssues.map((issue, index) => (
                  <li key={index} className="text-sm">{issue}</li>
                ))}
              </ul>
              <div className="mt-3 text-sm text-gray-600">
                <p>This tool has removed or normalized potentially problematic characters to create clean, consistent code.</p>
              </div>
            </div>
          )}
        </div>
      )}
      
      {codeStats && inputCode && (
        <div className="mt-4">
          <button 
            onClick={toggleCodeAnalysis}
            className="flex items-center text-blue-600 hover:text-blue-800 font-medium"
          >
            <BarChart size={16} className="mr-1" />
            {showCodeAnalysis ? 'Hide' : 'Show'} Code Analysis Summary
          </button>
          
          {showCodeAnalysis && (
            <div className="mt-2 p-4 bg-blue-50 border border-blue-200 rounded">
              <h3 className="font-medium text-lg mb-3">Code Analysis Summary</h3>
              
              <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
                <div>
                  <h4 className="font-medium mb-2">Sanitization Results</h4>
                  <ul className="text-sm space-y-1">
                    <li>
                      <span className="font-medium">Issues Fixed:</span> {codeStats.totalIssuesFixed} problematic characters in {codeStats.issueCategories} categories
                    </li>
                    <li>
                      <span className="font-medium">Characters Removed:</span> {codeStats.charactersRemoved} ({((codeStats.charactersRemoved / codeStats.originalLength) * 100).toFixed(2)}% of original)
                    </li>
                    {Object.entries(codeStats.issueDetails).map(([key, detail]) => (
                      <li key={key} className="pl-2 text-gray-600">
                        • {detail.count} {detail.description}
                      </li>
                    ))}
                  </ul>
                </div>
                
                <div>
                  <h4 className="font-medium mb-2">Code Statistics</h4>
                  <ul className="text-sm space-y-1">
                    <li>
                      <span className="font-medium">Detected Language:</span> {codeStats.likelyLanguage}
                    </li>
                    <li>
                      <span className="font-medium">Line Count:</span> {codeStats.lineCount} lines
                    </li>
                    <li>
                      <span className="font-medium">Character Composition:</span>
                      <ul className="pl-4 pt-1">
                        <li>
                          <div className="flex items-center">
                            <div className="w-24">Alphabetic:</div>
                            <div className="w-16">{codeStats.charactersAlpha}</div>
                            <div className="w-20 bg-gray-200 h-3 rounded overflow-hidden">
                              <div 
                                className="bg-blue-600 h-full" 
                                style={{width: `${(codeStats.charactersAlpha / codeStats.charactersTotal) * 100}%`}}
                              ></div>
                            </div>
                            <div className="ml-2 text-xs">
                              {((codeStats.charactersAlpha / codeStats.charactersTotal) * 100).toFixed(1)}%
                            </div>
                          </div>
                        </li>
                        <li>
                          <div className="flex items-center">
                            <div className="w-24">Numeric:</div>
                            <div className="w-16">{codeStats.charactersDigit}</div>
                            <div className="w-20 bg-gray-200 h-3 rounded overflow-hidden">
                              <div 
                                className="bg-green-600 h-full" 
                                style={{width: `${(codeStats.charactersDigit / codeStats.charactersTotal) * 100}%`}}
                              ></div>
                            </div>
                            <div className="ml-2 text-xs">
                              {((codeStats.charactersDigit / codeStats.charactersTotal) * 100).toFixed(1)}%
                            </div>
                          </div>
                        </li>
                        <li>
                          <div className="flex items-center">
                            <div className="w-24">Whitespace:</div>
                            <div className="w-16">{codeStats.charactersSpace}</div>
                            <div className="w-20 bg-gray-200 h-3 rounded overflow-hidden">
                              <div 
                                className="bg-yellow-500 h-full" 
                                style={{width: `${(codeStats.charactersSpace / codeStats.charactersTotal) * 100}%`}}
                              ></div>
                            </div>
                            <div className="ml-2 text-xs">
                              {((codeStats.charactersSpace / codeStats.charactersTotal) * 100).toFixed(1)}%
                            </div>
                          </div>
                        </li>
                        <li>
                          <div className="flex items-center">
                            <div className="w-24">Symbols:</div>
                            <div className="w-16">{codeStats.charactersSymbol}</div>
                            <div className="w-20 bg-gray-200 h-3 rounded overflow-hidden">
                              <div 
                                className="bg-purple-600 h-full" 
                                style={{width: `${(codeStats.charactersSymbol / codeStats.charactersTotal) * 100}%`}}
                              ></div>
                            </div>
                            <div className="ml-2 text-xs">
                              {((codeStats.charactersSymbol / codeStats.charactersTotal) * 100).toFixed(1)}%
                            </div>
                          </div>
                        </li>
                      </ul>
                    </li>
                    <li>
                      <span className="font-medium">Indentation Style:</span> {codeStats.indentation.tabs > codeStats.indentation.spaces ? 'Tabs' : 'Spaces'}
                      {codeStats.indentation.spaces > codeStats.indentation.tabs && codeStats.indentation.mostCommonIndentSize > 0 && (
                        <span> ({codeStats.indentation.mostCommonIndentSize} spaces)</span>
                      )}
                    </li>
                  </ul>
                </div>
              </div>
            </div>
          )}
        </div>
      )}
      
      <div className="mt-4 p-4 bg-blue-50 border border-blue-200 rounded">
        <h2 className="font-medium mb-2">How to Use:</h2>
        <ol className="list-decimal pl-5 space-y-1 text-sm">
          <li>Copy code that might contain hidden or problematic characters</li>
          <li>Paste it into the left text area</li>
          <li>The tool will automatically detect and clean the code</li>
          <li>Review any detected issues in the details section</li>
          <li>View the code analysis summary for additional insights</li>
          <li>Copy the sanitized code from the right text area</li>
        </ol>
      </div>
    </div>
  );
 };

 export default CodeSanitizer;
	import React, { useState, useEffect } from 'react';
	import { Clipboard, Info, BarChart } from 'lucide-react';
	import _ from 'lodash';

	// Main component for the Code Sanitizer Tool
	const CodeSanitizer = () => {
	// Text typed or pasted by the user, and the sanitized text derived from it.
	const [inputCode, setInputCode] = useState('');
	const [cleanedCode, setCleanedCode] = useState('');
	// Raised at the start of processCode and lowered again at its end.
	const [processing, setProcessing] = useState(false);
	// Drives the temporary "Copied!" label on the copy button.
	const [copySuccess, setCopySuccess] = useState(false);
	// Visibility of the "Detected Issues" panel and the messages it lists.
	const [showDetails, setShowDetails] = useState(false);
	const [detectedIssues, setDetectedIssues] = useState([]);
	// Statistics object built by processCode; null while there is no input.
	const [codeStats, setCodeStats] = useState(null);
	// Visibility of the "Code Analysis Summary" panel.
	const [showCodeAnalysis, setShowCodeAnalysis] = useState(false);

	// Re-run the sanitizer on every input change; clear all derived state
	// when the input becomes empty.
	useEffect(() => {
	  if (!inputCode) {
	    setCleanedCode('');
	    setDetectedIssues([]);
	    setCodeStats(null);
	    return;
	  }
	  processCode(inputCode);
	}, [inputCode]);

	/**
	 * Sanitizes pasted source text and publishes the outcome to component state.
	 *
	 * The passes below run in a fixed order, each on the previous pass's output:
	 *   1. remove zero-width characters
	 *   2. turn non-standard spaces into a plain U+0020 space
	 *   3. map fullwidth forms and look-alike punctuation to ASCII
	 *   4. straighten curly quotes and reversed primes
	 *   5. convert CRLF and lone CR to LF
	 *   6. remove control characters (TAB and LF are kept)
	 *
	 * A pass that finds something appends a message to `detectedIssues` and a
	 * `{ count, description, ... }` record to `codeStats.issueDetails`.
	 *
	 * @param code - Raw text taken from the input textarea.
	 */
	const processCode = (code) => {
	  setProcessing(true);

	  const issues = [];
	  const issueDetails = {};

	  const countMatches = (text, pattern) => (text.match(pattern) || []).length;

	  // Records one category of finding; does nothing when the count is zero.
	  const report = (key, message, detail) => {
	    if (detail.count > 0) {
	      issues.push(message);
	      issueDetails[key] = detail;
	    }
	  };

	  // Step 1: zero-width characters
	  const zeroWidthPattern = /[\u200B-\u200D\uFEFF\u2060]/g;
	  report('zeroWidth', 'Zero-width characters detected and removed', {
	    count: countMatches(code, zeroWidthPattern),
	    description: 'Invisible characters that can hide malicious code'
	  });
	  let processed = code.replace(zeroWidthPattern, '');

	  // Step 2: non-standard whitespace
	  const nonStandardWhitespace = /[\u00A0\u2000-\u200A\u202F\u205F\u3000]/g;
	  report('whitespace', 'Non-standard whitespace characters normalized', {
	    count: countMatches(processed, nonStandardWhitespace),
	    description: 'Unusual whitespace characters that look like spaces but have different code points'
	  });
	  processed = processed.replace(nonStandardWhitespace, ' ');

	  // Step 3: homoglyphs. Fullwidth ASCII variants (U+FF01-U+FF5E) sit exactly
	  // 0xFEE0 above the character they imitate, so the whole block - upper-case
	  // letters and fullwidth < > " ' _ included - is mapped arithmetically.
	  // The remaining look-alikes have no such relation and are listed by hand.
	  const FULLWIDTH_OFFSET = 0xFEE0;
	  const punctuationLookalikes = {
	    '\u300E': '"', '\u300F': '"', // white corner brackets
	    '\u00AB': '"', '\u00BB': '"', // guillemets
	    '\u2032': "'", '\u2033': '"', // prime, double prime
	    '\u2039': '<', '\u203A': '>'  // single angle quotation marks
	  };
	  const homoglyphPattern = /[\uFF01-\uFF5E\u300E\u300F\u00AB\u00BB\u2032\u2033\u2039\u203A]/g;
	  const homoglyphInstances = {};
	  let homoglyphCount = 0;
	  // Single pass: count, tally per character, and substitute together.
	  processed = processed.replace(homoglyphPattern, (char) => {
	    const replacement = punctuationLookalikes[char]
	      || String.fromCharCode(char.charCodeAt(0) - FULLWIDTH_OFFSET);
	    if (!homoglyphInstances[char]) {
	      homoglyphInstances[char] = { replacement, count: 0 };
	    }
	    homoglyphInstances[char].count += 1;
	    homoglyphCount += 1;
	    return replacement;
	  });
	  report('homoglyphs', 'Homoglyph characters replaced with standard ASCII equivalents', {
	    count: homoglyphCount,
	    instances: homoglyphInstances,
	    description: 'Characters that visually resemble standard ASCII but use different Unicode code points'
	  });

	  // Step 4: smart quotes. U+2032/U+2033 were already rewritten in step 3, so
	  // in practice only the curly quotes and reversed primes are seen here.
	  report('smartQuotes', 'Smart quotes normalized to standard quotes', {
	    count: countMatches(processed, /[\u2018\u2019\u201C\u201D\u2032\u2033\u2035\u2036]/g),
	    description: 'Curly or smart quotes that can cause parsing errors in code'
	  });
	  processed = processed
	    .replace(/[\u2018\u2019\u2032\u2035]/g, "'")
	    .replace(/[\u201C\u201D\u2033\u2036]/g, '"');

	  // Step 5: line endings
	  const crlfCount = countMatches(processed, /\r\n/g);
	  const crCount = countMatches(processed, /\r(?!\n)/g);
	  report('lineEndings', 'Mixed line endings normalized to LF (\\n)', {
	    count: crlfCount + crCount,
	    crlfCount,
	    crCount,
	    description: 'Inconsistent line endings that can cause issues in version control and parsing'
	  });
	  processed = processed.replace(/\r\n?/g, '\n');

	  // Step 6: control characters (\x09 TAB and \x0A LF are outside the class;
	  // \x0D CR cannot occur any more after step 5)
	  const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
	  report('controlChars', 'Control characters detected and removed', {
	    count: countMatches(processed, controlChars),
	    description: 'Non-printable control characters that can affect execution or be used for obfuscation'
	  });
	  processed = processed.replace(controlChars, '');

	  // analyzeCode returns null for empty text, which happens when the input
	  // consisted solely of removable characters. Fall back to zeroed stats so
	  // the analysis panel always finds the fields it reads.
	  const stats = analyzeCode(processed) || {
	    lineCount: 0,
	    charactersTotal: 0,
	    charactersAlpha: 0,
	    charactersDigit: 0,
	    charactersSpace: 0,
	    charactersSymbol: 0,
	    likelyLanguage: 'Unknown',
	    indentation: { tabs: 0, spaces: 0, mostCommonIndentSize: 0 }
	  };

	  setCleanedCode(processed);
	  setDetectedIssues(issues);
	  setCodeStats({
	    ...stats,
	    issueDetails,
	    totalIssuesFixed: Object.values(issueDetails).reduce((sum, detail) => sum + detail.count, 0),
	    issueCategories: issues.length,
	    originalLength: code.length,
	    cleanedLength: processed.length,
	    charactersRemoved: code.length - processed.length
	  });
	  setProcessing(false);
	};

	/**
	 * Computes display statistics for a piece of (already sanitized) code.
	 *
	 * @param code - Text to analyze; lines are split on "\n".
	 * @returns `null` when `code` is empty, otherwise an object with
	 *   `lineCount`, `charactersTotal`, `charactersAlpha`, `charactersDigit`,
	 *   `charactersSpace`, `charactersSymbol`, `likelyLanguage` and
	 *   `indentation: { tabs, spaces, mostCommonIndentSize }`.
	 */
	const analyzeCode = (code) => {
	  if (!code) return null;

	  const countMatches = (pattern) => (code.match(pattern) || []).length;

	  const lineCount = code.split('\n').length;

	  // Character classes; "symbol" is whatever is left over.
	  const alphaCount = countMatches(/[a-zA-Z]/g);
	  const digitCount = countMatches(/\d/g);
	  const spaceCount = countMatches(/\s/g);
	  const symbolCount = code.length - alphaCount - digitCount - spaceCount;

	  // Keyword heuristics, listed in tie-break order: on equal counts the
	  // earlier entry wins because the comparison below is strict.
	  const languagePatterns = [
	    {
	      name: "JavaScript",
	      regex: /function\s|\bconst\b|\blet\b|\bvar\b|\=\>|\bimport\b|\brequire\b|\bexport\b/g
	    },
	    {
	      name: "Python",
	      // The `m` flag lets `:\s*$` match a colon at the end of every line,
	      // not only at the very end of the input.
	      regex: /\bdef\b|\bimport\b|\bif\s+__name__\s*==\s*('|")__main__\1:|\bclass\b\s+\w+\s*:|:\s*$/gm
	    },
	    {
	      name: "HTML",
	      // No `g` flag: this is a presence test and contributes at most 1.
	      regex: /<\/?[a-z][\s\S]*>/i
	    },
	    {
	      name: "CSS",
	      regex: /[\.\#][\w\-]+\s*\{|\@media|\@import|[\w\-]+\s*:\s*[\w\-]+/g
	    },
	    {
	      name: "SQL",
	      regex: /\bSELECT\b|\bFROM\b|\bWHERE\b|\bJOIN\b|\bGROUP BY\b|\bORDER BY\b/gi
	    }
	  ];

	  let likelyLanguage = "Unknown";
	  let maxCount = 0;
	  for (const { name, regex } of languagePatterns) {
	    const count = countMatches(regex);
	    if (count > maxCount) {
	      maxCount = count;
	      likelyLanguage = name;
	    }
	  }

	  // Leading whitespace per line. `[ \t]` rather than `\s` so that the newline
	  // of a preceding blank line is not swallowed into the measured indent.
	  const indents = code.match(/^[ \t]+/gm) || [];
	  const tabCount = indents.filter((indent) => indent.includes('\t')).length;
	  const spaceIndentCount = indents.length - tabCount;

	  // Most frequent width among space-only indents; the first width to reach
	  // the highest tally wins.
	  const indentSizes = {};
	  let mostCommonIndentSize = 0;
	  let mostCommonIndentCount = 0;
	  for (const indent of indents) {
	    if (indent.includes('\t')) continue;
	    const size = indent.length;
	    indentSizes[size] = (indentSizes[size] || 0) + 1;
	    if (indentSizes[size] > mostCommonIndentCount) {
	      mostCommonIndentCount = indentSizes[size];
	      mostCommonIndentSize = size;
	    }
	  }

	  return {
	    lineCount,
	    charactersTotal: code.length,
	    charactersAlpha: alphaCount,
	    charactersDigit: digitCount,
	    charactersSpace: spaceCount,
	    charactersSymbol: symbolCount,
	    likelyLanguage,
	    indentation: {
	      tabs: tabCount,
	      spaces: spaceIndentCount,
	      mostCommonIndentSize
	    }
	  };
	};

	/**
	 * Copies the sanitized code to the clipboard and shows the "Copied!" label
	 * for two seconds. Failures are logged instead of being left as unhandled
	 * promise rejections.
	 */
	const handleCopy = () => {
	  // The async Clipboard API is only exposed in secure contexts.
	  if (!navigator.clipboard) {
	    console.error('Clipboard API is not available in this context');
	    return;
	  }
	  navigator.clipboard.writeText(cleanedCode)
	    .then(() => {
	      setCopySuccess(true);
	      setTimeout(() => setCopySuccess(false), 2000);
	    })
	    .catch((error) => {
	      // Typically a denied permission or an unfocused document.
	      console.error('Failed to copy sanitized code:', error);
	    });
	};

	// Show/hide the "Detected Issues" panel. The updater form derives the next
	// value from the latest state rather than the value captured at render time.
	const toggleDetails = () => {
	  setShowDetails((visible) => !visible);
	};

	// Show/hide the "Code Analysis Summary" panel. The updater form derives the
	// next value from the latest state rather than the value captured at render time.
	const toggleCodeAnalysis = () => {
	  setShowCodeAnalysis((visible) => !visible);
	};

	// Share of `part` in `total` as a percentage; 0 when either is missing or
	// zero, so the UI never shows "NaN%".
	const percentOf = (part, total) => (total > 0 && part > 0 ? (part / total) * 100 : 0);

	// One entry per bar in the "Character Composition" list.
	const compositionRows = codeStats
	  ? [
	      { label: 'Alphabetic:', count: codeStats.charactersAlpha, barClass: 'bg-blue-600 h-full' },
	      { label: 'Numeric:', count: codeStats.charactersDigit, barClass: 'bg-green-600 h-full' },
	      { label: 'Whitespace:', count: codeStats.charactersSpace, barClass: 'bg-yellow-500 h-full' },
	      { label: 'Symbols:', count: codeStats.charactersSymbol, barClass: 'bg-purple-600 h-full' }
	    ]
	  : [];

	// codeStats may lack `indentation` when the sanitized text is empty.
	const indentation = (codeStats && codeStats.indentation)
	  || { tabs: 0, spaces: 0, mostCommonIndentSize: 0 };

	return (
	  <div className="flex flex-col w-full max-w-6xl mx-auto p-4 space-y-6">
	    <div className="text-center">
	      <h1 className="text-2xl font-bold mb-2">Code Sanitizer Tool</h1>
	      <p className="text-gray-600">Detects and removes invisible characters, homoglyphs, and other potential issues in code</p>
	    </div>

	    <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
	      <div className="flex flex-col">
	        <label className="font-medium mb-2">Paste Code with Potential Issues:</label>
	        <textarea
	          className="w-full h-64 p-2 border border-gray-300 rounded font-mono text-sm resize-none"
	          value={inputCode}
	          onChange={(e) => setInputCode(e.target.value)}
	          placeholder="Paste your code here..."
	        />
	      </div>

	      <div className="flex flex-col">
	        <div className="flex justify-between items-center mb-2">
	          <label className="font-medium">Sanitized Code:</label>
	          <button
	            onClick={handleCopy}
	            disabled={!cleanedCode}
	            className="flex items-center space-x-1 px-3 py-1 bg-blue-600 text-white rounded hover:bg-blue-700 disabled:bg-gray-400"
	          >
	            <Clipboard size={16} />
	            <span>{copySuccess ? 'Copied!' : 'Copy'}</span>
	          </button>
	        </div>
	        <textarea
	          className="w-full h-64 p-2 border border-gray-300 rounded font-mono text-sm resize-none bg-gray-50"
	          value={cleanedCode}
	          readOnly
	          placeholder="Sanitized code will appear here..."
	        />
	      </div>
	    </div>

	    {detectedIssues.length > 0 && (
	      <div className="mt-4">
	        <button
	          onClick={toggleDetails}
	          className="flex items-center text-blue-600 hover:text-blue-800 font-medium"
	        >
	          {showDetails ? 'Hide' : 'Show'} Details
	          ({detectedIssues.length} issue{detectedIssues.length !== 1 ? 's' : ''} detected)
	        </button>

	        {showDetails && (
	          <div className="mt-2 p-3 bg-yellow-50 border border-yellow-200 rounded">
	            <h3 className="font-medium mb-2">Detected Issues:</h3>
	            <ul className="list-disc pl-5 space-y-1">
	              {detectedIssues.map((issue, index) => (
	                <li key={index} className="text-sm">{issue}</li>
	              ))}
	            </ul>
	            <div className="mt-3 text-sm text-gray-600">
	              <p>This tool has removed or normalized potentially problematic characters to create clean, consistent code.</p>
	            </div>
	          </div>
	        )}
	      </div>
	    )}

	    {codeStats && inputCode && (
	      <div className="mt-4">
	        <button
	          onClick={toggleCodeAnalysis}
	          className="flex items-center text-blue-600 hover:text-blue-800 font-medium"
	        >
	          <BarChart size={16} className="mr-1" />
	          {showCodeAnalysis ? 'Hide' : 'Show'} Code Analysis Summary
	        </button>

	        {showCodeAnalysis && (
	          <div className="mt-2 p-4 bg-blue-50 border border-blue-200 rounded">
	            <h3 className="font-medium text-lg mb-3">Code Analysis Summary</h3>

	            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
	              <div>
	                <h4 className="font-medium mb-2">Sanitization Results</h4>
	                <ul className="text-sm space-y-1">
	                  <li>
	                    <span className="font-medium">Issues Fixed:</span> {codeStats.totalIssuesFixed} problematic characters in {codeStats.issueCategories} categories
	                  </li>
	                  <li>
	                    <span className="font-medium">Characters Removed:</span> {codeStats.charactersRemoved} ({percentOf(codeStats.charactersRemoved, codeStats.originalLength).toFixed(2)}% of original)
	                  </li>
	                  {Object.entries(codeStats.issueDetails).map(([key, detail]) => (
	                    <li key={key} className="pl-2 text-gray-600">
	                      • {detail.count} {detail.description}
	                    </li>
	                  ))}
	                </ul>
	              </div>

	              <div>
	                <h4 className="font-medium mb-2">Code Statistics</h4>
	                <ul className="text-sm space-y-1">
	                  <li>
	                    <span className="font-medium">Detected Language:</span> {codeStats.likelyLanguage}
	                  </li>
	                  <li>
	                    <span className="font-medium">Line Count:</span> {codeStats.lineCount} lines
	                  </li>
	                  <li>
	                    <span className="font-medium">Character Composition:</span>
	                    <ul className="pl-4 pt-1">
	                      {compositionRows.map(({ label, count, barClass }) => {
	                        const percent = percentOf(count, codeStats.charactersTotal);
	                        return (
	                          <li key={label}>
	                            <div className="flex items-center">
	                              <div className="w-24">{label}</div>
	                              <div className="w-16">{count}</div>
	                              <div className="w-20 bg-gray-200 h-3 rounded overflow-hidden">
	                                <div
	                                  className={barClass}
	                                  style={{width: `${percent}%`}}
	                                ></div>
	                              </div>
	                              <div className="ml-2 text-xs">
	                                {percent.toFixed(1)}%
	                              </div>
	                            </div>
	                          </li>
	                        );
	                      })}
	                    </ul>
	                  </li>
	                  <li>
	                    <span className="font-medium">Indentation Style:</span> {indentation.tabs > indentation.spaces ? 'Tabs' : 'Spaces'}
	                    {indentation.spaces > indentation.tabs && indentation.mostCommonIndentSize > 0 && (
	                      <span> ({indentation.mostCommonIndentSize} spaces)</span>
	                    )}
	                  </li>
	                </ul>
	              </div>
	            </div>
	          </div>
	        )}
	      </div>
	    )}

	    <div className="mt-4 p-4 bg-blue-50 border border-blue-200 rounded">
	      <h2 className="font-medium mb-2">How to Use:</h2>
	      <ol className="list-decimal pl-5 space-y-1 text-sm">
	        <li>Copy code that might contain hidden or problematic characters</li>
	        <li>Paste it into the left text area</li>
	        <li>The tool will automatically detect and clean the code</li>
	        <li>Review any detected issues in the details section</li>
	        <li>View the code analysis summary for additional insights</li>
	        <li>Copy the sanitized code from the right text area</li>
	      </ol>
	    </div>
	  </div>
	);
	};

	export default CodeSanitizer;