Created
August 2, 2024 21:17
-
-
Save WomB0ComB0/738771d2c8ad5e488348a7c2b918bf51 to your computer and use it in GitHub Desktop.
Generate all possible casing variations (uppercase/lowercase) of the alphabetic characters in each word
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from 'fs'; | |
import { fileURLToPath } from 'url'; | |
import path from 'path'; | |
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'; | |
// ESM has no built-in __filename/__dirname, so derive them from import.meta.url.
// __filename doubles as the entry script handed to each Worker further down.
const __filename: string = fileURLToPath(import.meta.url);
const __dirname: string = path.dirname(__filename);
// Input word list; the main thread splits it on newlines, one word per line.
const filePath = path.join(__dirname, 'server', 'data', 'nsfw-names.txt');
// This module is evaluated again inside every worker thread (workers are started
// from __filename), but only the main thread consumes the file content. Guard the
// synchronous read so workers do not each re-read the whole list for nothing.
const fileContent: string = isMainThread ? fs.readFileSync(filePath, 'utf8') : '';
/**
 * A single node of the character trie.
 *
 * A freshly constructed node has no children and does not terminate a word.
 */
class TrieNode {
  /** Child nodes keyed by the character that leads to them. */
  children: Map<string, TrieNode> = new Map();

  /** Set to true once an inserted word ends exactly at this node. */
  isEndOfWord = false;
}
/**
 * Prefix tree over strings, used to store words and read them back de-duplicated.
 */
class Trie {
  /** Root node; represents the empty prefix. */
  root: TrieNode;

  constructor() {
    this.root = new TrieNode();
  }

  /**
   * Adds `word` to the trie, creating any missing nodes along its path.
   *
   * @param word - The string to store; it is walked with `for...of`, i.e. one
   *   Unicode code point per trie edge.
   */
  insert(word: string) {
    let cursor = this.root;
    for (const ch of word) {
      let next = cursor.children.get(ch);
      if (next === undefined) {
        next = new TrieNode();
        cursor.children.set(ch, next);
      }
      cursor = next;
    }
    cursor.isEndOfWord = true;
  }

  /**
   * Collects every word stored in the trie.
   *
   * Words are emitted in pre-order: a node's own word first, then its children
   * in the order their edges were first inserted.
   *
   * @returns A set containing each inserted word exactly once.
   */
  extractWordsFromTrie(): Set<string> {
    const collected = new Set<string>();
    // Explicit stack of (node, prefix spelled so far) pairs still to visit.
    const pending: Array<[TrieNode, string]> = [[this.root, '']];
    for (;;) {
      const entry = pending.pop();
      if (entry === undefined) {
        break;
      }
      const [current, spelled] = entry;
      if (current.isEndOfWord) {
        collected.add(spelled);
      }
      // Push children in reverse so the first-inserted child is popped first,
      // which keeps the visiting order identical to a recursive pre-order walk.
      for (const [ch, child] of Array.from(current.children).reverse()) {
        pending.push([child, spelled + ch]);
      }
    }
    return collected;
  }
}
/**
 * Produces every upper/lower-case combination of `text`.
 *
 * Each character contributes its original form plus its upper-case and
 * lower-case forms (when they differ), so input that already contains capitals
 * also yields the lower-case variants. Characters without case (digits,
 * punctuation, ...) contribute only themselves.
 *
 * The result grows exponentially: a word with k cased characters yields about
 * 2^k strings.
 *
 * @param text - The word to expand. It is iterated by Unicode code point.
 * @returns All distinct casing variations. For all-lowercase input the first
 *   entry is `text` itself and variations that capitalise later characters come
 *   before those that capitalise earlier ones. An empty `text` yields `['']`.
 */
const generateCasingVariations = (text: string): string[] => {
  // Start from the single empty prefix so that '' yields [''].
  let prefixes: string[] = [''];
  for (const char of text) {
    // A Set keeps insertion order and drops duplicates: original form first,
    // then the upper- and lower-case forms if they are actually different.
    const forms = new Set<string>([char, char.toUpperCase(), char.toLowerCase()]);
    const extended: string[] = [];
    for (const prefix of prefixes) {
      for (const form of forms) {
        extended.push(prefix + form);
      }
    }
    prefixes = extended;
  }
  // Multi-character case mappings (e.g. 'ß' -> 'SS') could in principle make two
  // different paths spell the same string, so de-duplicate before returning.
  return Array.from(new Set(prefixes));
};
/**
 * Expands each word into its casing variations and returns them de-duplicated.
 *
 * Every variation is inserted into a fresh trie, which is then read back as a set.
 *
 * @param words - The words to expand.
 * @returns The set of all casing variations of all given words.
 */
const processWordsInWorker = (words: string[]): Set<string> => {
  const trie = new Trie();
  for (const word of words) {
    for (const variation of generateCasingVariations(word)) {
      trie.insert(variation);
    }
  }
  return trie.extractWordsFromTrie();
};
// Entry point. This same file runs both as the main thread, which splits the word
// list and fans it out, and as each worker, which expands its chunk and posts
// the result back.
if (isMainThread) {
  // One word per line; trimming also strips a trailing '\r' from CRLF files.
  const words = fileContent
    .split('\n')
    .map(word => word.trim())
    .filter(word => word.length > 0);
  const numWorkers = 4;
  const chunkSize = Math.ceil(words.length / numWorkers);
  const workers: Worker[] = [];
  const promises: Promise<Set<string>>[] = [];
  for (let i = 0; i < numWorkers; i++) {
    const chunk = words.slice(i * chunkSize, (i + 1) * chunkSize);
    // With few (or zero) words some chunks are empty; do not spawn a thread for them.
    if (chunk.length === 0) {
      continue;
    }
    const worker = new Worker(__filename, { workerData: chunk });
    workers.push(worker);
    promises.push(
      new Promise<Set<string>>((resolve, reject) => {
        worker.on('message', resolve);
        worker.on('error', reject);
        worker.on('exit', code => {
          // A non-zero exit without a prior message means the worker died;
          // if the promise is already settled this reject is a no-op.
          if (code !== 0) {
            reject(new Error(`Worker stopped with exit code ${code}`));
          }
        });
      })
    );
  }
  Promise.all(promises)
    .then(results => {
      // Merge the per-worker sets; the Set removes duplicates across chunks.
      const combinedResults = new Set<string>();
      for (const result of results) {
        for (const word of result) {
          combinedResults.add(word);
        }
      }
      const newFilePath = path.join(__dirname, 'server', 'data', 'nsfw-names-all-casing.txt');
      fs.writeFileSync(newFilePath, Array.from(combinedResults).join('\n'), 'utf8');
      console.log('Processed names with all casing variations have been written to', newFilePath);
    })
    .catch((err: unknown) => {
      console.error('Error processing words:', err);
      // Stop any workers that are still running so the process can exit promptly,
      // and report the failure through the exit status instead of exiting 0.
      for (const worker of workers) {
        void worker.terminate();
      }
      process.exitCode = 1;
    });
} else {
  // Worker side: parentPort is null only when not running as a worker thread.
  if (parentPort === null) {
    throw new Error('parentPort is unavailable outside of a worker thread');
  }
  const words = workerData as string[];
  parentPort.postMessage(processWordsInWorker(words));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment