Last active
July 20, 2020 14:25
-
-
Save arunesh90/abfcdcfd83c91cb1604f419bf2008ff6 to your computer and use it in GitHub Desktop.
Archive parsing for Discord
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Quick and dirty parser for Discord's archives
const path = require('path') | |
const fs = require('fs') | |
const csv = require('csv') | |
const prompt = require('prompt') | |
// Running totals collected while the archive is parsed:
//   messageCount - number of message rows seen so far
//   words        - dictionary mapping a word to how often it occurred
//   wordsSorted  - [{ word, amount }] entries, filled in by parseAll with the
//                  most frequent word first
let stats = {
  messageCount: 0,
  // Prototype-less dictionary: message text is arbitrary, so words such as
  // "constructor" or "toString" must not collide with members inherited from
  // Object.prototype (which would read as truthy and turn the count into NaN).
  words: Object.create(null),
  wordsSorted: []
}

// The archive's messages folder is looked up relative to the current working
// directory, so the script has to be started from the extracted archive root.
const messagesFolder = path.join(process.cwd(), 'messages/')
/**
 * Adds every countable word of one message to a word -> count dictionary.
 *
 * A token is skipped when it is empty, is one of the fillers "the" / "a", or
 * starts with a number (anything `parseInt` can read, including "0").
 *
 * @param {string} contents - Text of a single message.
 * @param {Object} counts - Dictionary that is updated in place.
 */
const tallyWords = (contents, counts) => {
  // Split on any whitespace run so words separated by tabs or newlines are
  // not glued together into a single token.
  for (const word of contents.split(/\s+/)) {
    if (word === '' || word === 'the' || word === 'a') {
      continue
    }
    if (!Number.isNaN(Number.parseInt(word, 10))) {
      continue
    }
    // Own-property check: a plain-object dictionary would otherwise report
    // inherited members such as "constructor" as already counted.
    if (Object.prototype.hasOwnProperty.call(counts, word)) {
      counts[word]++
    } else {
      counts[word] = 1
    }
  }
}

/**
 * Reads `<messagesFolder>/<channel>/messages.csv` for every entry of the
 * messages folder and accumulates the results into the shared `stats` object:
 * `messageCount`, the `words` dictionary, and `wordsSorted` (rebuilt on every
 * call, most frequent word first).
 *
 * Each CSV record is expected to be an array; the first record is treated as
 * the header row and index 2 is read as the message text (presumably the
 * "Contents" column of Discord's export - verify against the archive format).
 *
 * @returns {Promise<string[]>} Names of the channel folders that were parsed.
 *   Rejects when a file cannot be read or its CSV cannot be parsed.
 */
const parseAll = async () => {
  const channelIDs = fs.readdirSync(messagesFolder)

  const parsedChannels = await Promise.all(channelIDs.map(async channelID => {
    const csvPath = path.join(messagesFolder, channelID, 'messages.csv')
    // index.json and any other stray entry has no messages.csv underneath it.
    if (!fs.existsSync(csvPath)) {
      return null
    }

    const rows = await parse(await fs.promises.readFile(csvPath, 'utf8'))
    // slice(1) drops the header row.
    for (const row of rows.slice(1)) {
      stats.messageCount++
      const contents = row[2]
      if (typeof contents === 'string') {
        tallyWords(contents, stats.words)
      }
    }
    return channelID
  }))

  // Rebuilt from scratch so calling parseAll twice cannot leave duplicates.
  stats.wordsSorted = Object.keys(stats.words)
    .map(word => ({ word, amount: stats.words[word] }))
    .sort((a, b) => b.amount - a.amount)

  return parsedChannels.filter(channelID => channelID !== null)
}
/**
 * Promise wrapper around the callback-style `csv.parse`.
 *
 * Only the input is forwarded to the parser, so it runs with its default
 * options and whatever record shape those produce.
 *
 * @param {string} input - Raw CSV text.
 * @returns {Promise<Array>} Resolves with the parser's output; rejects with
 *   the error reported by the parser.
 */
const parse = (input) => {
  return new Promise((resolve, reject) => {
    csv.parse(input, (err, output) => {
      if (err) {
        // Stop here so a failed parse never falls through to resolve().
        return reject(err)
      }
      resolve(output)
    })
  })
}
// Entry point: ask how many of the most frequent words to show, parse the
// archive, then print the ranking followed by the total message count.
prompt.start()
prompt.get(['Top X words'], (err, result) => {
  if (err) {
    throw err
  }

  // Parse the answer once, with an explicit radix, and refuse anything that
  // is not a positive number instead of silently printing an empty ranking.
  const requested = Number.parseInt(result['Top X words'], 10)
  if (Number.isNaN(requested) || requested < 1) {
    console.error('Please enter a positive number of words to show.')
    return
  }

  console.log('Parsing messages..')
  parseAll().then(() => {
    console.log(`Top ${requested} words:`)
    // wordsSorted is zero-based with the most frequent word at index 0;
    // slice() also caps the list when fewer words exist than were requested.
    stats.wordsSorted.slice(0, requested).forEach((entry, index) => {
      console.log(`${index + 1}. ${entry.word} - ${entry.amount}`)
    })
    console.log(`\nTotal messages: ${stats.messageCount}`)
  }).catch(error => {
    // Surface read/parse failures instead of leaving an unhandled rejection.
    console.error('Failed to parse messages:', error)
    process.exitCode = 1
  })
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment