Created
May 29, 2015 16:51
-
-
Save itay-grudev/1204a53b3378e4765fee to your computer and use it in GitHub Desktop.
CodeWord Frequency Analyzer written with Qt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <QByteArray>
#include <QCoreApplication>
#include <QFile>
#include <QHash>
#include <QList>
#include <QString>

#include <algorithm>
struct CodeWord { | |
CodeWord() { count = 0; } | |
CodeWord(QString word, unsigned int count) : word(word), count(count) {} | |
QString word; | |
unsigned int count; | |
}; | |
struct CodeWordCmp { | |
bool operator()(CodeWord a, CodeWord b) { | |
return a.count > b.count; | |
} | |
}; | |
int main(int argc, char *argv[]) | |
{ | |
QCoreApplication a(argc, argv); | |
bool hexflag = false; | |
if(argc <= 1) | |
{ | |
qDebug("Usage: codewordanalyzer [--hex] <input filename> [input filename] ...\n" | |
" Options:\n" | |
" --hex Display code words in hex"); | |
return 1; | |
} | |
int skip = 1; | |
if( QString(argv[skip]) == "--hex" ) { | |
hexflag = true; | |
++skip; | |
} | |
// Attempt opening all files listed in the arguments | |
QFile **files = new QFile* [argc - 1]; | |
bool error = false; | |
for(int f = skip; f < argc; ++f) | |
{ | |
files[f-skip] = new QFile(argv[f]); | |
if( files[f-skip]->error() != QFile::NoError ) | |
{ | |
qWarning("%s", files[f-skip]->errorString().toStdString().c_str()); | |
error = true; | |
} | |
} | |
if( error ) return 1; | |
// Initalize a hash table | |
QHash<QString, CodeWord> codeWords; | |
// Itterate over all the files | |
for(int f = 0; f < argc - skip; ++f) | |
{ | |
files[f]->open(QFile::ReadOnly); | |
// Itterate over codewords of different length | |
for(int l = 1; l <= 5; ++l) | |
{ | |
qDebug("Acquiring code words of length %d from %s", l, argv[f+skip]); | |
qint64 res; | |
char *tmp = new char[l]; | |
for(int i = 0; i < files[f]->size() - l; ++i) { | |
if( ! files[f]->seek(i) ) { | |
qCritical("Error encountered while seeking inside file: %s:%d", argv[f+skip], i); | |
continue; | |
} | |
res = files[f]->read(tmp, l); | |
if( res != l) { | |
qCritical("Error encountered while reading from file: %s:%d", argv[f+skip], i); | |
continue; | |
} | |
QString word(tmp); | |
++codeWords[word].count; | |
if(codeWords[word].count) | |
codeWords[word].word = word; | |
} | |
delete[] tmp; | |
} | |
} | |
QList<CodeWord> orderedCodeWords = codeWords.values(); | |
qSort(orderedCodeWords.begin(), orderedCodeWords.end(), CodeWordCmp()); | |
qDebug("Acquired code words: %d", codeWords.size()); | |
for(int i = 0; i < qMin(256, orderedCodeWords.length()); ++i) { | |
if( hexflag ) | |
{ | |
qDebug("0x%s: %d", orderedCodeWords[i].word.toLatin1().toHex().constData(), orderedCodeWords[i].count); | |
} else { | |
// Generate an ASCII escape sequence | |
QString word = orderedCodeWords[i].word; | |
word.replace('\'', "\\\'"); | |
word.replace('\"', "\\\""); | |
word.replace('\?', "\\\?"); | |
word.replace('\\', "\\\\"); | |
word.replace('\a', "\\a"); | |
word.replace('\b', "\\b"); | |
word.replace('\f', "\\f"); | |
word.replace('\n', "\\n"); | |
word.replace('\r', "\\r"); | |
word.replace('\t', "\\t"); | |
word.replace('\v', "\\v"); | |
word.replace('\v', "\\v"); | |
qDebug("%s: %d", word.toStdString().c_str(), orderedCodeWords[i].count); | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment