Skip to content

Instantly share code, notes, and snippets.

@itay-grudev
Created May 29, 2015 16:51
Show Gist options
  • Save itay-grudev/1204a53b3378e4765fee to your computer and use it in GitHub Desktop.
Save itay-grudev/1204a53b3378e4765fee to your computer and use it in GitHub Desktop.
CodeWord Frequency Analyzer written with Qt
#include <algorithm>

#include <QByteArray>
#include <QCoreApplication>
#include <QFile>
#include <QHash>
#include <QList>
#include <QString>
struct CodeWord {
CodeWord() { count = 0; }
CodeWord(QString word, unsigned int count) : word(word), count(count) {}
QString word;
unsigned int count;
};
struct CodeWordCmp {
bool operator()(CodeWord a, CodeWord b) {
return a.count > b.count;
}
};
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
bool hexflag = false;
if(argc <= 1)
{
qDebug("Usage: codewordanalyzer [--hex] <input filename> [input filename] ...\n"
" Options:\n"
" --hex Display code words in hex");
return 1;
}
int skip = 1;
if( QString(argv[skip]) == "--hex" ) {
hexflag = true;
++skip;
}
// Attempt opening all files listed in the arguments
QFile **files = new QFile* [argc - 1];
bool error = false;
for(int f = skip; f < argc; ++f)
{
files[f-skip] = new QFile(argv[f]);
if( files[f-skip]->error() != QFile::NoError )
{
qWarning("%s", files[f-skip]->errorString().toStdString().c_str());
error = true;
}
}
if( error ) return 1;
// Initalize a hash table
QHash<QString, CodeWord> codeWords;
// Itterate over all the files
for(int f = 0; f < argc - skip; ++f)
{
files[f]->open(QFile::ReadOnly);
// Itterate over codewords of different length
for(int l = 1; l <= 5; ++l)
{
qDebug("Acquiring code words of length %d from %s", l, argv[f+skip]);
qint64 res;
char *tmp = new char[l];
for(int i = 0; i < files[f]->size() - l; ++i) {
if( ! files[f]->seek(i) ) {
qCritical("Error encountered while seeking inside file: %s:%d", argv[f+skip], i);
continue;
}
res = files[f]->read(tmp, l);
if( res != l) {
qCritical("Error encountered while reading from file: %s:%d", argv[f+skip], i);
continue;
}
QString word(tmp);
++codeWords[word].count;
if(codeWords[word].count)
codeWords[word].word = word;
}
delete[] tmp;
}
}
QList<CodeWord> orderedCodeWords = codeWords.values();
qSort(orderedCodeWords.begin(), orderedCodeWords.end(), CodeWordCmp());
qDebug("Acquired code words: %d", codeWords.size());
for(int i = 0; i < qMin(256, orderedCodeWords.length()); ++i) {
if( hexflag )
{
qDebug("0x%s: %d", orderedCodeWords[i].word.toLatin1().toHex().constData(), orderedCodeWords[i].count);
} else {
// Generate an ASCII escape sequence
QString word = orderedCodeWords[i].word;
word.replace('\'', "\\\'");
word.replace('\"', "\\\"");
word.replace('\?', "\\\?");
word.replace('\\', "\\\\");
word.replace('\a', "\\a");
word.replace('\b', "\\b");
word.replace('\f', "\\f");
word.replace('\n', "\\n");
word.replace('\r', "\\r");
word.replace('\t', "\\t");
word.replace('\v', "\\v");
word.replace('\v', "\\v");
qDebug("%s: %d", word.toStdString().c_str(), orderedCodeWords[i].count);
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment