Created
January 1, 2014 14:24
-
-
Save parttimenerd/8208406 to your computer and use it in GitHub Desktop.
A text query based engine to sort a list of elements.
Eventually used inside a Qt based project.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef BERRYENGINE_H | |
#define BERRYENGINE_H | |
#include <vector> | |
#include <QString> | |
#include <QMap> | |
#include <QHash> | |
#include <QSet> | |
#include <QRegExp> | |
#include <map> | |
#include <set> | |
#include <iostream> | |
#include <functional> | |
#include <algorithm> | |
#include <utility> | |
#include "elementgroup.h" | |
template<typename Element> | |
class BerryEngine | |
{ | |
public: | |
BerryEngine(){ | |
this->filterFuncs["raw"] = [](QString str, const Element &elem){ return elem == str; }; | |
this->filterFuncs["line"] = [](QString str, const Element &elem){ return elem == str; }; | |
this->filterFuncs["file"] = [](QString str, const Element &elem){ return elem == str; }; | |
this->filterFuncs["description"] = [](QString str, const Element &elem){ return elem == str; }; | |
this->filterFuncs["id"] = [](QString str, const Element &elem){ return elem == str; }; | |
this->filterPoolFuncs["line"] = [](const Element &elem){ return "line"; }; | |
this->filterPoolFuncs["file"] = [](const Element elem){ return "file"; }; | |
this->filterPoolFuncs["description"] = [](const Element &elem){ return "description"; }; | |
this->filterPoolFuncs["id"] = [](const Element &elem){ return "id"; }; | |
} | |
/** | |
* @brief Adds a new element and updates the string pools. | |
* @param element new element | |
*/ | |
void addNewElement(Element element){ | |
this->elements.append(element); | |
auto it = this->filterPoolFuncs.constBegin(); | |
while (it != this->filterPoolFuncs.constEnd()) { | |
this->filterPool[it.key()].insert(it.value()(element)); | |
++it; | |
} | |
auto it2 = this->filterCSPoolFuncs.constBegin(); | |
while (it2 != this->filterCSPoolFuncs.constEnd()) { | |
this->filterCSPool[it2.key()].unite(it2.value()(element)); | |
++it; | |
} | |
} | |
/** | |
* @brief Returns a number of suggestions for a given query. | |
* E.g. query = "#sourt" => "#sort" | |
* @param query given query | |
* @param number maximum number of suggestions | |
* @return suggestions for the given query | |
*/ | |
QStringList getSuggestions(QString _query, size_t number){ | |
QString query(_query); | |
if (!query.startsWith("#")){ | |
query = "#raw " + query; | |
} | |
QStringList cmdStrings = query.split("#"); | |
QStringList _cmdStrings = _query.split("#"); | |
QString lastCmdString; | |
if (cmdStrings.empty()){ | |
lastCmdString = ""; | |
} else { | |
lastCmdString = cmdStrings[cmdStrings.size() - 1]; | |
} | |
QStringList suggs = this->getSuggestionsForCmdQuery(lastCmdString, number); | |
for (int i = 0; i < suggs.size(); i++){ | |
if (_cmdStrings.empty()){ | |
suggs[i] = suggs[i].right(suggs[i].size() - 1); | |
} else { | |
_cmdStrings[_cmdStrings.size() - 1] = suggs[i]; | |
suggs[i] = _cmdStrings.join(" #"); | |
} | |
} | |
return suggs; | |
} | |
QList<ElementGroup<Element> > query(QString query){ | |
if (!query.startsWith("#")){ | |
query = "#raw " + query; | |
} | |
QList<Element> elemList; | |
QStringList cmdStrings = query.split("#", QString::SkipEmptyParts); | |
elemList = this->executeFilters(this->elements, cmdStrings); | |
elemList = this->executeFilterCS(elemList, cmdStrings); | |
elemList = this->executeSortCmds(elemList, cmdStrings); | |
auto groups = this->executeGroupCmds(elemList, cmdStrings); | |
return groups; | |
} | |
/** | |
* @brief Reexecutes the last query. | |
* If no query had been executed before, a blank string is used. | |
* | |
* @return query result. | |
*/ | |
QList<ElementGroup<Element>> reexecuteLastQuery(){ | |
return this->query(this->lastQuery); | |
} | |
/** | |
* @brief Sets the elements inherited by this engine. | |
* | |
* @param newElements new elements, now inherited by this engine | |
*/ | |
void setElements(const QList<Element> &newElements){ | |
foreach (Element elem, newElements){ | |
this->addNewElement(elem); | |
} | |
} | |
private: | |
QList<Element> elements; | |
QString lastQuery = ""; | |
QMap<QString, std::function<bool(QString, const Element)> > filterFuncs; | |
QMap<QString, std::function<QString(const Element)> > filterPoolFuncs; | |
QHash<QString, QSet<QString> > filterPool; | |
QMap<QString, std::function<bool(QStringList, const Element)> > filterCSFuncs; | |
QMap<QString, std::function<QSet<QString>(const Element)> > filterCSPoolFuncs; | |
QHash<QString, QSet<QString> > filterCSPool; | |
QMap<QString, std::function<int(Element, const Element)>> sortFuncs; | |
QMap<QString, std::function<QString(const Element)>> groupFuncs; | |
QList<Element> executeFilters(const QList<Element> &elements, const QStringList &cmdStrings){ | |
QList<Element> resList; | |
foreach (QString cmdString, cmdStrings){ | |
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts); | |
QStringList cmd = arr.takeFirst(); | |
QStringList argument = arr.join(" "); | |
if (!arr.empty() && this->isFilterCmd(cmd)){ | |
foreach (Element element, elements){ | |
if (this->filterFuncs[cmd](argument, element) && !resList.contains(element)){ | |
resList.append(element); | |
} | |
} | |
} | |
} | |
return resList; | |
} | |
QList<Element> executeFilterCS(const QList<Element> &elements, const QStringList &cmdStrings){ | |
QList<Element> resList; | |
foreach (QString cmdString, cmdStrings){ | |
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts); | |
QString cmd = arr.takeFirst(); | |
QString argument = arr.join(""); | |
if (!arr.empty() && this->isFilterCSCmd(cmd)){ | |
QStringList args = argument.split(",", QString::SkipEmptyParts); | |
foreach (Element element, elements){ | |
if (this->filterCSFuncs[cmd](args, element) && !resList.contains(element)){ | |
resList.append(element); | |
} | |
} | |
} | |
} | |
return resList; | |
} | |
QList<Element> executeSortCmds(const QList<Element> &elements, const QStringList &cmdStrings){ | |
QList<std::pair<QString, bool> > sortCmds; | |
foreach (QString cmdString, cmdStrings){ | |
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts); | |
if (arr.size() < 2) | |
continue; | |
QString cmd = arr.takeFirst(); | |
if (arr[0] == "by") | |
arr.removeFirst(); | |
arr = arr.join(" ").split(",", QString::SkipEmptyParts); | |
foreach (QString cmdPart, arr){ | |
cmdPart = cmdPart.trimmed(); | |
QStringList cmdPartList = cmdPart.split(" "); | |
if (cmdPartList.empty()) | |
continue; | |
cmdPart = cmdPartList[0]; | |
bool asc = true; | |
if (cmdPartList.size() >= 2){ | |
asc = cmdPartList[1] == "asc"; | |
} | |
if (this->isSortCmd(cmdPart)) | |
sortCmds.append(std::make_pair(cmdPart, asc)); | |
} | |
} | |
QList<Element> resList(elements); | |
foreach (auto sortCmd, sortCmds){ | |
if (sortCmd.second){ | |
qStableSort(resList.begin(), resList.end(), this->sortFuncs[sortCmd.first]); | |
} else { | |
auto sortFunc = this->sortFuncs[sortCmd.first]; | |
qStableSort(resList.begin(), resList.end(), [&](const Element &elem1, const Element &elem2){ | |
return !sortFunc(elem1, elem2); | |
}); | |
} | |
} | |
return resList; | |
} | |
QList<ElementGroup<Element> > executeGroupCmds(const QList<Element> &elements, const QStringList &cmdStrings){ | |
QStringList groupCmds; | |
foreach (QString cmdString, cmdStrings){ | |
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts); | |
if (arr.size() < 2) | |
continue; | |
QString cmd = arr.takeFirst(); | |
if (arr[0] == "by") | |
arr.removeFirst(); | |
arr = arr.join("").split(",", QString::SkipEmptyParts); | |
foreach (QString cmdPart, arr){ | |
QStringList cmdPartList = cmdPart.split(" "); | |
if (cmdPartList.empty()) | |
continue; | |
groupCmds.append(cmdPartList[0]); | |
} | |
} | |
int id = 0; | |
QHash<QStringList, std::pair<QList<Element>, int> > groupHash; | |
foreach (Element element, elements){ | |
QStringList groupTitles; | |
if (!groupHash.contains(groupTitles)){ | |
groupHash[groupTitles] = std::make_pair(QList<Element>(), id); | |
id++; | |
} | |
groupHash[groupTitles].first.append(element); | |
} | |
QList<ElementGroup<Element> > groupList; | |
ElementGroup<Element> dummy(*(new QStringList()), *(new QList<Element>())); | |
for (int i = 0; i < groupHash.size(); i++) | |
groupList.append(dummy); | |
auto it = groupHash.constBegin(); | |
while (it != groupHash.constEnd()) { | |
groupList[it.value().second] = *(new ElementGroup<Element>(it.key(), it.value().first)); | |
it++; | |
} | |
return groupList; | |
} | |
QStringList getSuggestionsForCmdQuery(QString cmdQuery, size_t number){ | |
QStringList tokens = cmdQuery.split(" "); | |
QStringList suggs; | |
if (tokens.empty()) | |
return suggs; | |
bool hasByString = tokens.size() >= 2 && tokens[1] == "by"; | |
QString cmd = tokens[0]; | |
if (isSortCmd(cmd) || isGroupCmd(cmd)){ | |
int frontCut = std::min(1 + (hasByString ? 1 : 0), tokens.size()); | |
tokens = tokens.mid(frontCut, tokens.size()); | |
QStringList args = tokens.join(" ").replace("\\s+", "\\s").split(",", QString::SkipEmptyParts); | |
args.removeDuplicates(); | |
if (isSortCmd(cmd)){ | |
suggs = getSuggestionsForSortCmd(args); | |
} else { | |
suggs = getSuggestionsForGroupCmd(args); | |
} | |
} else if (isFilterCmd(cmd) || isFilterCSCmd(cmd)){ | |
tokens = tokens.mid(0, tokens.size()); | |
QString rejoined = tokens.join(" ").replace("\\s+", "\\s"); | |
if (isFilterCmd(cmd)){ | |
suggs = getSuggestionsForFilterCmd(cmd, rejoined); | |
} else { | |
QStringList args = rejoined.split(",", QString::SkipEmptyParts); | |
suggs = getSuggestionsForFilterCSCmd(cmd, args); | |
} | |
} else { | |
suggs = getSuggestionsForCmd(cmd); | |
} | |
return suggs.mid(0, number); | |
} | |
QStringList getSuggestionsForSortCmd(QStringList args){ | |
QString last; | |
if (args.empty()){ | |
last = ""; | |
} else { | |
last = args[args.size() - 1]; | |
} | |
QStringList pool(this->groupFuncs.keys()); | |
QStringList list; | |
QStringList arr = last.split(" "); | |
if (pool.contains(arr[0])){ | |
list.append("asc"); | |
list.append("desc"); | |
if (arr.size() > 1){ | |
list = this->sortStringsByStringEquality(list, arr[1]); | |
} | |
for (int i = 0; i < list.size(); i++) | |
list[i] = arr[i] + " " + list[i]; | |
} else { | |
list = this->sortStringsByStringEquality(pool, last); | |
} | |
for (int i = 0; i < list.size(); i++){ | |
if (args.empty()){ | |
list[i] = "sort by " + list[i]; | |
} else { | |
args[args.size() - 1] = list[i]; | |
list[i] = "sort by " + args.join(", "); | |
} | |
} | |
return list; | |
} | |
QStringList getSuggestionsForGroupCmd(QStringList args){ | |
QString last; | |
if (args.empty()){ | |
last = ""; | |
} else { | |
last = args[args.size() - 1]; | |
} | |
QStringList pool(this->groupFuncs.keys()); | |
QStringList list = this->sortStringsByStringEquality(pool, last); | |
for (int i = 0; i < list.size(); i++){ | |
if (args.empty()){ | |
list[i] = "group by " + list[i]; | |
} else { | |
args[args.size() - 1] = list[i]; | |
list[i] = "group by " + args.join(", "); | |
} | |
} | |
return list; | |
} | |
QStringList getSuggestionsForFilterCmd(QString cmd, QString argument){ | |
QStringList pool(this->filterPool[cmd].toList()); | |
return this->sortStringsByStringEquality(pool, argument); | |
} | |
QStringList getSuggestionsForFilterCSCmd(QString cmd, QStringList args){ | |
QString last; | |
if (args.empty()){ | |
last = ""; | |
} else { | |
last = args[args.size() - 1]; | |
} | |
QStringList pool(this->filterCSPool[cmd].toList()); | |
QStringList list = this->sortStringsByStringEquality(pool, last); | |
for (int i = 0; i < list.size(); i++){ | |
if (args.empty()){ | |
list[i] = cmd + " " + list[i]; | |
} else { | |
args[args.size() - 1] = list[i]; | |
list[i] = cmd + " " + args.join(", "); | |
} | |
} | |
return list; | |
} | |
QStringList getSuggestionsForCmd(QString cmd){ | |
QStringList suppCmds = getSupportedCommands(); | |
return sortStringsByStringEquality(suppCmds, cmd); | |
} | |
/** | |
* @brief Returns a list of supported commands. | |
* E.g. "#sort by", "#group by", "#[filter name]" | |
* @return a string list of commands | |
*/ | |
QStringList getSupportedCommands(){ | |
QStringList list; | |
{ | |
auto it = this->filterFuncs.constBegin(); | |
while (it != this->filterFuncs.constEnd()) { | |
list.append(it.key()); | |
++it; | |
} | |
} | |
{ | |
auto it = this->filterCSFuncs.constBegin(); | |
while (it != this->filterCSFuncs.constEnd()) { | |
list.append(it.key()); | |
++it; | |
} | |
} | |
{ | |
auto it = this->groupFuncs.constBegin(); | |
while (it != this->groupFuncs.constEnd()) { | |
list.append(it.key() + " by"); | |
++it; | |
} | |
} | |
{ | |
auto it = this->sortFuncs.constBegin(); | |
while (it != this->sortFuncs.constEnd()) { | |
list.append(it.key() + " by "); | |
++it; | |
} | |
} | |
return list; | |
} | |
/** | |
* @brief It sorts the strings by their edit distance to the compareWith string in ascending order. | |
* @param strings strings to sort | |
* @param compareWith compare them with this string | |
* @return the sorted list | |
*/ | |
QStringList sortStringsByStringEquality(QStringList strings, QString compareWith){ | |
QMap<int, QString> weightedStrings; | |
for(auto it = strings.begin(); it != strings.end(); it++) { | |
int strEqu = this->stringEquality(compareWith, *it); | |
weightedStrings[strEqu] = *it; | |
} | |
QStringList retList; | |
foreach (QString string, weightedStrings){ | |
retList.append(string + "[" + QString::number(this->stringEquality(string, compareWith)) + "]"); | |
} | |
return retList; | |
} | |
bool isSortCmd(QString cmd){ | |
return sortFuncs.count(cmd) > 0; | |
} | |
bool isGroupCmd(QString cmd){ | |
return groupFuncs.count(cmd) > 0; | |
} | |
bool isFilterCmd(QString cmd){ | |
return filterFuncs.count(cmd) > 0; | |
} | |
bool isFilterCSCmd(QString cmd){ | |
return filterCSFuncs.count(cmd) > 0; | |
} | |
/** | |
* @brief Calculates the equality of two strings. | |
* If both strings are only single words, a combination of the levenshtein edit distance and a phonetic matching algorithm is used. | |
* If not only the first is used. | |
* Attention: using a phonetic algorithm is much slower, than the simple levenshtein. | |
* @param str1 first string | |
* @param str2 second string | |
* @return equality of both strings, 0 means both string are equal, the greater the number, the more unequal are both strings. | |
*/ | |
int stringEquality(QString str1, QString str2){ | |
if (this->isSingleWord(str1) && this->isSingleWord(str2)){ | |
return phoneticEquality(str1, str2); | |
} | |
return editDistance(str1, str2); | |
} | |
/** | |
* @brief Implementation of the levenshtein distance, a metric for the edit distance between to strings. | |
* Original source: http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C.2B.2B | |
* @param str1 first string | |
* @param str2 second string | |
* @return edit distance | |
*/ | |
int editDistance(QString str1, QString str2){ | |
const int len1 = str1.size(), len2 = str2.size(); | |
std::vector<int> col(len2+1), prevCol(len2+1); | |
for (int i = 0; i < prevCol.size(); i++) | |
prevCol[i] = i; | |
for (int i = 0; i < len1; i++) { | |
col[0] = i+1; | |
for (int j = 0; j < len2; j++) | |
col[j+1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j]), prevCol[j] + (str1[i] == str2[j] ? 0 : 1) ); | |
col.swap(prevCol); | |
} | |
return prevCol[len2]; | |
} | |
/** | |
* @brief Implementation of a phonetic algorithm to compare two words. | |
* It generates the NYSIIS for both words and returns the levenshtein edit distance between them. | |
* Attention: using a phonetic algorithm is much slower, than the simple levenshtein. | |
* @param word1 first word | |
* @param word2 second word | |
* @return equality of both words, 0 means both words are equal, the greater the number, the more unequal are both words. | |
*/ | |
int phoneticEquality(QString word1, QString word2){ | |
return editDistance(nysiisForWord(word1), nysiisForWord(word2)); | |
} | |
/** | |
* @brief Examines the NYSIIS of the given word. | |
* The NYSIIS is the New York State Identification and Intelligence System Phonetic Code, | |
* http://en.wikipedia.org/wiki/NYSIIS. | |
* @param word given word | |
* @return NYSIIS of the given word | |
*/ | |
QString nysiisForWord(QString word){ | |
if (word.size() == 0){ | |
return ""; | |
} | |
QString code; | |
word = word.toUpper(); | |
word.replace(QRegExp("^MAC"), "MCC"); | |
word.replace(QRegExp("^KN"), "NN"); | |
word.replace(QRegExp("^K"), "C"); | |
word.replace(QRegExp("^P(H|F)"), "FF"); | |
word.replace(QRegExp("^SCH"), "SSS"); | |
word.replace(QRegExp("(EE|IE)$"), "Y"); | |
word.replace(QRegExp("(DT|RT|RD|NT|ND)$"), "D"); | |
code.append(word[0]); | |
word = word.right(word.size() - 1); | |
static QHash<QString, QRegExp> regexps; //good idea to make it static? | |
regexps["AF"] = QRegExp("^EV"); | |
regexps["A"] = QRegExp("^(A|E|I|O|U)"); | |
regexps["G"] = QRegExp("^Q"); | |
regexps["S"] = QRegExp("^Z"); | |
regexps["N"] = QRegExp("^(M|KN)"); | |
regexps["C"] = QRegExp("^K"); | |
regexps["SSS"] = QRegExp("^SCH"); | |
regexps["FF"] = QRegExp("^PH"); | |
while (word.size() > 0){ | |
this->replaceRegExpsInText(regexps, word); | |
if (!(word.startsWith("H") && (!this->isVowel(code[code.size() - 1]) | |
|| (word.size() >= 2 && !this->isVowel(word[1])))) | |
&& !(word.startsWith("W") && this->isVowel(code[code.size() - 1]))){ | |
if (word[0] != code[code.size() - 1]){ | |
code.append(word[0]); | |
} | |
} | |
word = word.right(word.size() - 1); | |
} | |
if (code.endsWith("S")){ | |
code = code.left(code.size() - 1); | |
} | |
if (code.endsWith("AY")){ | |
code.replace(QRegExp("AY$"), "Y"); | |
} else if (code.endsWith("A")){ | |
code = code.left(code.size() - 1); | |
} | |
code = removeRepeatedCharacters(code); | |
return code; | |
} | |
QString removeRepeatedCharacters(const QString &str){ | |
QString res; | |
int i = 0; | |
while (i < str.size() - 1){ | |
if (str[i] != str[i + 1]){ | |
res.append(str[i]); | |
} | |
i++; | |
} | |
res.append(str[str.size() - 1]); | |
return res; | |
} | |
void replaceRegExpsInText(QHash<QString, QRegExp> ®exps, QString &text){ | |
auto it = regexps.constBegin(); | |
while (it != regexps.constEnd()) { | |
text.replace(it.value(), it.key()); | |
++it; | |
} | |
} | |
bool isVowel(const QChar &someChar){ | |
return someChar == 'a' || someChar == 'e' || someChar == 'i' || someChar == 'o' || someChar == 'u' | |
|| someChar == 'ä' || someChar == 'ö' || someChar == 'ü'; | |
} | |
bool isSingleWord(QString &str){ | |
QRegExp regexp("[A-Za-zäöü]+"); | |
return regexp.exactMatch(str); | |
} | |
}; | |
#endif // BERRYENGINE_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef ELEMENTGROUP_H | |
#define ELEMENTGROUP_H | |
#include <functional> | |
#include <QList> | |
#include <QStringList> | |
#include <QString> | |
#include <stdint.h> | |
template<class Element> | |
class ElementGroup | |
{ | |
public: | |
ElementGroup(QStringList _titles, const QList<Element> _elements){ | |
this->elements = _elements; | |
this->titles = _titles; | |
} | |
bool contains(Element element){ | |
return this->elements.contains(element); | |
} | |
const std::vector<Element> getElements(){ | |
return this->elements; | |
} | |
size_t size(){ | |
return this->elements.size(); | |
} | |
const QStringList getTitles(){ | |
return this->titles; | |
} | |
Element get(size_t index){ | |
return this->elements[index]; | |
} | |
private: | |
QStringList titles; | |
QList<Element> elements; | |
}; | |
#endif // ELEMENTGROUP_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "mainwindow.h" | |
#include <QApplication> | |
#include <qstring.h> | |
#include <iostream> | |
#include <berryengine.h> | |
int main(int argc, char *argv[]) | |
{ | |
QApplication a(argc, argv); | |
MainWindow w; | |
w.show(); | |
BerryEngine<QString> engine; | |
engine.addNewElement("3"); | |
auto path = engine.getSuggestions("#sort by asc", 3); | |
for (auto i = path.begin(); i != path.end(); ++i) | |
std::cout << (*i).toStdString() << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment