Last active
September 26, 2021 06:26
-
-
Save Quackward/655cc02e9366180d31872ce76e099cd9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// [email protected] \(v ` >`)_v | |
// M-maybe don't use this for anything either? It's still in need of testing and stuff | |
// also I OVER DOCUMENTED IT, whoops! | |
// Commandline how to use: | |
// This is an object which parses, and can also build then emit or use, a string in the following format: | |
// | |
// command argA argB argC argD target | |
// [command.exe] [argA's value] [1234] (nil) [How is "this?"] [path/to/target name.txt] | |
// \ \ \ \ \ \ | |
// ___________/ ___________________/ ________/ ____/ _______________________/ ________________________/ | |
// command.exe -argA="argA's value" -argB=1234 -argC --argD="How is ""this?""" "path/to/target name.txt" | |
// | |
// There are 3 tokens within this string: | |
// | |
// > command: This is the very first string that appears, and usually determines the command we are issuing | |
// * Can contain any printable character, and spaces if quotes are used (parsing rules explained in later section) | |
// * A command is optional, but must appear first; if it appears after an argument it will be treated as a "target" | |
// | |
// > target: This is the last non-argument string in the command, and usually determines the target of our command | |
// * Can contain any printable character, and spaces if quotes are used (parsing rules explained in later section) | |
// * A target is optional, but must appear after a command; only the last target to appear in the commandline counts | |
// * A target can appear in the middle of arguments, that is fine. | |
// | |
// > argument: These are key/value pairs that start with a dash [-] and optionally have a string value | |
// * A leading dash [-] must start the argument; you can use as many dashes as you like: [-----arg=longboy] is valid | |
// * Argument keynames should only use alpha, numeric, and an underscore [_], but invalid keys are still accepted | |
// * Argument value strings can contain any printable character, and spaces if quotes are used (explained later) | |
// * The value is optional, writing [-argName= ] or [-argName ] will both generate a key with no value | |
// * In writing the key/value pair, you must use one equal sign [=] with no spaces between them | |
// * The number of arguments allowed is unlimited | |
// * When finding arguments, and when the search would match multiple keys, only the first argument will be found | |
// * When finding arguments, there are three possible results: | |
// > a NULL pointer means nothing with that key exists | |
// > a non-NULL pointer to 0 means a key was found, but no value was assigned to it | |
// > a non-NULL pointer to anything besides 0, is the attached value as a null terminated string | |
// | |
// Parsing rules: | |
// The characters with special meaning are whitespace (any character where `isspace(c)!=0`), dashes [-], and quotes ["] | |
// All tokens are separated by at least one whitespace character; many can be used in a row, use as much space as you like | |
// If you wish to have a command / target / arg values that contains whitespace, quotes, or leading dashes... use quotes to do so: | |
// | |
// Quotation ["] rules: | |
// * The first quote that appears always starts a quotation block | |
// * Inside a quotation block, all characters, including spaces (but excluding quotes) are considered part of that token | |
// * Inside a quotation block, two quotes found in a row [""] are turned into a single quote ["] and made part of that token | |
// * Inside a quotation block, one quote found without a partner ["] marks the end of the quotation block | |
// * Quotation blocks can be started inside a token; ending a quotation block doesn't end the token, only a non-quoted space can | |
// * If the end of a string is encountered inside a quotation block, it will close the quotation block harmlessly | |
// | |
// Examples: (separate tokens are contained in [brackets]) | |
// | |
// ___input________________________________________ ___output__________________________________________________ | |
// command.exe -arg=value target.txt >> com[command.exe] tar[target.txt] arg[value] | |
// "C:/Programs and Stuff/command.exe" >> com[C:/Programs and Stuff/command.exe] | |
// cmd.exe -argA -argB= -argC="stuff here" >> com[cmd.exe] argA[] argB[] argC[stuff here] | |
// cmd.exe -arg=complex" ""lil"" argu"ment >> com[cmd.exe] arg[complex "lil" argument] | |
// cmd.exe -arg=justOne""""quote""""Plz >> com[cmd.exe] arg[justOne"quote"Plz] | |
// -arg_name_01="also commands are" optional.txt >> arg_name_01[also commands are] tar[optional.txt] | These are some | |
// cmd.exe -argA -argA=why?? targetA targetB >> com[cmd.exe] argA[] argA[why??] tar[targetB] | examples of | |
// cmd.exe -argA=messed up -argB=careful >> com[cmd.exe] argA[messed] tar[up] argB[careful] | inputs that may | |
// cmd.exe -test="thisIsMalformedButMightWorkOut >> com[cmd.exe] test[thisIsMalformedButMightWork] | cause you Hijinks | |
// cmd.exe -test=orItMightNot" -test=see? >> com[cmd.exe] test[orItMightNot -test=see?] | ~mind yourself~ | |
// | |
// | |
// Building rules: | |
// You can use the constructor to parse an initial string into the object, | |
// but you can also use `setCommand()` `setTarget()` `addArg()` `pruneArg()` to build a Commandline string programmatically | |
// When using these functions, it's possible to not "parse" them; what this means is that you give them ANYTHING you like, | |
// including quotes and spaces, without respect to parsing rules, and the function will make it work out if you | |
// don't tell it to "parse" them. If you DO tell them to "parse," or use the Constructor, you must respect quote and space rules. | |
// | |
// Emit rules: | |
// The emitted string will largely reflect the input string, though certain string transformations may occur to galvanize the | |
// output string to ensure its validity when reading it back in. It will also crunch whitespace and leading dashes, where it can | |
// Additionally, it will output ALL arguments, even if multiple exist with the same name, | |
// and will output the target and arguments in the order they were parsed or added. | |
// | |
// | |
// Performance and Apocrypha | |
// * The constructor performs one allocation (zero if move constructor), no matter how large the input. However, this string | |
// can be shrunk after parsing, sometimes significantly. If Keeping this object around for a long time, calling | |
// `_shrinkBuffer()` may save mem as it shrinks to the smallest allocation. | |
// * `findArg()` isn't especially expensive, but it does search the entire internal buffer each time it is run. | |
// * `getCommand()` and `getTarget()` do not need to search however, and will return their values immediately | |
// * All non-const methods except `pruneArg()` can potentially realloc, for that reason don't keep pointers after calling one | |
// * Commands and targets cannot use leading dashes [-] outside of quotes, but an argument's value can. | |
// * Dashes will be obliterated harmlessly if they aren't starting an argument and instead only lead to whitespace | |
// Parses, stores, edits and re-emits a command line of the form
//   command -key=value -flag target
// All tokens live in one internal buffer (`m_str`); `m_command` and `m_target` point into that buffer.
class Commandline {
public:
Commandline();
Commandline(const char * str, const char ** endPtr = NULL, const char * commentToken = "#", bool allowNewlines = true);
Commandline(const Commandline & other);
Commandline(Commandline && other);
Commandline & operator=(const Commandline & other);
Commandline & operator=(Commandline && other);
// parses a multi-line argument with comments
// out of necessity, assumes the command exists, and target will exist on the same line as it, but arguments can appear on their own lines after these
// comments cannot be started in the middle of a command, target, or argument (they must be outside of a quote block and have a prior space)
// stops reading when a new commandline could possibly be made, or if the end of the string is encountered
// if endPtr is not NULL, will fill it with address to the start of the first unused line (this may be `str` and not have a command itself on it)
// if no Commandline could be found, the commandline returned will have no command or args, and so `isEmpty()` will be true
// WARNING: if a target appears on a second line after an argument, it will count as our target (this is probably a sign you wrote it wrong, but that's on you)
//static Commandline asParse(const char * str, const char ** endPtr = NULL, const char * commentOpen = "#", bool optimize = true, uint32 tabSize = 4);
// orders two Commandlines by comparing their internal buffers, so they can be used as ordered-container keys
bool operator<(const Commandline & rhs) const;
// returns NULL if no args matching identifier are found; returns ptr to 0 if an arg exists but has no value; returns ptr to value string otherwise
// in the event there are multiple args with matching identifier (which CAN change depending on ignoreCase) this returns the first argument added only
// `key` MUST contain only ` alnum(c)||c=='_' ` characters; key doesn't need to include leading '-', (but it can deal if you do)
// if ` key==0 || *key==0 ` this function returns NULL
const char * findArg(const char * key, bool ignoreCase = true) const; // returns null if none found, only valid until a non-const method on this object is called
const char * getCommand() const; // returns null if none found, only valid until a non-const method on this object is called
const char * getTarget() const; // returns null if none found, only valid until a non-const method on this object is called
bool hasArgs() const; /// this doesn't return count cuz multiple keys of the same name ruin logic using a count
bool isEmpty() const;
// returns the arg in its raw form (but without the '-') as [argName=argValueString]
// returns NULL if no arg at the index, and thus we are at the end
const char * getArgKey(uint32 index) const;
// returns the value belonging to a key you got from getArgKey() (use findArg() unless you really need args by index, as this is waaaay slower)
// returns NULL if passed NULL, returns pointer to 0 if argument has no value
const char * getArgValue(const char * argKeyAsReturned) const;
// this function either sets the command, or replaces the command if one is already set
// if ` useParsingRules == false ` the text is stored as given, spaces and quotes included; quoting is applied later when the string is emitted
// if ` useParsingRules == true ` the text is run through the token parser first; you must respect parsing rules (but dangling quote blocks are still auto closed)
// if str is NULL, command is found and removed
// WARNING: If the target is directly after the command with no arguments inbetween, the target becomes the command.
void setCommand(const char * str, bool useParsingRules = false);
// this function either sets the target, or replaces the target if one is already set
// a target is optional and can appear anywhere, so long as a command exists
// if there is no command, this target will be treated as a command; if a command is added later, this stops being the case
// target will be added after args already added, but before args not yet added (though this doesn't matter too much)
// if ` useParsingRules == false ` the text is stored as given, spaces and quotes included; quoting is applied later when the string is emitted
// if ` useParsingRules == true ` the text is run through the token parser first; you must respect parsing rules (but dangling quote blocks are still auto closed)
// if str is NULL, target is found and removed
void setTarget(const char * str, bool useParsingRules = false);
// this function adds an argument, even if an argument of that name already exists
// `key` MUST contain only `alnum(c)||c=='_'` characters; key doesn't need to include leading '-', (but it can deal if you do)
// if ` useParsingRules == false ` the text is stored as given, spaces and quotes included; quoting is applied later when the string is emitted
// if ` useParsingRules == true ` the text is run through the token parser first; you must respect parsing rules (but dangling quote blocks are still auto closed)
// if ` key==0 || *key==0 ` this function does nothing
// if ` argStr==0 || *argStr==0 ` this argument will be added without a value
void addArg(const char * key, const char * argStr, bool useParsingRules = false);
// this function either adds an argument if no key for it exists, or replaces all existing ones found using the set case sensitivity
// `key` MUST contain only `alnum(c)||c=='_'` characters; key doesn't need to include leading '-', (but it can deal if you do)
// if ` useParsingRules == false ` the text is stored as given, spaces and quotes included; quoting is applied later when the string is emitted
// if ` useParsingRules == true ` the text is run through the token parser first; you must respect parsing rules (but dangling quote blocks are still auto closed)
// if ` key==0 || *key==0 ` this function does nothing; otherwise it is basically just `pruneArg(); addArg();`
// if ` argStr==0 || *argStr==0 ` this argument will be added without a value, or replaced to have no value
// unlike pruneArg(), this method always prunes all arguments which match the provided key under the `ignoreCase` rule set
void setArg(const char * key, const char * argStr, bool useParsingRules = false, bool ignoreCaseInFindAndReplace = true);
// finds the argument set to this key and removes it completely
// `key` MUST contain only `alnum(c)||c=='_'` characters; key doesn't need to include leading '-', (but it can deal if you do)
// if `pruneAllInstances` prunes ALL arguments which match the provided key under the `ignoreCase` rule set, otherwise only prunes first
// WARNING: If the target would end up in front of the string after removing this argument, the target will become the command.
void pruneArg(const char * key, bool pruneAllInstances = true, bool ignoreCase = true);
// same as `pruneArg()` but with an inbuilt loop for convenience
// prunes all instances of each key, as if `pruneAllInstances` were set
void pruneArgs(const char ** keys, uint32 cnt, bool ignoreCase = true);
// leaves only that which is requested, reorders all arguments, puts target last, eliminates multiples, and optionally performs case crunching.
// similarly, the string is galvanized as emit() would've performed
// This is useful for using a Commandline as a key, where keys with functionally similar effects should be identical
// if `keysToCaseOfProvidedKeys` is false, different cases are ignored and thus not included
void preen(const char ** keysToKeep, uint32 keyCount, bool keysToCaseOfProvidedKeys);
// generates and galvanizes a concise generated string that, were it passed into Commandline(), would restore our current state
// "galvanize" means the string emitted will always be valid and respect parsing rules, though it may not be the same as the string given
// However! command, target, and args, will all appear in the output in the same order they were added, unless `appendTargetWhere` is set
// if `appendTargetWhere` is positive, it will go last, if it is negative, it will go after the command, if it is 0, it stays put
// separator arguments decide what goes before each argument/target, usually a space (nothing goes before the command as it's always first)
std::string emit(int appendTargetWhere = 0, const char * seperatorIntoTarget = " ", const char * seperatorIntoArgs = " ") const;
// this is meant to be used in tandem with "asParse()" as a parsing/emitting tool for documents (you should give the same str address here as you did there)
// will leave comments alone and try to recreate the same alignment with whitespace,
// anything that could be construed as a command, target, or argument will be added/replaced/removed as needed
// out of necessity, assumes the command exists, and target will exist on the same line as it, but arguments can appear on their own lines after these
// stops writing when a new commandline could possibly be interpreted from str, or if the end of the string is encountered
// returns a string with the dropin replacement you can stick between `str` and whatever `endPtr` returned, when both are used in `asParse()`
// WARNING: expects the same str and comment parameters as before, if you want a dropin replacement, otherwise it'll give you something uh else?
std::string emitAsParse(const char * str, bool insertNewArgsAfterTarget = false, const char * seperatorIntoArgs = " ", const char * commentOpen = "#", uint32 tabSize = 4);
private:
//bool m_optimized {0};
// number of argument tokens currently stored in m_str
uint32 m_tokenCnt {0};
//uint32 m_internalSize {0};
// points at the command token inside m_str, or NULL when there is no command
char * m_command {0};
// points at the target token inside m_str, or NULL when there is no target
char * m_target {0};
// token storage; m_command and m_target point into this buffer, so they must be rebased whenever it moves
std::string m_str;
};
// ----- IMPLEMENTATION ------------------------------------------------------------------------------------------------ | |
#include "Commandline.h" | |
#include "Log.h" | |
#include <map> | |
// HELPER FUNCS ------------------------------------------------------------------------------------------------------------ | |
// Reads one token starting at `str` and optionally writes its unquoted text to `dest`.
//
// Scanning stops at the terminating 0, at the first unquoted whitespace character, or
// (when `useArgRules` is set) at the first unquoted '='.
// A quote ["] toggles quotation mode and is not part of the token; inside a quotation
// block a doubled quote [""] yields one literal quote, thus:
//   [zz""""zz] >> [zz"zz];   [zz""zz] >> [zzzz];   ["zz""zz"] >> [zz"zz];   ["zz"zz] >> [zzzz]
// also interestingly, [zz" "zz] == ["zz zz"], which lets us build tokens with spaces or quotes in a weird way
// A quotation block left open at the end of the string is simply treated as closed.
//
// dest        - output buffer, or NULL to only measure. The output never exceeds the consumed
//               input, so `dest` may alias `str` (in-place shrink). No terminator is written.
//               The pointer is taken by value; the caller advances its own cursor by the result.
// str         - start of the token; NULL yields 0 and leaves `*endPos` untouched.
// endPos      - if not NULL, receives the position where scanning stopped (one past the raw token).
// useArgRules - also stop on an unquoted '=' (used for argument keys).
// newlinesOut - if not NULL, incremented for every newline consumed inside a quotation block
//               (an unquoted newline is whitespace and ends the token without being consumed).
// returns the number of characters the parsed token contains.
uint32 parseToken(char * dest, const char * str, const char ** endPos, bool useArgRules, uint32 * newlinesOut = NULL) {
    if (!str)
        return 0;

    uint32 count  = 0;
    bool   quoted = false;
    for (; *str; ++str) {
        const char c = *str;
        if (c == '\"') {
            if (!quoted) {
                quoted = true;      // opening quote, not part of the token
                continue;
            }
            if (str[1] != '\"') {
                quoted = false;     // lone quote closes the block, not part of the token
                continue;
            }
            ++str;                  // [""] inside a block: drop the first, emit the second
        } else if (quoted) {
            if (c == '\n' && newlinesOut)
                ++*newlinesOut;
        } else if (isspace(static_cast<unsigned char>(c)) || (useArgRules && c == '=')) {
            // the cast matters: passing a negative char to isspace() is undefined behaviour
            break;
        }

        if (dest) {
            if (dest != str)
                *dest = *str;
            ++dest;
        }
        ++count;
    }

    if (endPos)
        *endPos = str;
    return count;
}
// Computes how many characters `str` occupies once it has been galvanized (quoted for safe re-parsing).
// Scans up to the terminating 0 only.
//   * every quote ["] in the input is doubled, costing one extra character
//   * if the input contains any whitespace or any quote, two more characters are added
//     for the surrounding quotes
// returns strlen(str) when galvanizing would leave the string unchanged, a larger value otherwise,
// and 0 for a NULL input.
uint32 getSizeAfterGalvanizing(const char * str) {
    if (!str)
        return 0;

    // The read position and the running size are tracked separately: bumping a shared
    // counter for a doubled quote would skip the following input character and could
    // step over the terminator when the string ends in a quote.
    uint32 size        = 0;
    bool   needsQuotes = false;
    for (const char * ch = str; *ch; ++ch) {
        ++size;
        if (*ch == '\"') {
            needsQuotes = true;
            ++size;                 // the quote gets doubled
        } else if (isspace(static_cast<unsigned char>(*ch))) {
            needsQuotes = true;
        }
    }
    return needsQuotes ? size + 2 : size;
}
// Appends `in` to `out` in galvanized form, i.e. in a form the token parser reads back as one token.
//   * input without whitespace or quotes is appended unchanged
//   * anything else is wrapped in quotes, with every embedded quote ["] doubled
// A NULL input appends nothing. Existing contents of `out` are preserved.
void performGalvanize(std::string & out, const char * in) {
    if (!in)
        return;

    // Single scan that decides whether quoting is needed and how much room the result takes.
    std::string::size_type length      = 0;
    std::string::size_type quoteCount  = 0;
    bool                   needsQuotes = false;
    for (const char * ch = in; *ch; ++ch, ++length) {
        if (*ch == '\"') {
            needsQuotes = true;
            ++quoteCount;
        } else if (isspace(static_cast<unsigned char>(*ch))) {
            needsQuotes = true;
        }
    }

    if (!needsQuotes) {
        out.append(in, length);
        return;
    }

    out.reserve(out.size() + length + quoteCount + 2);
    out.push_back('\"');
    for (const char * ch = in; *ch; ++ch) {
        if (*ch == '\"')
            out.push_back('\"');    // double the quote so it survives re-parsing
        out.push_back(*ch);
    }
    out.push_back('\"');
}
void replace(std::string & base, char * destStart, uint32 destSize, const char * replacementStr, | |
bool parseReplacement, bool parseWithArgRules, | |
char *& commandPtr, char *& targetPtr, const char * prependStr = NULL) | |
{ | |
uint32 replacementSize = 0; | |
uint32 prependSize = 0; | |
if (replacementStr) | |
replacementSize = strlen(replacementStr) + 1; | |
if (prependStr) { | |
prependSize = strlen(prependStr); | |
replacementSize += prependSize; | |
} | |
uint32 finalSize = replacementSize; | |
if (parseReplacement && replacementStr) { | |
finalSize = parseToken(NULL, replacementStr, NULL, false); | |
} | |
char * priorPtr = &base[0]; | |
char * basePtr = priorPtr; | |
uint32 offsetIntoBase = destStart - priorPtr; | |
if (finalSize != destSize) { | |
// must expand to incorporate our replacement | |
if (destSize > finalSize) { | |
base.erase(offsetIntoBase, destSize - finalSize); | |
} else { | |
base.insert(offsetIntoBase, finalSize - destSize, 0); | |
} | |
char * basePtr = &base[0]; | |
if(targetPtr) targetPtr = basePtr + (targetPtr - priorPtr); | |
if(commandPtr) commandPtr = basePtr + (commandPtr - priorPtr); | |
if(targetPtr - basePtr > offsetIntoBase) targetPtr += (int32(finalSize) - int32(destSize)); | |
if(commandPtr - basePtr > offsetIntoBase) commandPtr += (int32(finalSize) - int32(destSize)); | |
} | |
if (finalSize) { | |
if(prependStr) | |
memcpy(destStart, prependStr, prependSize); | |
if (parseReplacement) { | |
parseToken(destStart + prependSize, replacementStr, NULL, parseWithArgRules); | |
} else { | |
memcpy(destStart + prependSize, replacementStr, finalSize); | |
} | |
} | |
} | |
// Cursor and result record for parse(): each call to parse() advances `_str` past one token
// and describes that token in the public fields below.
struct ParseState {
    // str        - text to tokenize (may be NULL)
    // commentStr - token that starts a comment running to the end of the line
    ParseState(const char * str, const char * commentStr = "#")
        : _str(str)
        , _commentStr(commentStr)
    {
    }

    // start of the last token (including leading dashes for arguments), and the key / value parts of an argument
    const char * pos         = 0;
    const char * posArgKey   = 0;
    const char * posArgValue = 0;

    // raw (unparsed) sizes of the spans above
    uint32 posSize         = 0;
    uint32 posArgKeySize   = 0;
    uint32 posArgValueSize = 0;

    // newlines passed so far
    uint32 linesTraversed = 0;

    // kind of the last token; atStart until parse() has run once, atEnd once the input is exhausted
    enum {
        atStart,
        atEnd,
        atCom,
        atTar,
        atArg
    } type = atStart;

    // internal cursor state
    const char * _str;                  // current read position
    const char * _commentStr;           // comment opener
    uint32       _commentStrSize = 0;   // strlen(_commentStr), filled in by the first parse()
    bool         _pastCommand    = false; // set once any token has been produced
};
// Advances `state` past the next token and describes it in `state`.
//
// On return `state.type` is one of:
//   atCom - first non-argument token seen before anything else; pos/posSize hold its raw span
//   atTar - any later non-argument token; pos/posSize hold its raw span
//   atArg - an argument; pos/posSize cover the whole raw argument including its dashes,
//           posArgKey/posArgKeySize the key, posArgValue/posArgValueSize the value (size 0 if none)
//   atEnd - input exhausted (or the input string was NULL)
// Whitespace and comments are skipped; `state.linesTraversed` counts the newlines passed, including
// those inside quoted tokens. Lone dashes and arguments with an empty key are discarded.
// The cursor never moves past the terminating 0.
void parse(ParseState & state) {
    if (state.type == ParseState::atStart) {
        // one-time setup
        state._commentStrSize = state._commentStr ? static_cast<uint32>(strlen(state._commentStr)) : 0;
    }
    state.type = ParseState::atEnd;
    if (!state._str)
        return;

    const char *& ch = state._str;
    while (*ch) {
        if (isspace(static_cast<unsigned char>(*ch))) {
            if (*ch == '\n')
                ++state.linesTraversed;
            ++ch;
            continue;
        }

        // An empty comment token would match at every position, so it disables comments instead.
        if (state._commentStrSize && 0 == strncmp(state._commentStr, ch, state._commentStrSize)) {
            while (*ch && *ch != '\n')
                ++ch;
            // The newline (if any) is left for the whitespace branch, which counts it;
            // at the end of the input the loop simply terminates.
            continue;
        }

        if (*ch == '-') {
            // go through multiple leading ---
            state.pos = ch;
            while (*ch == '-')
                ++ch;

            // a lone " - " (or dashes at the very end) is ignored; the following whitespace
            // is handled by the top of the loop so a newline is still counted
            if (*ch == 0 || isspace(static_cast<unsigned char>(*ch))) {
                state.pos = 0;
                continue;
            }

            const char * keyStart = ch;
            parseToken(NULL, ch, &ch, true, &state.linesTraversed);
            if (ch != keyStart) {
                state.posArgKey     = keyStart;
                state.posArgKeySize = static_cast<uint32>(ch - keyStart);

                const char * valueStart = ch;
                if (*ch == '=') {
                    ++ch;
                    valueStart = ch;
                    parseToken(NULL, ch, &ch, false, &state.linesTraversed);
                }
                // both empty args and valued args are handled here
                state.posArgValue     = valueStart;
                state.posArgValueSize = static_cast<uint32>(ch - valueStart);
                state.posSize         = static_cast<uint32>(ch - state.pos);
                state.type            = ParseState::atArg;
                state._pastCommand    = true;
                return;
            }

            // The key is empty, which can only mean we stopped on '=' ("-=value"):
            // triage by skipping the '=' and its value too.
            if (*ch == '=') {
                ++ch;
                parseToken(NULL, ch, &ch, false, &state.linesTraversed);
            }
            state.pos = 0;
            continue;
        }

        // must be a command or target
        const char * tokenStart = ch;
        state.pos = tokenStart;
        parseToken(NULL, ch, &ch, false, &state.linesTraversed);
        if (ch == tokenStart) {
            // defensive: nothing was consumed, step over the offending character (it is not the terminator)
            ++ch;
            state.pos = 0;
            continue;
        }
        state.posSize      = static_cast<uint32>(ch - tokenStart);
        state.type         = state._pastCommand ? ParseState::atTar : ParseState::atCom;
        state._pastCommand = true;
        return;
    }

    state.type = ParseState::atEnd;
}
// ----- COMMANDLINE ------------------------------------------------------------------------------------------------ | |
// Creates an empty command line: no command, no target, no arguments.
Commandline::Commandline() : m_tokenCnt(0), m_command(0), m_target(0) {}
// Builds a Commandline by tokenizing `str`.
//
// The input is walked twice with parse(): pass 1 sizes the internal buffer and counts the
// arguments, pass 2 writes every accepted token into `m_str`, each followed by a 0.
// Only the last target on the starting line is kept; earlier ones are counted as "false targets".
// A command or target found on a later line than the first token ends the command line.
//
// str           - text to parse
// endPtr        - if not NULL, receives the read position after the last token pass 1 accepted
// commentToken  - comment opener handed to the tokenizer
// allowNewlines - when false, parsing stops as soon as any newline has been traversed
Commandline::Commandline(const char * str, const char ** endPtr, const char * commentToken, bool allowNewlines) {
uint32 falseTargets = 0; // targets that were superseded by a later target
uint32 finalSize = 0; // buffer size computed by pass 1
uint32 lineStarted = 0xFFFFFFFF; // line count at the first parsed token; 0xFFFFFFFF means "not set yet"
const char * lastPtr = str; // read position after the last accepted token
{
// ---- pass 1: measure ----
ParseState state(str, commentToken);
uint32 targetSize = 0; // raw size of the most recent target, so it can be un-counted if superseded
do {
parse(state);
if(!allowNewlines && state.linesTraversed)
break;
if (lineStarted == 0xFFFFFFFF)
lineStarted = state.linesTraversed;
if (state.type == ParseState::atTar) {
// a target on a later line belongs to the next command line
if (lineStarted < state.linesTraversed) {
state.type = ParseState::atEnd;
goto nextPhase;
}
// a newer target replaces the previous one: give back the room reserved for it
if (targetSize) {
finalSize -= targetSize + 1;
++falseTargets;
}
targetSize = state.posSize;
finalSize += state.posSize + 1;
}
if (state.type == ParseState::atArg) {
// '-' + key + terminator, plus '=' + value when a value is present
finalSize += state.posArgKeySize + 2;
if (state.posArgValueSize) {
finalSize += state.posArgValueSize + 2;
}
++m_tokenCnt;
} else {
// NOTE(review): this branch also runs for atTar (already counted above) and for atEnd
// (posSize is left over from the previous token), so finalSize is an upper bound rather
// than an exact size - confirm whether the padding is intended.
if (lineStarted < state.linesTraversed) {
state.type = ParseState::atEnd;
goto nextPhase;
}
finalSize += state.posSize + 1;
}
lastPtr = state._str;
} while(state.type != ParseState::atEnd);
nextPhase:
// be sure to keep the extra char as a void space behind the last token
if(endPtr)
*endPtr = lastPtr;
}
// ---- pass 2: write ----
m_str.resize(finalSize);
ParseState state = ParseState(str, commentToken);
char * ch = &m_str[0]; // write cursor into m_str
do {
parse(state);
if(!allowNewlines && state.linesTraversed)
break;
switch (state.type) {
case ParseState::atCom:
if (lineStarted < state.linesTraversed) {
return;
}
{
const char * check;
m_command = ch;
ch += parseToken(ch, state.pos, &check, false);
// the token must span exactly what parse() reported
assert(check - state.pos == state.posSize);
}
break;
case ParseState::atTar:
if (lineStarted < state.linesTraversed) {
return;
}
// skip every target except the last one; a skipped target still gets the 0 written below
if (falseTargets) {
--falseTargets;
} else {
const char * check;
m_target = ch;
ch += parseToken(ch, state.pos, &check, false);
assert(check - state.pos == state.posSize);
}
break;
case ParseState::atArg:
{
// stored as "-key" or "-key=value", whatever number of dashes the input used
*ch = '-';
++ch;
const char * check;
ch += parseToken(ch, state.posArgKey, &check, true);
assert(check - state.posArgKey == state.posArgKeySize);
if (state.posArgValueSize) {
*ch = '=';
++ch;
ch += parseToken(ch, state.posArgValue, &check, false);
assert(check - state.posArgValue == state.posArgValueSize);
}
}
break;
}
// terminate the token just written (also executed once for atEnd)
*ch = 0;
++ch;
} while(state.type != ParseState::atEnd);
}
// Copy constructor: duplicates the token buffer and re-points m_command / m_target into the copy.
// A NULL command or target in `other` stays NULL here (rebasing a NULL pointer would
// produce a bogus non-NULL pointer).
Commandline::Commandline(const Commandline & other)
    : m_tokenCnt(other.m_tokenCnt)
    , m_command(nullptr)
    , m_target(nullptr)
    , m_str(other.m_str)
{
    const char * const oldBase = &other.m_str[0];
    char * const       newBase = &m_str[0];
    if (other.m_command)
        m_command = newBase + (other.m_command - oldBase);
    if (other.m_target)
        m_target = newBase + (other.m_target - oldBase);
}
// Move constructor: takes over the token buffer of `other` and leaves `other` empty.
// m_command / m_target are carried over as offsets because the character storage may live at a
// different address after the move (e.g. for short strings); NULL pointers stay NULL.
Commandline::Commandline(Commandline && other)
    : m_tokenCnt(other.m_tokenCnt)
    , m_command(nullptr)
    , m_target(nullptr)
{
    const char * const oldBase = &other.m_str[0];
    const bool hadCommand = other.m_command != nullptr;
    const bool hadTarget  = other.m_target  != nullptr;
    const std::string::size_type commandOffset = hadCommand ? static_cast<std::string::size_type>(other.m_command - oldBase) : 0;
    const std::string::size_type targetOffset  = hadTarget  ? static_cast<std::string::size_type>(other.m_target  - oldBase) : 0;

    m_str = std::move(other.m_str);

    char * const newBase = &m_str[0];
    if (hadCommand)
        m_command = newBase + commandOffset;
    if (hadTarget)
        m_target = newBase + targetOffset;

    // leave the source in a well-defined empty state
    other.m_str.clear();
    other.m_tokenCnt = 0;
    other.m_command  = nullptr;
    other.m_target   = nullptr;
}
// Copy assignment: duplicates the token buffer, the argument count and the command / target
// positions of `other`. A NULL command or target in `other` becomes NULL here.
Commandline & Commandline::operator=(const Commandline & other) {
    if (this == &other)
        return *this;

    m_str      = other.m_str;
    m_tokenCnt = other.m_tokenCnt;     // without this, argument lookups on the copy use a stale count

    const char * const oldBase = &other.m_str[0];
    char * const       newBase = &m_str[0];
    m_command = other.m_command ? newBase + (other.m_command - oldBase) : nullptr;
    m_target  = other.m_target  ? newBase + (other.m_target  - oldBase) : nullptr;
    return *this;
}
// Move assignment: takes over the token buffer of `other` and leaves `other` empty.
// m_command / m_target are carried over as offsets because the character storage may live at a
// different address after the move; NULL pointers stay NULL.
Commandline & Commandline::operator=(Commandline && other) {
    if (this == &other)
        return *this;

    const char * const oldBase = &other.m_str[0];
    const bool hadCommand = other.m_command != nullptr;
    const bool hadTarget  = other.m_target  != nullptr;
    const std::string::size_type commandOffset = hadCommand ? static_cast<std::string::size_type>(other.m_command - oldBase) : 0;
    const std::string::size_type targetOffset  = hadTarget  ? static_cast<std::string::size_type>(other.m_target  - oldBase) : 0;

    m_str      = std::move(other.m_str);
    m_tokenCnt = other.m_tokenCnt;

    char * const newBase = &m_str[0];
    m_command = hadCommand ? newBase + commandOffset : nullptr;
    m_target  = hadTarget  ? newBase + targetOffset  : nullptr;

    // leave the source in a well-defined empty state
    other.m_str.clear();
    other.m_tokenCnt = 0;
    other.m_command  = nullptr;
    other.m_target   = nullptr;
    return *this;
}
// Strict weak ordering over the raw token buffers, so a Commandline can be used as an ordered key.
bool Commandline::operator<(const Commandline & rhs) const {
    const int order = m_str.compare(rhs.m_str);
    return order < 0;
}
const char * Commandline::findArg(const char * argStr, bool ignoreCase) const { | |
if(!argStr || !*argStr || !m_command || !m_tokenCnt) | |
return NULL; | |
const char * ch = &m_str[0]; | |
const char * end = ch + m_str.size(); | |
while(*ch) | |
++ch; | |
++ch; | |
uint32 n = 0; | |
while (n < m_tokenCnt && ch != end) { | |
if (*ch == '-') { | |
++ch; | |
while(*ch == '-') | |
++ch; // allows us to lead with as many - as we like | |
const char * srch = ch; | |
const char * cmp = argStr; | |
while (*cmp) { | |
if(ignoreCase ? tolower(*cmp) != tolower(*srch) : *cmp != *srch) | |
break; | |
++cmp; | |
++srch; | |
} | |
if (!*cmp && (!*srch || *srch == '=')) { | |
// found it | |
return srch+(*srch=='='); | |
} | |
++n; | |
} else { | |
while(*ch) | |
++ch; | |
} | |
++ch; | |
} | |
return NULL; | |
} | |
// Returns the stored command pointer (m_command); NULL when no command is set.
// No search is performed.
const char * Commandline::getCommand() const {
return m_command;
}
// Returns the stored target pointer (m_target); NULL when no target is set.
// No search is performed.
const char * Commandline::getTarget() const {
return m_target;
}
bool Commandline::hasArgs() const { | |
return m_tokenCnt != 0; | |
} | |
bool Commandline::isEmpty() const { | |
return !getCommand() && !hasArgs(); | |
} | |
const char * Commandline::getArgKey(uint32 index) const { | |
if(index > m_tokenCnt) | |
return NULL; | |
const char * ch = &m_str[0]; | |
const char * end = ch + m_str.size(); | |
while(*ch) | |
++ch; | |
++ch; | |
uint32 n = 0; | |
while (n < m_tokenCnt && ch != end) { | |
if (*ch == '-') { | |
++ch; | |
while(*ch == '-') | |
++ch; // allows us to lead with as many - as we like | |
if (n == index) { | |
return ch; | |
} | |
++n; | |
} else { | |
while(*ch) | |
++ch; | |
} | |
++ch; | |
} | |
return NULL; | |
} | |
// Given a raw argument as returned by getArgKey() ([key] or [key=value]), returns the value part.
// returns NULL for a NULL input, and a pointer to the terminating 0 when there is no '='.
const char * Commandline::getArgValue(const char * argKeyAsReturned) const {
    if (argKeyAsReturned == NULL)
        return NULL;

    const char * cursor = argKeyAsReturned;
    for (; *cursor != 0; ++cursor) {
        if (*cursor == '=')
            return cursor + 1;  // value starts right after the first '='
    }
    return cursor;              // no '=': points at the terminator, i.e. an empty value
}
// Sets, replaces or removes the command.
//
// str             - new command; NULL or "" removes the current command
// useParsingRules - true: `str` is run through the token parser; false: it is stored verbatim
// When the command is removed and the target ends up at the front of the buffer, the target
// becomes the command.
// May reallocate the internal buffer; previously returned pointers become invalid.
void Commandline::setCommand(const char * str, bool useParsingRules) {
    if (!str || *str == 0) {
        // remove
        if (!m_command)
            return;
        replace(m_str, m_command, static_cast<uint32>(strlen(m_command)) + 1, NULL, useParsingRules, false, m_command, m_target);
        m_command = NULL;
        if (m_target && m_target == &m_str[0]) {
            m_command = m_target;
            m_target  = NULL;
        }
        return;
    }

    if (m_command) {
        // replace the existing command token in place
        replace(m_str, m_command, static_cast<uint32>(strlen(m_command)) + 1, str, useParsingRules, false, m_command, m_target);
    } else {
        // no command yet: it has to become the first token, so insert at the front
        replace(m_str, &m_str[0], 0, str, useParsingRules, false, m_command, m_target);
        m_command = &m_str[0];
    }
}
// Sets, replaces or removes the target.
//
// str             - new target; NULL or "" removes the current target
// useParsingRules - true: `str` is run through the token parser; false: it is stored verbatim
// A new target is appended behind the tokens already stored. If it ends up as the very first
// token of the buffer it is treated as the command instead.
// May reallocate the internal buffer; previously returned pointers become invalid.
void Commandline::setTarget(const char * str, bool useParsingRules) {
    if (!str || *str == 0) {
        // remove
        if (!m_target)
            return;
        replace(m_str, m_target, static_cast<uint32>(strlen(m_target)) + 1, NULL, useParsingRules, false, m_command, m_target);
        m_target = NULL;
        return;
    }

    if (m_target) {
        // replace the existing target token in place
        replace(m_str, m_target, static_cast<uint32>(strlen(m_target)) + 1, str, useParsingRules, false, m_command, m_target);
    } else {
        // no target yet: append one and remember where it landed
        const std::string::size_type offset = m_str.size();
        replace(m_str, &m_str[0] + offset, 0, str, useParsingRules, false, m_command, m_target);
        m_target = &m_str[0] + offset;
    }

    if (m_target == &m_str[0]) {
        m_command = m_target;
        m_target  = NULL;
    }
}
// Appends a new argument to the end of the stored commandline.
//
//  key             : argument key; NULL or "" makes this a no-op.
//  argStr          : optional value; when NULL only the key is added.
//  useParsingRules : forwarded unchanged to replace().
//
// The key is spliced in with a "-" prefix and the value with a "=" prefix.
// Existing arguments with the same key are left alone (see setArg for
// replace-semantics).
void Commandline::addArg(const char * key, const char * argStr, bool useParsingRules) {
    if (!key || !*key)
        return;

    replace(m_str, &m_str[0] + m_str.size(), 0, key, useParsingRules, true, m_command, m_target, "-");
    ++m_tokenCnt;

    // The value (not the key a second time) goes after the '='. The end
    // position is recomputed because the first splice changed m_str.
    if (argStr)
        replace(m_str, &m_str[0] + m_str.size(), 0, argStr, useParsingRules, false, m_command, m_target, "=");
}
// Sets an argument so that exactly one instance of it remains: every existing
// argument matching key is pruned first, then the key (and optional value) is
// added again via addArg(). A NULL or empty key is ignored.
void Commandline::setArg(const char * key, const char * argStr, bool useParsingRules, bool ignoreCaseInFindAndReplace) {
    const bool haveKey = key && *key;
    if (haveKey) {
        pruneArg(key, true, ignoreCaseInFindAndReplace);
        addArg(key, argStr, useParsingRules);
    }
}
void Commandline::pruneArg(const char * key, bool pruneAllInstances, bool ignoreCase) { | |
while(*key == '-') | |
++key; | |
if(!key || !*key || !m_tokenCnt) | |
return; | |
char * arg; | |
char * base = &m_str[0]; | |
while (arg = (char*)findArg(key, ignoreCase)) { | |
do { | |
--arg; | |
}while(arg != base && arg[-1]); | |
replace(m_str, arg, strlen(arg) + 1, NULL, false, false, m_command, m_target); | |
--m_tokenCnt; | |
} | |
if (m_target == &m_str[0]) { | |
m_command = m_target; | |
m_target = NULL; | |
} | |
} | |
void Commandline::pruneArgs(const char ** ids, uint32 cnt, bool ignoreCase) { | |
if(!ids || !cnt) | |
return; | |
for (uint32 n = 0; n < cnt; ++n) { | |
pruneArg(ids[n], true, ignoreCase); | |
} | |
} | |
void Commandline::preen(const char ** keysToKeep, uint32 keyCount, bool keysToCaseOfProvidedKeys) { | |
std::string out; | |
if (m_command) { | |
out.append(m_command); | |
out.push_back(0); | |
} | |
if (keysToKeep) { | |
for (uint32 n = 0; n < keyCount; ++n) { | |
const char * found = findArg(keysToKeep[n], keysToCaseOfProvidedKeys); | |
if (found) { | |
out.push_back('-'); | |
out.append(keysToKeep[n]); | |
if (*found) { | |
out.push_back('='); | |
out.append(found); | |
} | |
out.push_back(0); | |
} | |
} | |
} | |
if (m_target) { | |
out.append(m_target); | |
out.push_back(0); | |
} | |
*this = Commandline(out.c_str()); | |
} | |
std::string Commandline::emit(int appendTargetWhere, const char * seperatorIntoTarget, const char * seperatorIntoArgs) const { | |
std::string dest; | |
const char * start = m_str.c_str(); | |
const char * ch = start; | |
const char * end = start + m_str.size(); | |
uint32 tokenFound = 0; | |
if (m_command) { | |
performGalvanize(dest, m_command); | |
} | |
if (appendTargetWhere < 0 && m_target) { | |
dest.append(seperatorIntoTarget); | |
performGalvanize(dest, m_target); | |
} | |
while (ch < end && tokenFound < m_tokenCnt) { | |
if (*ch == '-') { | |
if(ch != start) | |
dest.append(seperatorIntoArgs); | |
dest.push_back('-'); | |
++ch; | |
while (isalnum(*ch) || *ch == '_') { | |
dest.push_back(*ch); | |
++ch; | |
} | |
if (*ch == '=') { | |
dest.push_back('='); | |
++ch; | |
if (!isspace(*ch)) { | |
performGalvanize(dest, ch); | |
} | |
} | |
++tokenFound; | |
} else { | |
if (!appendTargetWhere && ch == m_target) { | |
dest.append(seperatorIntoTarget); | |
performGalvanize(dest, m_target); | |
} | |
} | |
ch += strlen(ch) + 1; | |
} | |
if (appendTargetWhere > 0 && m_target) { | |
dest.append(seperatorIntoTarget); | |
performGalvanize(dest, m_target); | |
} | |
assert(tokenFound == m_tokenCnt); | |
return dest; | |
} | |
// NOT IMPLEMENTED YET (TODO).
// Currently returns an empty string for every input, including NULL or "".
// None of the parameters influence the result.
//
// The disabled draft below is kept for reference. It refers to names that are
// not declared in this function (`in`, `endPtr`, `optimize`).
std::string Commandline::emitAsParse(const char * str, bool insertNewArgsAfterTarget, const char * seperatorIntoArgs, const char * commentOpen, uint32 tabSize) {
    // TODO
    /*
    dest.reserve(strlen(str));
    const char * line = str;
    bool pastFirstline = false;
    bool inToken = false;
    uint32 commentOpenSize = 0;
    if(commentOpen)
        commentOpenSize = strlen(commentOpen);
    // go around until at least one token is fully read AND we hit the newline AND encounter a new token, or the end of the str is hit
    while (*str) {
        // get rid of all leading whitespace and newlines
        while (*str) {
            while (isspace(*str))
                dest.push_back(*(str++));
            if (commentOpen && 0==strncmp(str, commentOpen, commentOpenSize)) {
                while(*str && *str != '\n') // don't skip past comment as it may use \n
                    dest.push_back(*(str++));
            } else {
                break;
            }
        }
        // we're now at a potential token (not a comment)
        if (pastFirstline) {
            if (*str != '-') {
                break; // we're at a potentially new command, end it here
            }
        }
        // get as many tokens as we can grab on this line (always start at the next token)
        while (*str) {
            // handle special case of args not having quotes
            if (*str == '-') {
                const char * keyStart;
                const char * keyEnd;
                while (*str && !isspace(*str)) {
                    in.push_back(*str);
                    if (*str == '=') {
                        ++str;
                        break;
                    }
                    ++str;
                }
            }
            // get this one token
            bool quote = false;
            while (*str) {
                if (!quote) {
                    if(*str == '\"')
                        quote = true;
                    else if(isspace(*str))
                        break;
                    else
                        in.push_back(*str);
                } else {
                    if (*str == '\"') {
                        if (str[1] == '\"') {
                            in.push_back('\"');
                            ++str;
                        } else {
                            quote = false;
                        }
                    } else {
                        in.push_back(*str);
                    }
                }
                ++str;
            }
            // handle post-token trailing whitespace and comments until newline
            while (isspace(*str) && *str != '\n')
                ++str;
            if (commentOpen && 0==strncmp(str, commentOpen, commentOpenSize)) {
                while(*str && *str != '\n') // don't skip past comment as it may use \n
                    ++str;
            }
            if (*str == '\n') {
                ++str;
                line = str;
                break;
            }
            // if none of these, just start the next token
        }
        pastFirstline = true;
    }
    if(endPtr)
        *endPtr = line;
    return Commandline(std::move(in), optimize);
    //*/
    return std::string();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment