Created
September 13, 2015 21:38
-
-
Save gicmo/9889b2955dca7e68600a to your computer and use it in GitHub Desktop.
POC to parse a value+uncertainty+unit string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/types.h>

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
namespace odml2 {

// Classifies a single byte of a UTF-8 stream by its leading one bits.
//
// Returns 1 for an ASCII byte (0xxx xxxx), 2..4 for the lead byte of a
// multi-byte sequence, and -1 for a byte that cannot start a sequence:
// a continuation byte (10xx xxxx) or 0xF8..0xFF. The historical 5- and
// 6-byte forms are rejected because RFC 3629 limits UTF-8 to four bytes.
ssize_t u8_nbytes(uint8_t bytes) {
    if (bytes < 0x80) {
        return 1;   // 0xxx xxxx
    }
    if (bytes < 0xC0) {
        return -1;  // 10xx xxxx (continuation byte)
    }
    if (bytes < 0xE0) {
        return 2;   // 110x xxxx
    }
    if (bytes < 0xF0) {
        return 3;   // 1110 xxxx
    }
    if (bytes < 0xF8) {
        return 4;   // 1111 0xxx
    }
    return -1;      // 1111 1xxx
}

// Decodes one UTF-8 encoded code point from [begin, end).
//
// out receives the decoded code point, or U+FFFD (replacement character)
// when the range is empty or the input is malformed: a byte that cannot
// start a sequence, a truncated sequence, a bad continuation byte, an
// overlong encoding, a surrogate, or a value above U+10FFFF.
//
// Returns the iterator positioned after the bytes that were consumed.
// A byte that fails the continuation check is NOT consumed, so the next
// call can decode it as the start of a new character.
template<typename Iter>
Iter u8to32(Iter begin, Iter end, char32_t &out) {
    out = 0xFFFD;  // pessimistic default; overwritten on success
    if (begin == end) {
        return end;
    }

    const uint8_t lead = static_cast<uint8_t>(*begin);
    ++begin;

    const ssize_t nb = u8_nbytes(lead);
    if (nb == 1) {
        out = lead;
        return begin;
    }
    if (nb < 1) {
        // stray continuation byte or invalid lead byte: skip just this byte
        return begin;
    }

    // nb is 2, 3 or 4: the lead byte carries (7 - nb) payload bits
    char32_t cp = lead & (0xFFU >> (nb + 1));
    for (ssize_t i = 1; i < nb; ++i) {
        if (begin == end) {
            return begin;  // incomplete character
        }
        const uint8_t ch = static_cast<uint8_t>(*begin);
        if ((ch & 0xC0U) != 0x80U) {
            return begin;  // not a continuation byte; leave it unconsumed
        }
        cp = (cp << 6) | (ch & 0x3FU);
        ++begin;
    }

    // smallest code point that legitimately needs nb bytes
    const char32_t lowest = (nb == 2) ? 0x80 : (nb == 3) ? 0x800 : 0x10000;
    const bool overlong = cp < lowest;
    const bool surrogate = cp >= 0xD800 && cp <= 0xDFFF;
    if (overlong || surrogate || cp > 0x10FFFF) {
        return begin;
    }

    out = cp;
    return begin;
}

// Checks whether the text at str starts with U+00B1 (plus-minus sign) or
// U+2213 (minus-or-plus sign), encoded as UTF-8.
//
// Returns a pointer just past the sign when one is present, otherwise
// returns str unchanged.
static const char* is_pm(const char *str, const char *end) {
    char32_t ch = 0;
    const char *next = u8to32(str, end, ch);
    const bool plus_minus = (ch == 0x00B1);
    const bool minus_plus = (ch == 0x2213);
    return (plus_minus || minus_plus) ? next : str;
}

// Splits val into "value [± uncertainty] [unit]" and prints the parts to
// std::cout on a single line.
//
//  - "[S] <val>"  when strtod cannot parse a leading number
//  - "[I] <n>"    when the number has no fractional part and fits int64_t
//  - "[F] <x>"    otherwise (including infinities, NaN and huge values)
// followed by " uncertainty: <u>" when a plus-minus sign and a number
// follow the value, and " unit: <text>" for whatever text remains.
static void parse_val(const std::string &val) {
    const char *begin = val.c_str();
    const char *past = begin + val.length();

    // try parsing val as a number
    char *end = nullptr;
    const double x = std::strtod(begin, &end);
    if (end == begin) {
        std::cout << "[S] " << val << '\n';
        return;
    }

    // check if x is an integer that int64_t can represent; casting an
    // out-of-range double to an integer is undefined behaviour
    double intp = 0.0;
    const double frac = std::modf(x, &intp);
    const double int64_limit = 9223372036854775808.0;  // 2^63, exact in double
    const bool is_int = (frac == 0.0) && (intp >= -int64_limit) && (intp < int64_limit);
    if (is_int) {
        std::cout << "[I] " << static_cast<int64_t>(intp);
    } else {
        std::cout << "[F] " << x;
    }

    if (end == past) {
        std::cout << '\n';
        return;
    }

    const char *rest = end;  // first byte after the numeric value
    const char *uc = is_pm(rest, past);
    if (uc != rest) {
        char *uc_end = nullptr;
        const double ucv = std::strtod(uc, &uc_end);
        if (uc_end != uc) {
            std::cout << " uncertainty: " << ucv;
            rest = uc_end;
        }
        // otherwise no number follows the sign: do not invent an
        // uncertainty of 0, keep the sign as part of the remaining text
    }

    if (rest != past) {
        std::cout << " unit: " << rest;
    }
    std::cout << '\n';
}

} // namespace odml2
// Sample inputs fed to odml2::parse_val by main(). Every literal that
// contains a non-ASCII character carries the u8 prefix so that its bytes
// are UTF-8 regardless of the compiler's execution character set.
static std::vector<std::string> tests = {
    "42",
    "42mV",
    u8"42±0.001mV",
    u8"3.141±0.1deg",
    "String",
    u8"23±mV"
};
int main(int argc, char **argv) { | |
namespace o2 = odml2; | |
for (const auto &test : tests) { | |
std::cout << "Checking: [" << test << "] (" << test.length() << ")" << std::endl; | |
o2::parse_val(test); | |
std::cout << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment