Skip to content

Instantly share code, notes, and snippets.

@gicmo
Created September 13, 2015 21:38
Show Gist options
  • Save gicmo/9889b2955dca7e68600a to your computer and use it in GitHub Desktop.
Save gicmo/9889b2955dca7e68600a to your computer and use it in GitHub Desktop.
Proof of concept: parse a string of the form value ± uncertainty followed by a unit (e.g. "42±0.001mV")
#include <sys/types.h>

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
namespace odml2 {

/// @brief Length of the UTF-8 sequence introduced by a lead byte.
///
/// @param bytes First byte of a (potential) UTF-8 sequence.
/// @return 1..4 for a valid lead byte, -1 for a continuation byte
///         (10xx xxxx) or for a byte that can never start a sequence
///         (0xF8..0xFF; sequences longer than four bytes are not UTF-8).
ssize_t u8_nbytes(uint8_t bytes) {
    if ((bytes & 0x80U) == 0x00U) { return 1; }  // 0xxx xxxx
    if ((bytes & 0xE0U) == 0xC0U) { return 2; }  // 110x xxxx
    if ((bytes & 0xF0U) == 0xE0U) { return 3; }  // 1110 xxxx
    if ((bytes & 0xF8U) == 0xF0U) { return 4; }  // 1111 0xxx
    return -1;                                   // 10xx xxxx or 1111 1xxx
}

/// @brief Decode one code point from a UTF-8 byte range.
///
/// On any malformed input `out` is set to U+FFFD (REPLACEMENT CHARACTER).
/// Malformed means: empty range, stray continuation byte, invalid lead byte,
/// truncated sequence, overlong encoding, UTF-16 surrogate, or a value above
/// U+10FFFF.
///
/// @param begin Iterator to the first byte to decode.
/// @param end   Iterator one past the last available byte.
/// @param out   Receives the decoded code point or U+FFFD.
/// @return Iterator to the first byte that was not consumed. A byte that
///         breaks a multi-byte sequence (i.e. is not a continuation byte) is
///         NOT consumed, so the caller can resume decoding at it.
template<typename Iter>
Iter u8to32(Iter begin, Iter end, char32_t &out) {
    constexpr char32_t replacement = 0xFFFD;
    out = replacement;

    if (begin == end) {
        return end;
    }

    const uint8_t lead = static_cast<uint8_t>(*begin);
    ++begin;

    const ssize_t nb = u8_nbytes(lead);
    if (nb == 1) {
        out = lead;
        return begin;
    }
    if (nb < 1) {
        // stray continuation byte or invalid lead byte: skip exactly one byte
        return begin;
    }

    // The lead byte of an nb-byte sequence carries (7 - nb) payload bits.
    char32_t cp = lead & (0xFFU >> (nb + 1));
    for (ssize_t i = 1; i < nb; ++i) {
        if (begin == end) {
            return begin;  // truncated sequence
        }
        const uint8_t ch = static_cast<uint8_t>(*begin);
        if ((ch & 0xC0U) != 0x80U) {
            // Not a continuation byte; leave it for the next decode call
            // instead of swallowing a potentially valid character.
            return begin;
        }
        ++begin;
        cp = (cp << 6) | (ch & 0x3FU);
    }

    // Smallest code point that legitimately needs nb bytes; anything below
    // is an overlong encoding.
    constexpr char32_t min_cp[] = {0x0, 0x0, 0x80, 0x800, 0x10000};
    const bool overlong = cp < min_cp[nb];
    const bool surrogate = cp >= 0xD800 && cp <= 0xDFFF;
    const bool too_large = cp > 0x10FFFF;
    if (!(overlong || surrogate || too_large)) {
        out = cp;
    }
    return begin;
}

/// @brief Check whether [str, end) starts with U+00B1 (±) or U+2213 (∓).
///
/// @param str First byte of the UTF-8 text to inspect.
/// @param end One past the last byte of the text.
/// @return Pointer just past the sign if one is present, otherwise `str`.
static const char* is_pm(const char *str, const char *end) {
    char32_t ch;
    const char *iter = u8to32(str, end, ch);
    if (ch == 0x00B1 || ch == 0x2213) {
        return iter;
    }
    return str;
}

/// @brief Split `val` into number, optional uncertainty and unit, and print
///        the result as one line on std::cout.
///
/// Output formats:
///  - `[S] <val>` when `val` does not start with a number,
///  - `[I] <n>` when the number is integral and fits into int64_t,
///  - `[F] <x>` for every other number,
/// optionally followed by ` uncertainty: <u>` (a ± or ∓ sign directly
/// followed by a number) and ` unit: <rest>` (all remaining bytes).
///
/// A ± sign that is not followed by a number is not treated as an
/// uncertainty; it stays part of the unit text.
///
/// @param val UTF-8 encoded text to parse.
static void parse_val(const std::string &val) {
    const char *begin = val.c_str();
    const char *past = begin + val.length();

    // try parsing val as a number
    char *end = nullptr;
    const double x = std::strtod(begin, &end);
    if (end == begin) {
        std::cout << "[S] " << val << '\n';
        return;
    }

    // x is reported as an integer only if it has no fractional part AND the
    // conversion to int64_t is defined, i.e. x lies in [-2^63, 2^63).
    // Infinity fails the range test, NaN fails the comparison with zero.
    constexpr double int64_limit = 9223372036854775808.0;  // 2^63, exact
    double intp = 0.0;
    const double frac = std::modf(x, &intp);
    if (frac == 0.0 && intp >= -int64_limit && intp < int64_limit) {
        std::cout << "[I] " << static_cast<int64_t>(intp);
    } else {
        std::cout << "[F] " << x;
    }

    const char *rest = end;
    if (rest == past) {
        std::cout << '\n';
        return;
    }

    const char *uc = is_pm(rest, past);
    if (uc != rest) {
        char *uc_end = nullptr;
        const double ucv = std::strtod(uc, &uc_end);
        if (uc_end != uc) {
            // only a sign followed by an actual number is an uncertainty
            std::cout << " uncertainty: " << ucv;
            rest = uc_end;
        }
    }

    if (rest != past) {
        std::cout << " unit: ";
        std::cout.write(rest, past - rest);
    }
    std::cout << '\n';
}

}  // namespace odml2
// Sample inputs for the parser. The plus-minus sign (U+00B1) is written as its
// explicit UTF-8 byte sequence "\xC2\xB1" so the table does not depend on the
// compiler's source/execution character set and still compiles when u8""
// literals are char8_t arrays (C++20 and later). Each literal is split after
// the escape so that following hex digits are not absorbed into it.
static const std::vector<std::string> tests = {
    "42",                        // bare integer
    "42mV",                      // integer + unit
    "42\xC2\xB1" "0.001mV",      // integer ± uncertainty + unit
    "3.141\xC2\xB1" "0.1deg",    // float ± uncertainty + unit
    "String",                    // not a number at all
    "23\xC2\xB1" "mV"            // ± sign without an uncertainty value
};
int main(int argc, char **argv) {
namespace o2 = odml2;
for (const auto &test : tests) {
std::cout << "Checking: [" << test << "] (" << test.length() << ")" << std::endl;
o2::parse_val(test);
std::cout << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment