Skip to content

Instantly share code, notes, and snippets.

@cybertxt
Last active July 17, 2019 02:28
Show Gist options
  • Save cybertxt/4352dff38af103e3b6465f788c4854c5 to your computer and use it in GitHub Desktop.
Save cybertxt/4352dff38af103e3b6465f788c4854c5 to your computer and use it in GitHub Desktop.
json parser
/**
# -*- coding:UTF-8 -*-
*/
#include "json_parser.hpp"
#include <assert.h>
// Shared default-constructed (null) object; the key-based operator[] returns a
// reference to it when the requested key is not present.
static JO _g_jo_empty;
JO::JO()
: m_jo_data({nullptr})
, m_type(jo_type_t::null)
{
}
JO::JO(jo_type_t type)
: JO()
{
set_type(type);
}
/**
 * Destructor: frees the member map or the element vector when this value is
 * an object or an array. Scalar values only point into the parsed buffer and
 * own nothing.
 */
JO::~JO()
{
    switch (m_type)
    {
    case jo_type_t::object:
        delete m_object_members;
        break;
    case jo_type_t::array:
        delete m_array_elements;
        break;
    default:
        break;
    }
}
/**
 * Move constructor: takes over the payload and type of val.
 *
 * The members are initialised directly instead of going through the move
 * assignment operator, so no uninitialised member is ever inspected.
 * val is left as a null value.
 */
JO::JO(JO&& val)
    : m_jo_data(val.m_jo_data)
    , m_type(val.m_type)
{
    val.m_jo_data = {nullptr};
    val.m_type = jo_type_t::null;
}

/**
 * Move assignment: releases whatever container this value currently owns,
 * then takes over the payload and type of val.
 *
 * val is reset to a null value so that it no longer claims to be an
 * object/array while holding a null container pointer.
 *
 * @return *this
 */
JO& JO::operator=(JO&& val)
{
    if (this != &val)
    {
        // Free the container we own before overwriting the pointer;
        // otherwise it would leak.
        if (m_type == jo_type_t::object)
        {
            delete m_object_members;
        }
        else if (m_type == jo_type_t::array)
        {
            delete m_array_elements;
        }

        m_jo_data = val.m_jo_data;
        m_type = val.m_type;

        val.m_jo_data = {nullptr};
        val.m_type = jo_type_t::null;
    }
    return *this;
}
// Escape lookup table used by the string parser. It is indexed by the
// character that follows a backslash; a non-zero entry is the byte written to
// the output for that escape, a zero entry means "no simple escape".
// Only the following control characters are considered for now:
//   \" \/ \\ \b \f \n \r \t
static const char json_parser_escape_table[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/', // '/' is not escaped (maps to itself)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\\', 0, 0, 0,
0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0,
0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
// Upper bound (1 GiB) on the input lengths the parser accepts.
#define MAX_JSON_LEN (1*1024*1024*1024)
/**
 * Consumes one expected character.
 *
 * @param p_cursor   in/out read position; advanced by one on a match
 * @param remain_len number of bytes still available at *p_cursor
 * @param token      the character that must be present
 * @return 0 if the character matched, -1 if input is exhausted or it differs
 */
static int32_t expect_token(char** p_cursor, uint32_t remain_len, char token)
{
    const bool matched = (remain_len != 0) && (**p_cursor == token);
    if (!matched)
    {
        return -1;
    }
    ++(*p_cursor);
    return 0;
}
/**
 * Appends one byte to the output position and advances it.
 * A NUL byte is silently dropped; other characters are not filtered for now.
 *
 * @param s in/out write position
 * @param c byte to append
 */
static void transfer_char(char** s, char c)
{
    if (c == 0)
    {
        return;
    }
    *(*s)++ = c;
}
static int32_t transfer_ucs2_to_utf8(char** s, char** p_cursor, uint32_t remain_len, char quote)
{
(void)quote;
if (remain_len < 5 || remain_len > MAX_JSON_LEN) return -1;
if (isxdigit((*p_cursor)[0]) && isxdigit((*p_cursor)[1])
&& isxdigit((*p_cursor)[2]) && isxdigit((*p_cursor)[3]))
{
uint8_t hi = (get_hexvalue((*p_cursor)[0]) << 4) | get_hexvalue((*p_cursor)[1]);
uint8_t lo = (get_hexvalue((*p_cursor)[2]) << 4) | get_hexvalue((*p_cursor)[3]);
if (hi&0xf8) //3B
{
*(*s)++ = 0xE0 | (hi >> 4);
*(*s)++ = 0x80 | ((hi & 0xf) << 2) | (lo >> 6);
*(*s)++ = 0x80 | (lo & 0x3f);
}
else if (hi) //2B
{
*(*s)++ = 0xC0 | ((hi & 0x07) << 2) | (lo >> 6);
*(*s)++ = 0x80 | (lo & 0x3f);
}
else //1B
{
transfer_char(s, lo);
}
}
else return -1;
*p_cursor += 4;
return 0;
}
/**
 * Decodes a quoted string in place.
 *
 * Reads from *p_cursor (just after the opening quote) up to the matching
 * closing quote, resolving escapes, and writes the decoded bytes to *s.
 * The output never gets ahead of the input, so both may refer to the same
 * buffer.
 *
 * Escapes handled: the entries of json_parser_escape_table, \uXXXX and \xHH.
 * Any other escaped character is copied through together with its backslash.
 *
 * Every read is bounds-checked against remain_len before it happens,
 * including the character after a backslash and the two digits of \xHH.
 *
 * @param s          in/out write position; NUL-terminated on success
 * @param p_cursor   in/out read position; on success points just past the
 *                   closing quote, on an escape error points at the offending
 *                   position, otherwise left unchanged
 * @param remain_len bytes available at *p_cursor
 * @param quote      the quote character that terminates the string
 * @return 0 on success, non-zero if the string is unterminated or an escape
 *         is malformed
 */
int32_t JO::parse_string(char** s, char** p_cursor, uint32_t remain_len, char quote)
{
    char* cursor = *p_cursor;
    char* const end = cursor + remain_len;

    while (cursor < end && *cursor != quote)
    {
        if (*cursor != '\\')
        {
            transfer_char(s, *cursor);
            ++cursor;
            continue;
        }

        ++cursor; // step over the backslash
        if (cursor >= end)
        {
            break; // dangling backslash: reported as unterminated below
        }

        const char escaped = *cursor;
        const char mapped = json_parser_escape_table[CHAR_TO_INT(escaped)];
        if (escaped == 'u')
        {
            ++cursor;
            const int32_t ret =
                transfer_ucs2_to_utf8(s, &cursor, static_cast<uint32_t>(end - cursor), quote);
            if (ret != 0)
            {
                *p_cursor = cursor;
                return ret;
            }
        }
        else if (mapped != 0)
        {
            *(*s)++ = mapped;
            ++cursor;
        }
        else if (escaped == 'x')
        {
            ++cursor;
            if (end - cursor >= 2
                && isxdigit(CHAR_TO_INT(cursor[0]))
                && isxdigit(CHAR_TO_INT(cursor[1])))
            {
                transfer_char(s, static_cast<char>((get_hexvalue(cursor[0]) << 4) | get_hexvalue(cursor[1])));
                cursor += 2;
            }
            else
            {
                *p_cursor = cursor;
                return -1;
            }
        }
        else
        {
            // Unknown escape: keep the backslash and the character as they are.
            *(*s)++ = cursor[-1];
            *(*s)++ = cursor[0];
            ++cursor;
        }
    }

    if (cursor >= end)
    {
        return -1; // no closing quote within the available input
    }

    **s = 0;
    *p_cursor = cursor + 1; // skip the closing quote
    return 0;
}
const char* JO::parse_key(char** p_cursor, uint32_t remain_len)
{
char* key;
char* s;
char quote;
//if (remain_len > MAX_JSON_LEN) return nullptr;
skip_space(*p_cursor);
if (**p_cursor != '"' && **p_cursor != '\'')
{
return nullptr;
}
quote = **p_cursor;
s = ++*p_cursor;
key = s;
if(parse_string(&s, p_cursor, remain_len, quote) == 0) return key;
else return nullptr;
}
/**
 * Parses a string value. *p_cursor must point just past the opening quote.
 * This object becomes a string whose text starts at that position in the
 * parsed buffer; the text is decoded in place by parse_string().
 *
 * @return 0 on success, non-zero on failure (including remain_len above
 *         MAX_JSON_LEN)
 */
int32_t JO::parse_value_string(char** p_cursor, uint32_t remain_len, char quote)
{
    if (remain_len > MAX_JSON_LEN)
    {
        return -1;
    }

    char* write_pos = *p_cursor;
    set_type(jo_type_t::string);
    m_jo_value = write_pos;
    return parse_string(&write_pos, p_cursor, remain_len, quote);
}
/**
 * Parses the literal "true" or "false".
 *
 * This object becomes a boolean whose value pointer refers to the literal in
 * the parsed buffer. The comparison is only made when enough input remains,
 * so nothing past remain_len is read.
 *
 * @param p_cursor   in/out read position; advanced past the literal on success
 * @param remain_len bytes available at *p_cursor
 * @return 0 on success, -1 if neither literal is present
 */
int32_t JO::parse_value_bool(char** p_cursor, uint32_t remain_len)
{
    set_type(jo_type_t::boolean);
    m_jo_value = *p_cursor;

    if (remain_len >= 4 && memcmp(*p_cursor, "true", 4) == 0)
    {
        *p_cursor += 4;
        return 0;
    }
    if (remain_len >= 5 && memcmp(*p_cursor, "false", 5) == 0)
    {
        *p_cursor += 5;
        return 0;
    }
    return -1;
}
/**
 * Parses a number.
 *
 * strtod() is used only to find where the number ends; the value itself is
 * converted later on demand from the text in the parsed buffer. The end of
 * the number is not NUL-terminated for now.
 *
 * A token from which strtod() consumes nothing (for example a lone '-'), or
 * one that extends beyond remain_len, is rejected instead of being reported
 * as a successfully parsed number.
 *
 * NOTE(review): strtod() itself stops only at a non-numeric character, so the
 * buffer is still assumed to be NUL-terminated or to end in a non-number byte.
 *
 * @param p_cursor   in/out read position; advanced past the number on
 *                   success, unchanged on failure
 * @param remain_len bytes available at *p_cursor
 * @return 0 on success, -1 if no number could be read
 */
int32_t JO::parse_value_number(char** p_cursor, uint32_t remain_len)
{
    char* const start = *p_cursor;
    char* stop = start;

    set_type(jo_type_t::number);
    m_jo_value = start;

    (void)strtod(start, &stop);
    if (stop == start || static_cast<size_t>(stop - start) > remain_len)
    {
        return -1;
    }

    *p_cursor = stop;
    return 0;
}
/**
 * Switches this value to the given type.
 *
 * A container owned under the previous type (object map or array vector) is
 * released first, so calling this on a value that already is an object or an
 * array no longer leaks it. For object and array a fresh empty container is
 * allocated. For the scalar types only the tag changes; the caller is
 * expected to set the value pointer afterwards.
 *
 * The type tag is written only after the allocation succeeded, so the tag
 * and the payload never disagree if the allocation throws.
 */
void JO::set_type(jo_type_t type)
{
    // Drop the container held under the old type, if any.
    if (m_type == jo_type_t::object)
    {
        delete m_object_members;
        m_jo_data = {nullptr};
        m_type = jo_type_t::null;
    }
    else if (m_type == jo_type_t::array)
    {
        delete m_array_elements;
        m_jo_data = {nullptr};
        m_type = jo_type_t::null;
    }

    if (type == jo_type_t::object)
    {
        m_jo_data.object_members = new jo_map_t;
    }
    else if (type == jo_type_t::array)
    {
        m_array_elements = new std::vector<JO>;
    }

    m_type = type;
}
int32_t JO::parse_value_array(char** p_cursor, uint32_t remain_len)
{
int32_t ret = -1;
char* cursor = *p_cursor;
char* end = cursor + remain_len;
//if (remain_len > MAX_JSON_LEN) return -1;
set_type(jo_type_t::array);
while (cursor <= end)
{
skip_space(cursor);
if (*cursor == ']')
{
++cursor;
ret = 0;
break;
}
if (*cursor == '"' || *cursor == '\'') //string
{
JO jo;
char quote = *cursor;
++cursor;
ret = jo.parse_value_string(&cursor, static_cast<uint32_t>(end - cursor), quote);
if (ret != 0) break;
else m_array_elements->push_back(std::move(jo));
}
else if (*cursor == '{') //object
{
JO jo;
++cursor;
ret = jo.parse_value_object(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_array_elements->push_back(std::move(jo));
}
else if (*cursor == '[') //array
{
JO jo;
++cursor;
ret = jo.parse_value_array(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_array_elements->push_back(std::move(jo));
}
else if (isdigit(*cursor) || *cursor == '-') //number
{
JO jo;
ret = jo.parse_value_number(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_array_elements->push_back(std::move(jo));
}
else if (*cursor == 't' || *cursor == 'f') //true,false
{
JO jo;
ret = jo.parse_value_bool(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_array_elements->push_back(std::move(jo));
}
else if (*cursor == 'n') //null
{
if (cursor[1] == 'u' && cursor[2] == 'l' && cursor[3] == 'l')
{
JO jo;
cursor += 4;
ret = 0;
jo.set_type(jo_type_t::null);
m_array_elements->push_back(std::move(jo));
}
else
{
ret = -1;
break;
}
}
else
{
ret = -1;
break;
}
skip_space(cursor);
if (*cursor != ']')
{
ret = expect_token(&cursor, static_cast<uint32_t>(end - cursor), ',');
if (ret != 0) break;
}
}
*p_cursor = cursor;
return ret;
}
int32_t JO::parse_value_object(char** p_cursor, uint32_t remain_len)
{
int32_t ret = -1;
char* cursor = *p_cursor;
char* end = cursor + remain_len;
//if (remain_len > MAX_JSON_LEN) return -1;
set_type(jo_type_t::object);
while (cursor <= end)
{
skip_space(cursor);
if (*cursor == '}')
{ //object end
++cursor;
ret = 0;
break;
}
JO jo;
const char* key = jo.parse_key(&cursor, static_cast<uint32_t>(end - cursor));
if (key == nullptr) break;
skip_space(cursor);
ret = expect_token(&cursor, static_cast<uint32_t>(end - cursor), ':');
if (ret != 0) break;
skip_space(cursor);
if (*cursor == '"' || *cursor == '\'') //string
{
char quote = *cursor;
++cursor;
ret = jo.parse_value_string(&cursor, static_cast<uint32_t>(end - cursor), quote);
if (ret != 0) break;
else m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else if (*cursor == '{') //object
{
++cursor;
ret = jo.parse_value_object(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else if (*cursor == '[') //array
{
++cursor;
ret = jo.parse_value_array(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else if (isdigit(*cursor) || *cursor == '-') //number
{
ret = jo.parse_value_number(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else if (*cursor == 't' || *cursor == 'f') //true,false
{
ret = jo.parse_value_bool(&cursor, static_cast<uint32_t>(end - cursor));
if (ret != 0) break;
else m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else if (*cursor == 'n') //null
{
if (cursor[1] == 'u' && cursor[2] == 'l' && cursor[3] == 'l')
{
cursor += 4;
ret = 0;
jo.set_type(jo_type_t::null);
m_object_members->insert(std::make_pair(key, std::move(jo)));
}
else
{
ret = -1;
break;
}
}
else
{
ret = -1;
break;
}
skip_space(cursor);
if (*cursor != '}')
{
ret = expect_token(&cursor, static_cast<uint32_t>(end - cursor), ',');
if (ret != 0) break;
}
}
*p_cursor = cursor;
return ret;
}
bool JO::parse_inplace(char** json, uint32_t remain_len)
{
int32_t ret = -1;
char* cursor = *json;
char* end = cursor + remain_len;
if (remain_len > MAX_JSON_LEN) return false;
skip_space(cursor);
if (*cursor == '{')
{
++cursor;
ret = parse_value_object(&cursor, static_cast<uint32_t>(end - cursor));
}
else if (*cursor == '[')
{
++cursor;
ret = parse_value_array(&cursor, static_cast<uint32_t>(end - cursor));
}
else
{
ret = -1;
}
*json = cursor;
if (ret != 0) return false;
else return true;
}
/** Reports which kind of JSON value this object currently holds. */
jo_type_t JO::type() const
{
    return this->m_type;
}
/**
 * Returns the value as a 32-bit integer.
 * Numbers and strings are converted with atoi(); a boolean yields 1 for true
 * and 0 for false; every other type yields 0.
 */
int32_t JO::i32() const
{
    switch (m_type)
    {
    case jo_type_t::number:
    case jo_type_t::string:
        return atoi(m_jo_value);
    case jo_type_t::boolean:
        return (*m_jo_value == 't') ? 1 : 0;
    default:
        return 0;
    }
}
/**
 * Returns the value as a 64-bit integer.
 * Numbers and strings are converted with atoll(); a boolean yields 1 for true
 * and 0 for false; every other type yields 0.
 */
int64_t JO::i64() const
{
    switch (m_type)
    {
    case jo_type_t::number:
    case jo_type_t::string:
        return atoll(m_jo_value);
    case jo_type_t::boolean:
        return (*m_jo_value == 't') ? 1 : 0;
    default:
        return 0;
    }
}
/**
 * Returns the value as a double.
 * Numbers and strings are converted with atof(); a boolean yields 1 for true
 * and 0 for false; every other type yields 0.
 */
double JO::dbl() const
{
    switch (m_type)
    {
    case jo_type_t::number:
    case jo_type_t::string:
        return atof(m_jo_value);
    case jo_type_t::boolean:
        return (*m_jo_value == 't') ? 1.0 : 0.0;
    default:
        return 0.0;
    }
}
/**
 * Returns the text of a string value, or a null pointer when this value is
 * not a string.
 */
char* JO::str() const
{
    if (m_type != jo_type_t::string)
    {
        return nullptr;
    }
    return m_jo_value;
}
// Zero-initialised buffer handed out by str2() when there is no text to return.
static char jo_str2_empty_string[4];

/**
 * Returns the textual form of a scalar value.
 *
 * For string, number and boolean values this is the pointer into the parsed
 * buffer (for numbers and booleans the text is not terminated at the end of
 * the token). For null - and also for object and array, whose payload is a
 * container pointer rather than text - an empty string is returned.
 * Previously object/array returned the container pointer reinterpreted as
 * char*.
 */
char* JO::str2() const
{
    switch (m_type)
    {
    case jo_type_t::string:
    case jo_type_t::number:
    case jo_type_t::boolean:
        return m_jo_value;
    default: // null, object, array: no text payload
        return jo_str2_empty_string;
    }
}
/**
 * Returns the boolean value: true only when this is a boolean whose literal
 * starts with 't'. Every other type yields false.
 */
bool JO::b() const
{
    return m_type == jo_type_t::boolean && *m_jo_value == 't';
}
/**
 * Looks up a member of an object by key.
 * Asserts that this value is an object. If the key is absent, a reference to
 * the shared empty (null) object is returned.
 */
const JO& JO::operator[](const char* key) const
{
    assert(m_type == jo_type_t::object);

    const jo_map_t* const members = m_object_members;
    const auto found = members->find(key);
    if (found == members->end())
    {
        return _g_jo_empty;
    }
    return found->second;
}
/**
 * Finds a member of the object jo by key and returns an iterator to it.
 * The payload of jo is used as a member map without checking the type.
 */
jo_map_t::const_iterator jo_find(const JO& jo, const char* key)
{
    const jo_map_t* const members = static_cast<const jo_map_t*>(jo.m_jo_data.object_members);
    return members->find(key);
}
/**
 * Returns an iterator to the first member of the object jo.
 * The payload of jo is used as a member map without checking the type.
 */
jo_map_t::const_iterator jo_begin(const JO& jo)
{
    const jo_map_t* const members = static_cast<const jo_map_t*>(jo.m_jo_data.object_members);
    return members->begin();
}
/**
 * Returns the past-the-end iterator of the members of the object jo.
 * The payload of jo is used as a member map without checking the type.
 */
jo_map_t::const_iterator jo_end(const JO& jo)
{
    const jo_map_t* const members = static_cast<const jo_map_t*>(jo.m_jo_data.object_members);
    return members->end();
}
/**
 * Returns the number of members (object) or elements (array).
 * Any other type reports 1.
 */
uint32_t JO::size() const
{
    switch (m_type)
    {
    case jo_type_t::object:
        return static_cast<uint32_t>(m_object_members->size());
    case jo_type_t::array:
        return static_cast<uint32_t>(m_array_elements->size());
    default:
        return 1;
    }
}
/**
 * Tells whether an object has no members or an array has no elements.
 * Any other type reports false.
 */
bool JO::empty() const
{
    switch (m_type)
    {
    case jo_type_t::object:
        return m_object_members->empty();
    case jo_type_t::array:
        return m_array_elements->empty();
    default:
        return false;
    }
}
/**
 * Returns the array element at position idx.
 * Asserts that this value is an array. An index that is negative or not less
 * than the number of elements yields a reference to the shared empty (null)
 * object - the same result the key-based operator[] gives for a missing key -
 * instead of indexing outside the vector.
 */
const JO& JO::operator[](int32_t idx) const
{
    assert(m_type == jo_type_t::array);
    if (idx < 0 || static_cast<size_t>(idx) >= m_array_elements->size())
    {
        return _g_jo_empty;
    }
    return (*m_array_elements)[static_cast<size_t>(idx)];
}
#if 0
// Disabled usage example (compiled out by the surrounding #if 0): parses a
// small document in place and prints a few of its values.
int main()
{
// The parser modifies the text, so it has to live in a writable buffer.
char s[100] = "{\"a\\x20b\":\"123\\u6211abc\\/\x0d\x0avwx\",'b':[1,2,3,{'ba':'aa','ba':'bb'}],'c':123.12}";
char* json = s;
JO jo;
if(!jo.parse_inplace(&json, strlen(json)))
{
// On failure json has been moved to the position where parsing stopped.
printf("json parse fail at: %s\n", json);
return -1;
}
// The key was written as "a\x20b" in the document and is decoded to "a b".
char* s1 = jo["a b"].str();
/*FILE* f;
fopen_s(&f, "1", "w");
fprintf(f, "%s\n", s1);
fclose(f);*/
printf("a b : %s\n", s1);
const JO& b = jo["b"];
const JO& b3 = b[3];
printf("b0 : %d\n", b[0].i32());
printf("b1 : %d\n", b[1].i32());
printf("b2 : %d\n", b[2].i32());
printf("ba : %s\n", b3["ba"].str());
printf("c : %f\n", jo["c"].dbl());
// The member map is a multimap, so the duplicated key 'ba' is stored twice.
auto it = jo_find(b3, "ba");
printf("ba : %s\n", it->second.str());
it++;
printf("ba : %s\n", it->second.str());
}
#endif
/**
# -*- coding:UTF-8 -*-
*/
#ifndef _JSON_PARSER_HPP_
#define _JSON_PARSER_HPP_
#include "string_utility.h"
#include <unordered_map>
#include <vector>
// Kind of JSON value held by a JO.
enum class jo_type_t
{
null,
boolean,
number,
object,
array,
string
};
// A JSON value produced by in-place parsing.
// Scalar values (string, number, boolean) keep a pointer into the parsed
// buffer; objects and arrays own a heap-allocated container that the
// destructor frees. Copying is disabled, moving is supported.
class JO
{
public:
// Payload; which member is meaningful depends on m_type.
union u_jo_data
{
char* value;
void* object_members;
std::vector<JO>* array_elements;
}m_jo_data;
// Shorthand accessors for the union members. object_members is stored as
// void* and cast because jo_map_t is only declared after this class.
#define m_object_members reinterpret_cast<jo_map_t*>(m_jo_data.object_members)
#define m_array_elements m_jo_data.array_elements
#define m_jo_value m_jo_data.value
jo_type_t m_type;
public:
JO();
JO(jo_type_t type);
~JO();
JO(const JO&) = delete;
JO& operator=(const JO&) = delete;
JO(JO&& val);
JO& operator=(JO&& val);
public:
int32_t i32() const;
int64_t i64() const;
char* str() const; // returns NULL for non-string types
char* str2() const; // non-string types are returned in string form; the null type returns an empty string
double dbl() const;
bool b() const;
jo_type_t type() const;
public:
// Parses the buffer in place; *json is moved to where parsing stopped.
bool parse_inplace(char** json, uint32_t json_len);
bool empty() const;
uint32_t size() const;
const JO& operator[](const char* key) const;
const JO& operator[](int32_t idx) const;
private:
int32_t parse_string(char** s, char** p_cursor, uint32_t remain_len, char quote);
const char* parse_key(char** p_cursor, uint32_t remain_len);
int32_t parse_value_string(char** p_cursor, uint32_t remain_len, char quote);
int32_t parse_value_number(char** p_cursor, uint32_t remain_len);
int32_t parse_value_bool(char** p_cursor, uint32_t remain_len);
int32_t parse_value_object(char** p_cursor, uint32_t remain_len);
int32_t parse_value_array(char** p_cursor, uint32_t remain_len);
private:
void set_type(jo_type_t type);
};
/**
 * Hash and equality functor for C-string keys, used for both the hasher and
 * the key-equality parameter of jo_map_t.
 */
struct CstrHashFunc
{
    /** Hashes the string contents with time33_hash(). */
    size_t operator()(const char* s) const
    {
        const size_t digest = time33_hash(s);
        return digest;
    }

    /** Returns non-zero when both strings have the same contents. */
    size_t operator()(const char* s1, const char* s2) const
    {
        const bool same = (strcmp(s1, s2) == 0);
        return same;
    }
};
// Member container of a JSON object: a multimap from C-string key to value,
// hashed and compared by string contents through CstrHashFunc.
typedef std::unordered_multimap<const char*, JO, CstrHashFunc, CstrHashFunc> jo_map_t;
//typedef std::unordered_multimap<std::string, JO> jo_map_t;
// Iteration and lookup helpers over the members of an object value.
jo_map_t::const_iterator jo_begin(const JO& jo);
jo_map_t::const_iterator jo_end(const JO& jo);
jo_map_t::const_iterator jo_find(const JO& jo, const char* key);
#endif
/**
# -*- coding:UTF-8 -*-
*/
#include "string_utility.h"
#include <stdlib.h>
#include <string.h>
#ifdef __cplusplus
extern "C"
{
#endif
/* Digit value of a character, indexed by the character's byte value:
 * '0'-'9' map to 0-9, 'A'-'Z' and 'a'-'z' map to 10-35, and every other
 * byte maps to \xff. */
const char* const tbl_hexvalue =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18"
"\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18"
"\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
/* Digit characters for values 0-35, in lower and upper case. */
const char* const tbl_int2hex_lower = "0123456789abcdefghijklmnopqrstuvwxyz";
const char* const tbl_int2hex_upper = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
#ifdef __GNUC__
/**
 * Converts a NUL-terminated string to lower case in place.
 * @return s
 */
char* strtolower(char* s)
{
    char* p;
    for (p = s; *p != 0; ++p)
    {
        *p = tolower(CHAR_TO_INT(*p));
    }
    return s;
}
/**
 * Converts a NUL-terminated string to upper case in place.
 * @return s
 */
char* strtoupper(char* s)
{
    char* p;
    for (p = s; *p != 0; ++p)
    {
        *p = toupper(CHAR_TO_INT(*p));
    }
    return s;
}
#elif defined(_WIN32)
/**
 * Case-insensitive substring search.
 * Every position of s1 is compared against s2 with strnicmp().
 * @return pointer to the first match inside s1, or NULL if there is none
 */
const char* stristr(const char* s1, const char* s2)
{
    const int32_t needle_len = (int32_t)strlen(s2);
    const char* pos;
    for (pos = s1; *pos != 0; ++pos)
    {
        if (strnicmp(pos, s2, needle_len) == 0)
        {
            return pos;
        }
    }
    return NULL;
}
#endif
/**
 * Converts a decimal string to a signed 64-bit integer.
 * Leading whitespace is skipped. A '-' must be followed directly by a digit,
 * otherwise 0 is returned. Everything else is handled by atou64().
 */
int64_t atoi64(const char *nptr)
{
    skip_space(nptr);
    if (*nptr != '-')
    {
        return atou64(nptr);
    }

    ++nptr;
    if (isdigit(CHAR_TO_INT(*nptr)))
    {
        return -(int64_t)atou64(nptr);
    }
    return 0;
}
/**
 * Converts a decimal string to an unsigned 64-bit integer.
 * Leading whitespace and one optional '+' are skipped; conversion stops at
 * the first non-digit. Overflow is not detected.
 */
uint64_t atou64(const char *nptr)
{
    uint64_t value = 0;
    skip_space(nptr);
    if (*nptr == '+')
    {
        ++nptr;
    }
    for (; isdigit(CHAR_TO_INT(*nptr)); ++nptr)
    {
        value = value * 10 + get_hexvalue(*nptr);
    }
    return value;
}
/**
 * Converts a decimal string to a signed 32-bit integer.
 * Leading whitespace is skipped. A '-' must be followed directly by a digit,
 * otherwise 0 is returned. Negative values are read with atou64() and
 * truncated; non-negative values are read with atou32().
 */
int32_t atoi32(const char *nptr)
{
    skip_space(nptr);
    if (*nptr != '-')
    {
        return atou32(nptr);
    }

    ++nptr;
    if (isdigit(CHAR_TO_INT(*nptr)))
    {
        return -(int32_t)atou64(nptr);
    }
    return 0;
}
/**
 * Converts a decimal string to an unsigned 32-bit integer.
 * Leading whitespace and one optional '+' are skipped; conversion stops at
 * the first non-digit. Overflow is not detected.
 */
uint32_t atou32(const char *nptr)
{
    uint32_t value = 0;
    skip_space(nptr);
    if (*nptr == '+')
    {
        ++nptr;
    }
    for (; isdigit(CHAR_TO_INT(*nptr)); ++nptr)
    {
        value = value * 10 + get_hexvalue(*nptr);
    }
    return value;
}
/**
 * Converts a string in the given base to a signed 64-bit integer.
 * Leading whitespace is skipped. A '-' must be followed directly by a letter
 * or digit; otherwise 0 is returned and *endptr (if non-NULL) is set to the
 * character after the '-'. The digits are read by strtou64().
 */
int64_t strtoi64(const char *nptr, const char **endptr, int32_t base)
{
    skip_space(nptr);
    if (*nptr != '-')
    {
        return strtou64(nptr, endptr, base);
    }

    ++nptr;
    if (isalnum(CHAR_TO_INT(*nptr)))
    {
        return -(int64_t)strtou64(nptr, endptr, base);
    }

    if (endptr)
    {
        *endptr = nptr;
    }
    return 0;
}
/**
 * Converts a string in the given base (2..36) to an unsigned 64-bit integer.
 * Leading whitespace and one optional '+' are skipped; conversion stops at
 * the first character that is not a digit of the base. For a base outside
 * 2..36 nothing is consumed and 0 is returned. Overflow is not detected.
 *
 * @param endptr if non-NULL, receives the address of the first character
 *               that was not consumed
 */
uint64_t strtou64(const char *nptr, const char **endptr, int32_t base)
{
    uint64_t value = 0;
    if (base >= 2 && base <= 36)
    {
        skip_space(nptr);
        if (*nptr == '+')
        {
            ++nptr;
        }
        while (isalnum(CHAR_TO_INT(*nptr)))
        {
            const int32_t digit = get_hexvalue(*nptr);
            if (digit >= base)
            {
                break;
            }
            ++nptr;
            value = value * base + digit;
        }
    }
    if (endptr)
    {
        *endptr = nptr;
    }
    return value;
}
/**
 * Converts a string in the given base to a signed 32-bit integer.
 * Leading whitespace is skipped. A '-' must be followed directly by a letter
 * or digit; otherwise 0 is returned and *endptr (if non-NULL) is set to the
 * character after the '-'. The digits are read by strtou32().
 */
int32_t strtoi32(const char *nptr, const char **endptr, int32_t base)
{
    skip_space(nptr);
    if (*nptr != '-')
    {
        return strtou32(nptr, endptr, base);
    }

    ++nptr;
    if (isalnum(CHAR_TO_INT(*nptr)))
    {
        return -(int32_t)strtou32(nptr, endptr, base);
    }

    if (endptr)
    {
        *endptr = nptr;
    }
    return 0;
}
/**
 * Converts a string in the given base (2..36) to an unsigned 32-bit integer.
 * Leading whitespace and one optional '+' are skipped; conversion stops at
 * the first character that is not a digit of the base. For a base outside
 * 2..36 nothing is consumed and 0 is returned. Overflow is not detected.
 *
 * @param endptr if non-NULL, receives the address of the first character
 *               that was not consumed
 */
uint32_t strtou32(const char *nptr, const char **endptr, int32_t base)
{
    uint32_t value = 0;
    if (base >= 2 && base <= 36)
    {
        skip_space(nptr);
        if (*nptr == '+')
        {
            ++nptr;
        }
        while (isalnum(CHAR_TO_INT(*nptr)))
        {
            const int32_t digit = get_hexvalue(*nptr);
            if (digit >= base)
            {
                break;
            }
            ++nptr;
            value = value * base + digit;
        }
    }
    if (endptr)
    {
        *endptr = nptr;
    }
    return value;
}
/**
 * Tells whether the string, after leading whitespace, starts with a decimal
 * integer: a digit, or '+'/'-' directly followed by a digit.
 * @return 1 if so, 0 otherwise
 */
int32_t is_string_integer(const char* str)
{
    skip_space(str);
    if (isdigit(CHAR_TO_INT(*str)))
    {
        return 1;
    }
    if (*str == '+' || *str == '-')
    {
        return isdigit(CHAR_TO_INT(str[1])) ? 1 : 0;
    }
    return 0;
}
/**
 * Tells whether the string, after leading whitespace, starts with an unsigned
 * decimal integer: a digit, or '+' directly followed by a digit.
 * @return 1 if so, 0 otherwise
 */
int32_t is_string_unsigned_integer(const char* str)
{
    skip_space(str);
    if (isdigit(CHAR_TO_INT(*str)))
    {
        return 1;
    }
    if (*str == '+')
    {
        return isdigit(CHAR_TO_INT(str[1])) ? 1 : 0;
    }
    return 0;
}
/**
 * Counts how often chr occurs in the first str_len bytes of str.
 */
uint32_t strchrcount(const char* str, uint32_t str_len, char chr)
{
    uint32_t hits = 0;
    const char* const stop = str + str_len;
    const char* p;
    for (p = str; p != stop; ++p)
    {
        if (*p == chr)
        {
            ++hits;
        }
    }
    return hits;
}
/**
 * Writes n as a decimal string.
 *
 * The magnitude of a negative value is computed in unsigned arithmetic, so
 * the most negative value is converted correctly instead of overflowing on
 * negation.
 *
 * @param result output buffer; must have room for the sign, the digits and
 *               the terminating NUL
 * @return length of the text written, not counting the terminating NUL
 */
uint32_t i64toa(int64_t n, char* result)
{
    if (n >= 0)
    {
        return u64toa((uint64_t)n, result);
    }
    *result = '-';
    return u64toa((uint64_t)0 - (uint64_t)n, result + 1) + 1;
}
/**
 * Writes n as a decimal string.
 * The digits are produced from the least significant end into a scratch
 * buffer and then copied to the output.
 *
 * @param result output buffer; must have room for the digits and the
 *               terminating NUL
 * @return number of digits written, not counting the terminating NUL
 */
uint32_t u64toa(uint64_t n, char* result)
{
    char digits[32];
    int32_t pos = 31;
    int32_t count;

    do
    {
        --pos;
        digits[pos] = n % 10 + '0';
        n /= 10;
    } while (n != 0);

    count = 31 - pos;
    memcpy(result, digits + pos, count);
    result[count] = 0;
    return count;
}
/**
 * Writes n as a decimal string.
 *
 * The magnitude of a negative value is computed in unsigned arithmetic, so
 * the most negative value is converted correctly instead of overflowing on
 * negation.
 *
 * @param result output buffer; must have room for the sign, the digits and
 *               the terminating NUL
 * @return length of the text written, not counting the terminating NUL
 */
uint32_t i32toa(int32_t n, char* result)
{
    if (n >= 0)
    {
        return u32toa((uint32_t)n, result);
    }
    *result = '-';
    return u32toa((uint32_t)0 - (uint32_t)n, result + 1) + 1;
}
/**
 * Writes n as a decimal string.
 * The digits are produced from the least significant end into a scratch
 * buffer and then copied to the output.
 *
 * @param result output buffer; must have room for the digits and the
 *               terminating NUL
 * @return number of digits written, not counting the terminating NUL
 */
uint32_t u32toa(uint32_t n, char* result)
{
    char digits[16];
    int32_t pos = 15;
    int32_t count;

    do
    {
        --pos;
        digits[pos] = n % 10 + '0';
        n /= 10;
    } while (n != 0);

    count = 15 - pos;
    memcpy(result, digits + pos, count);
    result[count] = 0;
    return count;
}
/**
 * Writes n as a string in the given base (2..36).
 *
 * For a base outside 2..36 an empty string is written and 0 is returned.
 * The magnitude of a negative value is computed in unsigned arithmetic, so
 * the most negative value is converted correctly instead of overflowing on
 * negation.
 *
 * @param result output buffer; must have room for the sign, the digits and
 *               the terminating NUL
 * @return length of the text written, not counting the terminating NUL
 */
uint32_t i64tostr(int64_t n, char* result, int32_t base)
{
    if (base < 2 || base > 36)
    {
        *result = 0;
        return 0;
    }
    if (n >= 0)
    {
        return u64tostr((uint64_t)n, result, base);
    }
    *result = '-';
    return u64tostr((uint64_t)0 - (uint64_t)n, result + 1, base) + 1;
}
/**
 * Writes n as a string in the given base (2..36), using the digit characters
 * provided by int2hex().
 * For a base outside 2..36 an empty string is written and 0 is returned.
 *
 * @param result output buffer; must have room for the digits and the
 *               terminating NUL
 * @return number of digits written, not counting the terminating NUL
 */
uint32_t u64tostr(uint64_t n, char* result, int32_t base)
{
    char digits[66];
    int32_t pos = 65;
    int32_t count;

    if (base < 2 || base > 36)
    {
        *result = 0;
        return 0;
    }

    do
    {
        --pos;
        digits[pos] = int2hex(n % base);
        n /= base;
    } while (n != 0);

    count = 65 - pos;
    memcpy(result, digits + pos, count);
    result[count] = 0;
    return count;
}
/**
 * Writes n as a string in the given base (2..36).
 *
 * The base is validated before anything is written, matching i64tostr():
 * for a base outside 2..36 an empty string is written and 0 is returned
 * (previously a negative n produced a lone "-" and a length of 1).
 * The magnitude of a negative value is computed in unsigned arithmetic, so
 * the most negative value is converted correctly instead of overflowing on
 * negation.
 *
 * @param result output buffer; must have room for the sign, the digits and
 *               the terminating NUL
 * @return length of the text written, not counting the terminating NUL
 */
uint32_t i32tostr(int32_t n, char* result, int32_t base)
{
    if (base < 2 || base > 36)
    {
        *result = 0;
        return 0;
    }
    if (n >= 0)
    {
        return u32tostr((uint32_t)n, result, base);
    }
    *result = '-';
    return u32tostr((uint32_t)0 - (uint32_t)n, result + 1, base) + 1;
}
/**
 * Writes n as a string in the given base (2..36), using the digit characters
 * provided by int2hex().
 * For a base outside 2..36 an empty string is written and 0 is returned.
 *
 * @param result output buffer; must have room for the digits and the
 *               terminating NUL
 * @return number of digits written, not counting the terminating NUL
 */
uint32_t u32tostr(uint32_t n, char* result, int32_t base)
{
    char digits[34];
    int32_t pos = 33;
    int32_t count;

    if (base < 2 || base > 36)
    {
        *result = 0;
        return 0;
    }

    do
    {
        --pos;
        digits[pos] = int2hex(n % base);
        n /= base;
    } while (n != 0);

    count = 33 - pos;
    memcpy(result, digits + pos, count);
    result[count] = 0;
    return count;
}
/**
 * Counts the lines in the first str_len bytes of str.
 *
 * The first '\r' or '\n' encountered decides which character is counted as
 * the line break for the rest of the text. A final line without a trailing
 * line break still counts as a line.
 *
 * @return the number of lines; 0 for an empty input
 */
uint32_t string_line_number(const char* str, uint32_t str_len)
{
    const char* const pend = str + str_len;
    const char* p = str;
    uint32_t line_number = 1;
    char line_break;

    if (str_len == 0)
    {
        return 0;
    }

    /* Find the first line-break character. */
    while (p < pend && *p != '\r' && *p != '\n')
    {
        ++p;
    }
    if (p == pend)
    {
        return line_number; /* no line break at all */
    }

    line_break = *p++;
    if (p == pend)
    {
        return line_number; /* the only line break is the last byte */
    }

    /* Count further occurrences of that same character. */
    for (; p < pend; ++p)
    {
        if (*p == line_break)
        {
            ++line_number;
        }
    }

    /* Text after the last break is one more line. */
    if (pend[-1] != '\r' && pend[-1] != '\n')
    {
        return line_number + 1;
    }
    return line_number;
}
/**
 * Removes leading whitespace from the first n bytes of s, in place.
 * When something was removed, the remaining text is moved to the front and
 * NUL-terminated; when nothing was removed, s is left untouched.
 *
 * @return length of the text after trimming
 */
uint32_t strltrim(char* s, uint32_t n)
{
    uint32_t skipped = 0;
    int32_t kept;

    while (skipped < n && isspace(CHAR_TO_INT(s[skipped])))
    {
        ++skipped;
    }
    if (skipped == 0)
    {
        return n;
    }

    kept = n - skipped;
    memmove(s, s + skipped, kept);
    s[kept] = 0;
    return kept;
}
/**
 * Removes trailing whitespace from the first n bytes of s, in place, and
 * NUL-terminates the result.
 *
 * The backward scan stops at the start of the buffer, so a string that
 * consists only of whitespace is trimmed to an empty string instead of the
 * scan running before s.
 *
 * @return length of the text after trimming; 0 (and no write) when n is 0
 */
uint32_t strrtrim(char* s, uint32_t n)
{
    uint32_t len = n;
    if (n == 0)
    {
        return 0;
    }
    /* The cast keeps the argument in the range isspace() accepts. */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
    {
        --len;
    }
    s[len] = 0;
    return len;
}
/**
 * Removes whitespace from both ends of the first n bytes of s, in place:
 * the right end is trimmed first, then the left end.
 *
 * @return length of the text after trimming
 */
uint32_t strtrim(char* s, uint32_t n)
{
    const uint32_t right_trimmed = strrtrim(s, n);
    return strltrim(s, right_trimmed);
}
/**
 * Writes every input byte as two hex digits (high nibble first) and
 * NUL-terminates the output.
 *
 * @param out output buffer of at least in_len*2+1 bytes
 * @return in_len*2
 */
uint32_t string2hex(const char* in, uint32_t in_len, char* out)
{
    const uint8_t* src = (const uint8_t*)in;
    const uint8_t* const src_end = src + in_len;
    char* dst = out;

    while (src < src_end)
    {
        const uint8_t byte = *src++;
        *dst++ = int2hex(byte >> 4);
        *dst++ = int2hex(byte & 0x0F);
    }
    *dst = 0;
    return in_len * 2;
}
/**
 * Converts pairs of hex digits back into bytes. An odd trailing digit is
 * ignored. The output is not NUL-terminated and the digits are not
 * validated.
 *
 * @param out output buffer of at least len/2 bytes
 * @return number of bytes written
 */
uint32_t hex2string(const char* in, uint32_t len, char* out)
{
    const uint32_t pairs = len / 2;
    uint32_t i;

    for (i = 0; i < pairs; ++i)
    {
        const char hi = get_hexvalue(in[2 * i]);
        const char lo = get_hexvalue(in[2 * i + 1]);
        out[i] = (hi << 4) | lo;
    }
    //out[pairs] = 0;
    return pairs;
}
/**
 * Decodes a URL-encoded string: "%xx" becomes the byte xx and '+' becomes a
 * space. A '%' that is not followed by two hex digits is copied unchanged.
 * The output is NUL-terminated. Output never gets ahead of input, so out may
 * be the same buffer as in.
 *
 * A "%xx" sequence is only decoded when all three characters lie within
 * in_len, so nothing beyond the stated input length is read.
 *
 * @param out output buffer of at least in_len+1 bytes
 * @return length of the decoded text, not counting the terminating NUL
 */
uint32_t decode_url(const char* in, uint32_t in_len, char* out)
{
    const char* p = in;
    const char* const pend = in + in_len;
    uint32_t len = 0;

    while (p < pend)
    {
        const char c = *p;
        if (c == '%'
            && pend - p >= 3
            && isxdigit(CHAR_TO_INT(p[1]))
            && isxdigit(CHAR_TO_INT(p[2])))
        {
            const char hi = get_hexvalue(p[1]);
            const char lo = get_hexvalue(p[2]);
            out[len++] = (hi << 4) | lo;
            p += 3;
        }
        else
        {
            out[len++] = (c != '+') ? c : ' ';
            ++p;
        }
    }
    out[len] = 0;
    return len;
}
/**
 * Percent-encodes a string.
 * ASCII letters and digits and the characters * @ - _ + . / are copied
 * unchanged; every other byte is written as %XX. The output is
 * NUL-terminated.
 *
 * @param out output buffer of at least in_len*3+1 bytes
 * @return length of the encoded text, not counting the terminating NUL
 */
uint32_t escape_uri(const char* in, uint32_t in_len, char* out)
{
    const char* const pend = in + in_len;
    const char* p;
    char* po = out;

    for (p = in; p < pend; ++p)
    {
        const char c = *p;
        int keep;

        switch (c)
        {
        case '*': case '@': case '-': case '_':
        case '+': case '.': case '/':
            keep = 1;
            break;
        default:
            keep = !(c & 0x80) && isalnum(CHAR_TO_INT(c));
            break;
        }

        if (keep)
        {
            *po++ = c;
        }
        else
        {
            *po++ = '%';
            *po++ = int2hex((int32_t)((uint8_t)(c) >> 4));
            *po++ = int2hex((int32_t)(c & 0xf));
        }
    }
    *po = 0;
    return (uint32_t)(po - out);
}
/**
 * Percent-encodes a URI.
 * ASCII letters and digits and the characters - _ . ! ~ * ' ( ) ; / ? : @ & =
 * + $ , # are copied unchanged; every other byte is written as %XX. The
 * output is NUL-terminated.
 *
 * @param out output buffer of at least in_len*3+1 bytes
 * @return length of the encoded text, not counting the terminating NUL
 */
uint32_t encode_uri(const char* in, uint32_t in_len, char* out)
{
    const char* const pend = in + in_len;
    const char* p;
    char* po = out;

    for (p = in; p < pend; ++p)
    {
        const char c = *p;
        int keep;

        switch (c)
        {
        case '-': case '_': case '.': case '!': case '~':
        case '*': case '\'': case '(': case ')': case ';':
        case '/': case '?': case ':': case '@': case '&':
        case '=': case '+': case '$': case ',': case '#':
            keep = 1;
            break;
        default:
            keep = !(c & 0x80) && isalnum(CHAR_TO_INT(c));
            break;
        }

        if (keep)
        {
            *po++ = c;
        }
        else
        {
            *po++ = '%';
            *po++ = int2hex((int32_t)((uint8_t)(c) >> 4));
            *po++ = int2hex((int32_t)(c & 0xf));
        }
    }
    *po = 0;
    return (uint32_t)(po - out);
}
/**
 * Percent-encodes a URI component.
 * ASCII letters and digits and the characters - _ . ! ~ * ' ( ) are copied
 * unchanged; every other byte is written as %XX. The output is
 * NUL-terminated.
 *
 * @param out output buffer of at least in_len*3+1 bytes
 * @return length of the encoded text, not counting the terminating NUL
 */
uint32_t encode_uri_component(const char* in, uint32_t in_len, char* out)
{
    const char* const pend = in + in_len;
    const char* p;
    char* po = out;

    for (p = in; p < pend; ++p)
    {
        const char c = *p;
        int keep;

        switch (c)
        {
        case '-': case '_': case '.': case '!': case '~':
        case '*': case '\'': case '(': case ')':
            keep = 1;
            break;
        default:
            keep = !(c & 0x80) && isalnum(CHAR_TO_INT(c));
            break;
        }

        if (keep)
        {
            *po++ = c;
        }
        else
        {
            *po++ = '%';
            *po++ = int2hex((int32_t)((uint8_t)(c) >> 4));
            *po++ = int2hex((int32_t)(c & 0xf));
        }
    }
    *po = 0;
    return (uint32_t)(po - out);
}
/**
 * "Times 33" hash of a NUL-terminated string: for every character the
 * running hash is multiplied by 33 and the character is added.
 */
uint32_t time33_hash(const char* s)
{
    uint32_t hash = 0;
    while (*s)
    {
        hash = hash * 33 + *s;
        ++s;
    }
    return hash;
}
/**
 * "Times 31" hash of a NUL-terminated string: for every character the
 * running hash is multiplied by 31 and the character is added.
 */
uint32_t time31_hash(const char* s)
{
    uint32_t hash = 0;
    while (*s)
    {
        hash = hash * 31 + *s;
        ++s;
    }
    return hash;
}
/**
 * "Times 33" hash of a byte range: for every byte (taken as unsigned) the
 * running hash is multiplied by 33 and the byte is added.
 */
uint32_t time33_hash_bin(const void* data, uint32_t length)
{
    const uint8_t* bytes = (const uint8_t*)data;
    uint32_t hash = 0;
    uint32_t i;
    for (i = 0; i < length; ++i)
    {
        hash = hash * 33 + bytes[i];
    }
    return hash;
}
/**
 * "Times 31" hash of a byte range: for every byte (taken as unsigned) the
 * running hash is multiplied by 31 and the byte is added.
 */
uint32_t time31_hash_bin(const void* data, uint32_t length)
{
    const uint8_t* bytes = (const uint8_t*)data;
    uint32_t hash = 0;
    uint32_t i;
    for (i = 0; i < length; ++i)
    {
        hash = hash * 31 + bytes[i];
    }
    return hash;
}
/**
 * One-at-a-time hash of a byte range: each byte is added and mixed into the
 * running hash, followed by a final mixing step.
 */
uint32_t one_at_a_time_hash_bin(const void* data, uint32_t length)
{
    const uint8_t* bytes = (const uint8_t*)data;
    uint32_t hash = 0;
    uint32_t i;

    for (i = 0; i < length; ++i)
    {
        hash += bytes[i];
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }

    /* final avalanche */
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);
    return hash;
}
/**
 * 32-bit hash of a byte range, mixing three 32-bit state words.
 * The input is consumed in 12-byte groups, each byte assembled
 * least-significant first into the three words; the remaining 0-11 bytes and
 * the total length are folded in before a final mix.
 *
 * @param data   bytes to hash
 * @param length number of bytes
 * @return the third state word after the final mix
 */
uint32_t bob_hash_bin(const void* data, uint32_t length)
{
/* Mixing step applied to the three state words. */
#define mix(a,b,c) \
a -= b; a -= c; a ^= (c >> 13);\
b -= c; b -= a; b ^= (a << 8);\
c -= a; c -= b; c ^= (b >> 13);\
a -= b; a -= c; a ^= (c >> 12);\
b -= c; b -= a; b ^= (a << 16);\
c -= a; c -= b; c ^= (b >> 5);\
a -= b; a -= c; a ^= (c >> 3);\
b -= c; b -= a; b ^= (a << 10);\
c -= a; c -= b; c ^= (b >> 15);
const uint8_t* k = (const uint8_t *)data;
uint32_t a, b, c, len;
/* Set up the internal state */
len = length;
a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */
/* Handle most of the key */
while (len >= 12)
{
a += (k[0] +((uint32_t)k[1] << 8) +((uint32_t)k[2] << 16) +((uint32_t)k[3] << 24));
b += (k[4] +((uint32_t)k[5] << 8) +((uint32_t)k[6] << 16) +((uint32_t)k[7] << 24));
c += (k[8] +((uint32_t)k[9] << 8) +((uint32_t)k[10]<< 16)+((uint32_t)k[11] << 24));
mix(a,b,c);
k += 12; len -= 12;
}
/* Handle the last 11 bytes */
c += length;
switch(len)/* all the case statements fall through */
{
case 11: c+=((uint32_t)k[10] << 24);
case 10: c+=((uint32_t)k[9] << 16);
case 9 : c+=((uint32_t)k[8] << 8);
/* the first byte of c is reserved for the length */
case 8 : b+=((uint32_t)k[7] << 24);
case 7 : b+=((uint32_t)k[6] << 16);
case 6 : b+=((uint32_t)k[5] << 8);
case 5 : b+=k[4];
case 4 : a+=((uint32_t)k[3] << 24);
case 3 : a+=((uint32_t)k[2] << 16);
case 2 : a+=((uint32_t)k[1] << 8);
case 1 : a+=k[0];
}
mix(a,b,c);
#undef mix
return c;
}
/*
 * A simple 32-bit checksum that can be updated from either end
 * (inspired by Mark Adler's Adler-32 checksum).
 *
 * The bulk of the buffer is processed four bytes per step; the remaining
 * bytes are added one at a time. The low 16 bits of the result hold the
 * byte sum, the high 16 bits the sum of the running sums.
 */
#define CHAR_OFFSET 0
uint32_t adler32_checksum(char *buf, int len)
{
    uint32_t s1 = 0;
    uint32_t s2 = 0;
    int i = 0;

    /* four bytes per step */
    while (i < (len - 4))
    {
        const char* q = buf + i;
        s2 += 4 * (s1 + q[0]) + 3 * q[1] + 2 * q[2] + q[3] +
              10 * CHAR_OFFSET;
        s1 += (q[0] + q[1] + q[2] + q[3] + 4 * CHAR_OFFSET);
        i += 4;
    }

    /* remaining bytes */
    while (i < len)
    {
        s1 += (buf[i] + CHAR_OFFSET);
        s2 += s1;
        ++i;
    }

    return (s1 & 0xffff) + (s2 << 16);
}
#undef CHAR_OFFSET
/*
 * Rolls the checksum window forward by one byte:
 * adler32_checksum(X0, ..., Xn), X0, Xn+1 ----> adler32_checksum(X1, ..., Xn+1)
 * where csum is adler32_checksum(X0, ..., Xn), len is the window length,
 * c1 is X0 (the byte leaving) and c2 is Xn+1 (the byte entering).
 */
uint32_t adler32_rolling_checksum(uint32_t csum, int len, char c1, char c2)
{
    uint32_t low = csum & 0xffff;
    uint32_t high = csum >> 16;

    low -= (c1 - c2);
    high -= (len * c1 - low);

    return (low & 0xffff) + (high << 16);
}
uint64_t time31_bob_mixed_hash_bin(const void* data, uint32_t length)
{
uint64_t time31_hash_val = time31_hash_bin(data, length);
uint64_t bob_hash_value = bob_hash_bin(data, length);
return (bob_hash_value<<32) | time31_hash_val;
}
/**
 * Checks a file name for forbidden characters.
 * The forbidden characters are \ / : * ? " < > |
 *
 * @return 1 if the name contains none of them, 0 otherwise
 */
int32_t is_file_name_valid(const char* name)
{
    static const char forbidden[] = "\\/:*?\"<>|";
    return strpbrk(name, forbidden) == NULL ? 1 : 0;
}
/**
 * Checks a file path for forbidden characters.
 * The forbidden characters are * ? " < > |
 * Path separators and ':' are allowed (the ':' check is disabled).
 *
 * @return 1 if the path contains none of them, 0 otherwise
 */
int32_t is_file_path_valid(const char* path)
{
    static const char forbidden[] = "*?\"<>|";
    return strpbrk(path, forbidden) == NULL ? 1 : 0;
}
#ifdef __cplusplus
}
#endif
/**
# -*- coding:UTF-8 -*-
*/
#ifndef _STRING_UTILITY_H_
#define _STRING_UTILITY_H_
#ifdef __GNUC__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#endif
#include <stdlib.h>
#include <stdint.h>
#include <ctype.h>
#include <string.h>
#ifdef __GNUC__
#include <strings.h>
#ifndef stricmp
#define stricmp(s1,s2) strcasecmp(s1,s2)
#endif
#ifndef strnicmp
#define strnicmp(s1,s2,n) strncasecmp(s1,s2,n)
#endif
#ifndef stristr
#define stristr(s1,s2) strcasestr(s1,s2)
#endif
//#define strtou64(nptr,endptr,base) strtoull(nptr,endptr,base) //暂不使用标准库
//#define strtoi64(nptr,endptr,base) strtoll(nptr,endptr,base)
#elif defined(_WIN32)
#define stricmp(a,b) _stricmp(a,b)
#define strnicmp(a,b,n) _strnicmp(a,b,n)
#define strtolower(s) _strlwr(s)
#define strtoupper(s) _strupr(s)
#else
#error os not support
#endif
#ifdef __cplusplus
extern "C"
{
#endif
#ifdef __GNUC__
char* strtolower(char* s);
char* strtoupper(char* s);
#elif defined(_WIN32)
//uint64_t strtou64(const char *nptr, const char **endptr, int base);
const char* stristr(const char* s1, const char* s2);
#endif
/**
* Convert a decimal string to a number.
*/
int64_t atoi64(const char *nptr);
uint64_t atou64(const char *nptr);
int32_t atoi32(const char *nptr);
uint32_t atou32(const char *nptr);
/**
* Convert a string in the given base to a number; *endptr (if endptr is
* non-null) receives the address of the first character that was not recognized.
*/
int64_t strtoi64(const char *nptr, const char **endptr, int32_t base);
uint64_t strtou64(const char *nptr, const char **endptr, int32_t base);
int32_t strtoi32(const char *nptr, const char **endptr, int32_t base);
uint32_t strtou32(const char *nptr, const char **endptr, int32_t base);
/* Converts a char to a non-negative int (0..255), as the <ctype.h> functions require. */
#define CHAR_TO_INT(c) ((int32_t)(uint32_t)(uint8_t)(c))
/* Advance the pointer variable str past whitespace / past printable non-space characters.
 * Both stop only at a character that does not match, so the text must be terminated. */
#define skip_space(str) while(isspace(CHAR_TO_INT(*str)))++str
#define skip_graph(str) while(isgraph(CHAR_TO_INT(*str)))++str
int32_t is_string_integer(const char* str);
int32_t is_string_unsigned_integer(const char* str);
uint32_t strchrcount(const char* str, uint32_t str_len, char chr);
/**
* Convert a number to a decimal string; the result buffer must include room
* for the terminating 0.
@return result_len (not counting the terminating 0)
*/
uint32_t i64toa(int64_t n, char* result);
uint32_t u64toa(uint64_t n, char* result);
uint32_t i32toa(int32_t n, char* result);
uint32_t u32toa(uint32_t n, char* result);
/**
* Convert a number to a string in the given base; the result buffer must
* include room for the terminating 0.
@return result_len (not counting the terminating 0)
*/
uint32_t i64tostr(int64_t n, char* result, int32_t base);
uint32_t u64tostr(uint64_t n, char* result, int32_t base);
uint32_t i32tostr(int32_t n, char* result, int32_t base);
uint32_t u32tostr(uint32_t n, char* result, int32_t base);
/**
* Count the number of lines in a string.
* Any one of \r, \n, \r\n, \n\r is supported as the line break; mixing them
* may lead to a wrong count.
@return the number of lines; a last line without a trailing line break still counts as a line
*/
uint32_t string_line_number(const char* str, uint32_t str_len);
/**
* Remove whitespace from the front of a string.
@return length of the string after the whitespace was removed
*/
uint32_t strltrim(char* s, uint32_t n);
/**
* Remove whitespace from the end of a string.
@return length of the string after the whitespace was removed
*/
uint32_t strrtrim(char* s, uint32_t n);
/**
* Remove whitespace from both ends of a string.
@return length of the string after the whitespace was removed
*/
uint32_t strtrim(char* s, uint32_t n);
/**
* Convert a string to a hexadecimal string; the out buffer needs in_len*2+1 bytes.
@return out_len (not counting the terminating 0)
*/
uint32_t string2hex(const char* in, uint32_t in_len, char* out);
/**
* Get the numeric value of a digit character.
* If the character is not a valid base-36 digit, the table entry is \xff
* (which reads as -1 where char is signed).
*/
extern const char* const tbl_hexvalue;
#define get_hexvalue(v) tbl_hexvalue[CHAR_TO_INT(v)]
/**
* Get the digit character of a base-36 value.
* If v is not a valid base-36 value, the result is undefined.
*/
extern const char* const tbl_int2hex_lower;
#define int2hex_lower(v) tbl_int2hex_lower[v]
extern const char* const tbl_int2hex_upper;
#define int2hex_upper(v) tbl_int2hex_upper[v]
#define int2hex(v) int2hex_upper(v)
/**
* Convert a hexadecimal string back to bytes; the out buffer needs in_len/2 bytes.
@return out_len
*/
uint32_t hex2string(const char* in, uint32_t len, char* out);
/**
* Decode an encoded URL (%xx -> char, + -> ' '); the out buffer needs at most in_len+1 bytes.
@return out_len
*/
uint32_t decode_url(const char* in, uint32_t in_len, char* out);
#define decode_url_inplace(url, url_len) decode_url(url, url_len, url)
/**
* Encode a string.
* ASCII letters, digits and *@-_+./ are kept; everything else is converted to %xx.
* The out buffer needs at most in_len*3 + 1 bytes.
@return out_len
*/
uint32_t escape_uri(const char* in, uint32_t in_len, char* out);
/**
* Encode a URI.
* ASCII letters, digits and -_.!~*'();/?:@&=+$,# are kept; everything else is converted to %xx.
* The out buffer needs at most in_len*3 + 1 bytes.
@return out_len
*/
uint32_t encode_uri(const char* in, uint32_t in_len, char* out);
/**
* Encode a URI component.
* ASCII letters, digits and -_.!~*'() are kept; everything else is converted to %xx.
* The out buffer needs at most in_len*3 + 1 bytes.
@return out_len
*/
uint32_t encode_uri_component(const char* in, uint32_t in_len, char* out);
/**
* Compute string / byte-range hashes and checksums.
*/
uint32_t time33_hash(const char* s);
uint32_t time31_hash(const char* s);
uint32_t time33_hash_bin(const void* data, uint32_t length);
uint32_t time31_hash_bin(const void* data, uint32_t length);
uint32_t one_at_a_time_hash_bin(const void* data, uint32_t length);
uint32_t bob_hash_bin(const void* data, uint32_t length);
uint32_t adler32_checksum(char* buf, int len);
uint32_t adler32_rolling_checksum(uint32_t csum, int len, char c1, char c2);
/**
* Compute the hash with both time31 and bob and combine them into one 64-bit result.
*/
uint64_t time31_bob_mixed_hash_bin(const void* data, uint32_t length);
/**
* Check whether the given file name contains illegal characters.
* The illegal characters are \/:*?"<>|
@return true/false (non-zero when the name contains none of them)
*/
int32_t is_file_name_valid(const char* name);
/**
* Check whether the given file path contains illegal characters.
* The illegal characters are *?"<>| (':' is currently not rejected by the implementation)
@return true/false (non-zero when the path contains none of them)
*/
int32_t is_file_path_valid(const char* path);
#ifdef __cplusplus
}
#endif
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment