Last active
February 9, 2021 14:04
-
-
Save kingsimba/0eeb7c5c900652186ec056dfd6c06a86 to your computer and use it in GitHub Desktop.
Try to make std::string more efficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "string_slice.h" | |
#include "gf_string.h" | |
#include "jansson/jansson.h" | |
#include "pch_gis_runtime.h" | |
#include "tr_stdlib.h" | |
// Decodes the next code point from the remaining bytes and advances past it.
//
// cOut             receives the decoded code point (must not be NULL).
// consumedBytesOut optional; receives the number of bytes the code point
//                  occupied.
// Returns false, leaving the iterator untouched, when no bytes remain or when
// the decoder reports no progress.
bool StringCharIter::Next(wchar32* cOut, int* consumedBytesOut) {
  // Nothing left to decode. This also keeps a negative length from being
  // converted into a huge unsigned buffer size by the decoder call below.
  if (length_ <= 0) {
    return false;
  }

  int32_t c;
  const char* newStr = utf8_iterate(str_, length_, &c);
  // NOTE(review): assumes the Jansson utf8_iterate contract -- NULL for a
  // malformed or truncated sequence, the unchanged pointer for an empty
  // buffer. Without the NULL check a malformed byte would set str_ to NULL
  // and corrupt length_. Confirm against the bundled implementation.
  if (newStr == NULL || newStr == str_) {
    return false;
  }

  int consumedBytes = (int)(newStr - str_);
  length_ -= consumedBytes;
  str_ = newStr;
  *cOut = c;
  if (consumedBytesOut != NULL) *consumedBytesOut = consumedBytes;
  return true;
}
////////////////////////////////////////////////////////////////////////// | |
// Scans [s, sEnd) for the first byte equal to c.
// Returns a pointer to that byte, or sEnd (never NULL) when it is absent.
static const char* _strchr(const char* s, const char* sEnd, char c) {
  const char* cursor = s;
  while (cursor != sEnd && *cursor != c) {
    ++cursor;
  }
  return cursor;
}
// Compares up to s2len bytes of s2 against the bytes in [s1, s1End).
//
// Returns the difference of the first mismatching pair (as unsigned chars),
// or 0 when no mismatch was seen. The comparison stops early, still
// returning 0, when a NUL byte is matched or when s1 reaches s1End, so a
// haystack that ends before the needle does compares as equal.
static int _strncmp(const char* s1, const char* s1End, const char* s2, size_t s2len) {
  for (size_t i = 0; i < s2len && s1 + i != s1End; ++i) {
    const unsigned char lhs = static_cast<unsigned char>(s1[i]);
    const unsigned char rhs = static_cast<unsigned char>(s2[i]);
    if (lhs != rhs) {
      return lhs - rhs;
    }
    if (lhs == '\0') {
      break;
    }
  }
  return 0;
}
static const char* _strstr(const char* s1, const char* s1End, const char* s2, size_t s2len) { | |
const char* p = s1; | |
for (; (p = _strchr(p, s1End, *s2)) != NULL; p++) { | |
if (_strncmp(p, s1End, s2, s2len) == 0) return p; | |
} | |
return p; | |
} | |
// Produces the next piece of the string being split.
//
// cOut     receives the piece, which runs up to the next separator or to the
//          end of the string.
// rangeOut optional; receives the piece's byte offset from the start of the
//          string together with its length.
// Returns false once the cursor has reached the end of the string. A
// separator at the very end therefore does not yield a trailing empty piece.
bool StringSubsliceIter::Next(StringSlice* cOut, Range* rangeOut) {
  if (str_ == str_end_) {
    return false;
  }

  const char* const pieceBegin = str_;
  const char* const pieceEnd = _strstr(pieceBegin, str_end_, sep_, sep_length_);

  *cOut = StringSlice(pieceBegin, (int)(pieceEnd - pieceBegin));
  if (rangeOut != NULL) {
    *rangeOut = Range_make((int)(pieceBegin - str_start_), cOut->Length());
  }

  // Skip the separator only when one was actually found.
  str_ = (pieceEnd == str_end_) ? pieceEnd : pieceEnd + sep_length_;
  return true;
}
// Splits this slice on every occurrence of sep and returns the pieces in
// order. The pieces are slices built by the split iterator; no bytes are
// copied here.
std::vector<StringSlice> StringSlice::Split(StringSlice sep) {
  std::vector<StringSlice> pieces;
  StringSubsliceIter splitter = iterBySpliting(sep);
  for (StringSlice piece; splitter.Next(&piece);) {
    pieces.push_back(piece);
  }
  return pieces;
}
////////////////////////////////////////////////////////////////////////// | |
// Searches for the code point `code`, starting at byte offset `start`.
//
// Returns the byte range (offset from the beginning of this slice, encoded
// length) of the first match, or invalidRange when the iterator runs out
// without finding it. `start` is not validated here.
Range StringSlice::FindFrom(int start, wchar32 code) {
  StringSlice tail = Subslice(start, length_ - start);
  StringCharIter chars = tail.Iter();

  int offset = start;  // byte offset of the code point about to be decoded
  wchar32 current;
  int width;
  while (chars.Next(&current, &width)) {
    if (current == code) {
      return Range_make(offset, width);
    }
    offset += width;
  }
  return invalidRange;
}
int StringSlice::RFind(char c) { | |
for (int i = length_ - 1; i >= 0; i--) { | |
if (str_[i] == c) { | |
return i; | |
} | |
} | |
return -1; | |
} | |
// Searches for `needle`, starting at byte offset `start`.
//
// Returns the byte range (offset from the beginning of this slice, needle
// length) of the first match, or invalidRange when the search reaches the
// end of the slice. `start` is not validated here.
Range StringSlice::FindSliceFrom(int start, StringSlice needle) {
  const char* const sliceEnd = str_ + length_;
  const char* const hit = _strstr(str_ + start, sliceEnd, needle.str_, needle.length_);
  if (hit != sliceEnd) {
    return Range_make((int)(hit - str_), needle.length_);
  }
  return invalidRange;
}
// Builds a GfString from this slice's bytes via GfString::AllocWithBytes.
sp<GfString> StringSlice::ToString() {
  return GfString::AllocWithBytes(str_, length_);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include "gf_object.h" | |
#include "tr_stdlib.h" | |
class GfString; | |
class StringSlice; | |
/**
 * Iterates over the characters (Unicode code points) of a string.
 *
 * ```
 * auto iter = StringSlice(u8"hello world").Iter();
 * wchar32 c;
 * while (iter.Next(&c)) {
 *   use(c);
 * }
 * ```
 */
class StringCharIter { | |
public: | |
StringCharIter(StringSlice& slice); | |
bool Next(wchar32* cOut) { return Next(cOut, NULL); } | |
bool Next(wchar32* cOut, int* consumedBytesOut); | |
private: | |
const char* str_; | |
int length_; | |
}; | |
/**
 * Splits a string on a separator string.
 *
 * For example, splitting "hello---world" on "---" yields "hello" and "world".
 *
 * ```
 * auto iter = StringSlice("hello---world").iterBySpliting("---");
 * StringSlice slice;
 * while (iter.Next(&slice)) {
 *   use(slice);
 * }
 * ```
 */
class StringSubsliceIter { | |
public: | |
StringSubsliceIter(StringSlice& slice, StringSlice& sep); | |
bool Next(StringSlice* slice_out) { return Next(slice_out, NULL); } | |
bool Next(StringSlice* slice_out, Range* rangeOut); | |
private: | |
const char* str_start_; | |
const char* str_end_; | |
const char* str_; | |
const char* sep_; | |
int sep_length_; | |
}; | |
/** | |
* StringSlice uses UTF-8 encoding | |
*/ | |
class StringSlice { | |
public: | |
forceinline StringSlice() { | |
str_ = NULL; | |
length_ = 0; | |
} | |
forceinline StringSlice(const char* str) { | |
str_ = (char*)str; | |
length_ = (int)strlen(str); | |
} | |
forceinline StringSlice(const char* str, int len) { | |
str_ = (char*)str; | |
length_ = len; | |
} | |
forceinline void Init(const char* str, int len) { | |
str_ = (char*)str; | |
length_ = len; | |
} | |
////////////////////////////////////////////////////////////////////////// | |
// Accessors | |
forceinline const char* Buffer() { return str_; } | |
forceinline int Length() { return length_; } | |
forceinline StringCharIter Iter() { return StringCharIter(*this); } | |
forceinline StringSubsliceIter iterBySpliting(StringSlice sep) { | |
return StringSubsliceIter(*this, sep); | |
} | |
////////////////////////////////////////////////////////////////////////// | |
// Conversions | |
// copy to C string | |
forceinline void ToCString(char* str, size_t max_len) { | |
if (max_len < (size_t)length_ + 1) { | |
str[0] = 0; | |
return; | |
} | |
memcpy(str, str_, length_); | |
str[length_] = 0; | |
} | |
// Create a standalone GfString | |
sp<GfString> ToString(); | |
////////////////////////////////////////////////////////////////////////// | |
// Search | |
// Find subslice | |
forceinline Range FindSlice(StringSlice needle) { return FindSliceFrom(0, needle); } | |
Range FindSliceFrom(int start, StringSlice needle); | |
// Find Unicode character | |
forceinline Range Find(wchar32 code) { return FindFrom(0, code); } | |
Range FindFrom(int start, wchar32 code); | |
// Find ANSI character. | |
// If the string contains multi-byte character in UTF8, it will misbehave. | |
forceinline int Find(char c) { return FindFrom(0, c); } | |
int FindFrom(int start, char c); | |
int RFind(char c); | |
////////////////////////////////////////////////////////////////////////// | |
// Actions | |
forceinline StringSlice Subslice(int start, int length) { | |
return StringSlice(str_ + start, length); | |
} | |
std::vector<StringSlice> Split(StringSlice sep); | |
////////////////////////////////////////////////////////////////////////// | |
// Equals | |
forceinline bool Equals(StringSlice* r) { | |
return length_ == r->length_ && memcmp(str_, r->str_, length_) == 0; | |
} | |
forceinline bool Equals(const char* r) { | |
return length_ == strlen(r) && memcmp(str_, r, length_) == 0; | |
} | |
protected: | |
char* str_; | |
int length_; | |
}; | |
// Captures the slice's buffer pointer and byte length as the iteration state.
inline StringCharIter::StringCharIter(StringSlice& slice)
    : str_(slice.Buffer()), length_(slice.Length()) {}
// Captures the bounds of `slice` and the bytes of `sep`; iteration starts at
// the first byte of `slice`.
inline StringSubsliceIter::StringSubsliceIter(StringSlice& slice, StringSlice& sep)
    : str_start_(slice.Buffer()),
      str_end_(slice.Buffer() + slice.Length()),
      str_(slice.Buffer()),
      sep_(sep.Buffer()),
      sep_length_(sep.Length()) {}
inline int StringSlice::FindFrom(int start, char c) { | |
for (const char* p = str_ + start; *p; p++) { | |
if (*p == c) return (int)(p - str_); | |
} | |
return -1; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Shorthand for a shared-ownership pointer to T.
template <typename T>
using sptr = std::shared_ptr<T>;

// A vector whose elements are shared pointers to T.
template <typename T>
using svector = std::vector<std::shared_ptr<T>>;

// The definitions that follow use unqualified standard-library names.
using namespace std;

// Declared early because MyObject::ToString returns a pointer to it.
class MyString;
class MyObject { | |
public: | |
virtual sptr<MyString> ToString() = 0; | |
}; | |
class MyString : public MyObject, public StringSlice { | |
public: | |
static sptr<MyString> AllocWithCString(const char* str) { return make_shared<MyString>(str); } | |
static sptr<MyString> AllocWithSlice(StringSlice str) { return make_shared<MyString>(str.Buffer(), str.Length()); } | |
virtual sptr<MyString> ToString() { return sptr<MyString>(this); } | |
MyString(const char* str) : str_(str) { this->Init(&str_[0], (int)str_.size()); } | |
MyString(const char* str, int len) : str_(str, len) { this->Init(&str_[0], (int)str_.size()); } | |
const char* CStr() { return this->str_.c_str(); } | |
private: | |
string str_; | |
}; | |
template <typename T> | |
class MyArray : public MyObject { | |
public: | |
static sptr<MyArray<T>> Alloc() { return make_shared<MyArray<T>>(); } | |
void AddObject(sptr<T> obj) { this->array_.push_back(obj); } | |
sptr<T> ObjectAtIndex(int i) { return this->array_[i]; } | |
virtual sptr<MyString> ToString() { return MyString::AllocWithCString("This is an array"); } | |
private: | |
vector<sptr<T>> array_; | |
}; | |
template<typename T> | |
class MyArrayRef { | |
public: | |
MyArrayRef(sptr<MyArray<T>> arr) : array_(arr) {} | |
sptr<T> operator[](size_t i) { return this->array_->ObjectAtIndex((int)i); } | |
sptr<MyArray<T>> operator->() { return array_; } | |
private: | |
sptr<MyArray<T>> array_; | |
}; | |
// Exercises pointer casts between MyString and MyObject, then checks that the
// words produced by Split land in the array after the two whole-string entries.
TEST_F(ArrayTest, vector) {
  sptr<MyString> text = MyString::AllocWithCString("hello world");

  // Up-cast to the base type, then down-cast back to the concrete type.
  sptr<MyObject> asObject = text;
  text = static_pointer_cast<MyString>(asObject);

  MyArrayRef<MyString> list = MyArray<MyString>::Alloc();
  list->AddObject(text);
  list->AddObject(text);

  // Indices 0 and 1 hold the full string; the split words follow.
  std::vector<StringSlice> words = text->Split(" ");
  for (StringSlice word : words) {
    list->AddObject(MyString::AllocWithSlice(word));
  }

  EXPECT_STREQ(list[2]->CStr(), "hello");
  EXPECT_STREQ(list[3]->CStr(), "world");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment