Last active
September 22, 2018 04:21
-
-
Save DBJDBJ/bcc95d97626dddc2da6cfbf71b8056cd to your computer and use it in GitHub Desktop.
fast string trim with C++17 usage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Copyright 2018 by [email protected]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <string> | |
#include <string_view> | |
#include <stdlib.h> | |
#include <assert.h> | |
// Refuse to build below C++17 (std::string_view is required).
// MSVC reports the real language level through _MSVC_LANG (its __cplusplus
// stays at 199711L unless /Zc:__cplusplus is given); every other compiler
// reports it through __cplusplus.
#if !((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
#error C++17 required
#endif
namespace {

    // Value of the C string terminator.
    [[maybe_unused]] constexpr int EOS = int('\0');

    // Trimming policy: returns true for characters that must be removed
    // from both ends of the text. Only the plain space (' ') is trimmed.
    extern "C" inline bool dbj_isspace(unsigned char c)
    {
        return c == ' '; // just spaces are trimmed !
    }

    // Narrows the half-open range [*begin_, *end_) so that it no longer starts
    // or ends with a character for which dbj_isspace() is true.
    //
    // begin_ : in/out, address of the pointer to the first character
    // end_   : in/out, address of the pointer one past the last character
    //
    // On return *begin_ points at the first kept character and *end_ one past
    // the last kept character; the two are equal when nothing is left.
    // The range is walked strictly by its bounds: *end_ itself is never
    // dereferenced, so the text does not have to be zero terminated.
    // note: we use unsigned char to avoid accidents with negative char values
    extern "C" inline void string_trim_front_back_finder(unsigned char ** begin_, unsigned char ** end_)
    {
        assert(begin_ && end_);
        assert(*end_);
        assert(*begin_);
        assert(*begin_ <= *end_);

        // skip the leading spaces, never stepping beyond the end
        while (*begin_ != *end_ && dbj_isspace(**begin_)) {
            ++(*begin_);
        }
        // skip the trailing spaces; the character to examine is the one just
        // before *end_, and we never step in front of the (moved) begin
        while (*end_ != *begin_ && dbj_isspace(*(*end_ - 1))) {
            --(*end_);
        }
    }

    // Trims a zero terminated string, that is one with EOS ('\0') at the end.
    //
    // text : the string to trim; must not be null
    // p1   : out, receives the pointer to the first kept character
    // p2   : out, receives the pointer one past the last kept character
    //
    // The returned pointers refer into `text`; they are non-const only because
    // of the signature and must not be used to modify the characters.
    extern "C" inline void string_trim(const char * text, unsigned char ** p1, unsigned char ** p2)
    {
        assert(text);
        assert(p1 && p2);
        *p1 = reinterpret_cast<unsigned char *>(const_cast<char *>(text));
        // point at the EOS position; char_traits::length comes with <string>
        *p2 = *p1 + std::char_traits<char>::length(text);
        assert(**p2 == EOS);
        string_trim_front_back_finder(p1, p2);
    }

    // Returns a copy of `text` with leading and trailing spaces removed.
    // Only the characters inside the view are examined, so the view may be a
    // part of a larger buffer and does not have to be zero terminated.
    // An empty (or default constructed) view yields an empty string.
    inline std::string trimmer(::std::string_view text)
    {
        if (text.empty()) {
            return {};
        }
        unsigned char * p1 = reinterpret_cast<unsigned char *>(const_cast<char *>(text.data()));
        unsigned char * p2 = p1 + text.size();
        string_trim_front_back_finder(&p1, &p2);
        return std::string(reinterpret_cast<const char *>(p1), static_cast<std::string::size_type>(p2 - p1));
    }

} // nspace
int main() | |
{ | |
using namespace ::std::string_view_literals; | |
auto target = "LINE O FF TE XT"sv ; | |
std::string_view text[]{ | |
{ " LINE O FF TE XT "sv }, | |
{ " LINE O FF TE XT"sv }, | |
{ "LINE O FF TE XT"sv }, | |
{ " "sv } | |
}; | |
_ASSERTE( target == trimmer(text[0]) ); | |
_ASSERTE( target == trimmer(text[1]) ); | |
_ASSERTE( target == trimmer(text[2]) ); | |
// on trim, spaces are collapsing | |
// to empty string | |
_ASSERTE( "" == trimmer(text[3]) ); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NOTE1: This trims spaces only, and only from zero-terminated strings.
NOTE2: extern "C" does NOT mean that this is C code ...