Skip to content

Instantly share code, notes, and snippets.

@mitsu-ksgr
Last active August 29, 2015 14:04
Show Gist options
  • Save mitsu-ksgr/7c20b4a71e1ab5a465e2 to your computer and use it in GitHub Desktop.
Save mitsu-ksgr/7c20b4a71e1ab5a465e2 to your computer and use it in GitHub Desktop.
【C++】文字列中に指定文字列がいくつ出現するかカウントする関数。UTF-8の文字列の長さを取得する関数。
#include <iostream>
#include <string>
/**
* @brief Return the number of occurrences of substring 'sub' in string 'src'.
*
* Occurrences are allowed to overlap: the search resumes one character after
* the start of the previous match, so "aa" is found three times in "aaaa".
*
* @param src search target.
* @param sub substring to look for.
* @return the number of (possibly overlapping) occurrences of 'sub' in 'src';
*         0 when 'sub' is empty.
*/
int countSubStr(const std::string &src, const std::string &sub)
{
    // An empty pattern has no meaningful occurrence count; report none.
    if (sub.empty())
        return 0;

    int count = 0;
    // Positions are kept as size_type: squeezing find()'s result into an int
    // truncates it and makes the comparison against npos depend on a
    // narrowing conversion.
    for (std::string::size_type pos = src.find(sub);
         pos != std::string::npos;
         pos = src.find(sub, pos + 1)) {
        ++count;
    }
    return count;
}
/**
* @brief Return the length of UTF-8 String.
*
* The length is the number of encoded characters (lead bytes), not the number
* of bytes. Bytes that cannot start a character (stray continuation bytes and
* 0xF8-0xFF) are skipped without being counted.
*
* @param str NUL-terminated UTF-8 String. A null pointer is treated as empty.
* @return the length of UTF-8 String, in characters.
*/
int getLengthUTF8String(const char *str)
{
    // Note: http://ja.wikipedia.org/wiki/UTF-8
    //
    // 1byte: 0xxxxxxx
    // 2byte: 110xxxxx
    // 3byte: 1110xxxx
    // 4byte: 11110xxx
    // trail: 10xxxxxx
    if (str == nullptr)
        return 0;

    int count = 0;
    // Look at the bytes as unsigned so the bit masks below do not depend on
    // whether plain char is signed on this platform.
    const unsigned char *p = reinterpret_cast<const unsigned char *>(str);
    while (*p != 0) {
        const unsigned char lead = *p++;

        int trailing;
        if ((lead & 0x80) == 0x00) {
            trailing = 0;               // 1 byte character
        } else if ((lead & 0xE0) == 0xC0) {
            trailing = 1;               // 2 byte character
        } else if ((lead & 0xF0) == 0xE0) {
            trailing = 2;               // 3 byte character
        } else if ((lead & 0xF8) == 0xF0) {
            trailing = 3;               // 4 byte character
        } else {
            continue;                   // Is not first character
        }
        ++count;

        // Consume only the continuation bytes that are really there. The NUL
        // terminator is not a continuation byte, so a truncated sequence can
        // never step past the end of the string, and a character that follows
        // a malformed sequence is not swallowed.
        while (trailing > 0 && (*p & 0xC0) == 0x80) {
            ++p;
            --trailing;
        }
    }
    return count;
}
/**
* @brief Return the length of UTF-8 String held in a std::string.
*
* Forwards to the C-string overload via c_str(), so counting stops at the
* first embedded NUL byte, if any.
*
* @param str UTF-8 String. Taken by const reference so const strings and
*            temporaries can be measured as well.
* @return the length of UTF-8 String, as reported by the C-string overload.
*/
int getLengthUTF8String(const std::string &str) {
    return getLengthUTF8String(str.c_str());
}
/**
* @brief Demo driver for countSubStr and getLengthUTF8String.
*
* With at least two command-line arguments, argv[1] is the search target and
* argv[2] the substring; otherwise a built-in sample pair is used.
*
* @param argc number of command-line arguments.
* @param argv command-line arguments.
* @return always 0.
*/
int main(int argc, const char **argv)
{
    // Pick the inputs for the substring-count demo.
    const bool hasArgs = (argc >= 3);
    std::string src = hasArgs ? argv[1] : "abcabcabc";
    std::string sub = hasArgs ? argv[2] : "bc";

    std::cout << "src = " << src << std::endl;
    std::cout << "sub = " << sub << std::endl;

    // Test.
    const int occurrences = countSubStr(src, sub);
    std::cout << "cnt = " << occurrences << std::endl;

    // Test2.
    std::cout << "------------------------" << std::endl;
    std::string sample = "ABCDEΑΒΓΔΕあいうえお𥼣𥽜𥿠𥿔𨷻";
    const int characters = getLengthUTF8String(sample);
    std::cout << "utf8 = " << sample << std::endl;
    // Byte length next to character length, to show the difference.
    std::cout << "length = " << sample.length() << std::endl;
    std::cout << "getLengthUTF8String = " << characters << std::endl;

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment