Created
February 8, 2013 06:23
-
-
Save lifthrasiir/4737055 to your computer and use it in GitHub Desktop.
starlight/text/encoding_system_win32.cpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * This file is a part of libseit library of theseit project.
 * Copyright (c) 2005-2007, theseit project team.
 * See README, COPYING and AUTHORS files for more information.
 */
#define LIBSEIT_INTERNAL | |
#define WINDOWS_LEAN_AND_MEAN | |
#include <map> | |
#include <windows.h> | |
#include "libseit/ustring.h" | |
#include "libseit/exception.h" | |
#include "libseit/textcodec/encoding.h" | |
#include "libseit/textcodec/encoding_system.h" | |
namespace theseit { | |
namespace libseit { | |
////////////////////////////////////////////////////////////////////////////////
// support function
static char encodingname[16] = {'c', 'p'}; // "cp" + at most 10 digits + '\0' | |
const char *TextEncodingSystem::realName() const | |
{ | |
if (!encodingname[2]) { | |
UINT acp = GetACP(); | |
// trying to avoid sprintf | |
if (acp > 9) { | |
if (acp > 99) { | |
if (acp > 999) { | |
if (acp > 9999) { | |
encodingname[6] = '0' + (acp % 10); acp /= 10; | |
} | |
encodingname[5] = '0' + (acp % 10); acp /= 10; | |
} | |
encodingname[4] = '0' + (acp % 10); acp /= 10; | |
} | |
encodingname[3] = '0' + (acp % 10); acp /= 10; | |
} | |
encodingname[2] = '0' + (acp % 10); | |
} | |
return encodingname; | |
} | |
////////////////////////////////////////////////////////////////////////////////
// stateless encoder/decoder
std::string TextEncodingSystem::encode | |
(const uchar_t *buf, std::size_t len, TextErrorCallback*) const | |
{ | |
int reslen = WideCharToMultiByte(CP_ACP, 0, reinterpret_cast<const wchar_t*>(buf), | |
len, NULL, 0, NULL, NULL); | |
if (reslen == 0) throw windows_error(); | |
std::string result(std::size_t(reslen), 0); | |
reslen = WideCharToMultiByte(CP_ACP, 0, reinterpret_cast<const wchar_t*>(buf), | |
len, const_cast<char*>(result.data()), reslen, NULL, NULL); | |
if (reslen == 0) throw windows_error(); | |
return result; | |
} | |
// Converts `len` bytes starting at `buf` from the system ANSI code page
// (CP_ACP) to a ustring with MultiByteToWideChar, mirroring encode().
//
// The previous body was a single-byte table decoder that referred to an
// undeclared `callback` (the parameter is unnamed) and to a 128-entry charmap,
// which cannot represent a multi-byte system code page.
//
// The TextErrorCallback argument is not consulted: the API is called with
// flags 0 (no MB_ERR_INVALID_CHARS), so malformed input is not reported as an
// error by this call.
//
// Returns the converted text; an empty input yields an empty ustring.
// Throws windows_error when either API call reports failure (returns 0).
// NOTE(review): assumes ustring is a std::basic_string-like container of
// uchar_t and that uchar_t is layout-compatible with wchar_t, which encode()
// already relies on -- confirm against libseit/ustring.h.
ustring TextEncodingSystem::decode
    (const char *buf, std::size_t len, TextErrorCallback*) const
{
    // MultiByteToWideChar treats a source length of 0 as an error.
    if (len == 0) return ustring();

    // NOTE(review): the API takes an int length; inputs longer than INT_MAX
    // bytes are not representable here and are not handled specially.
    const int srclen = static_cast<int>(len);

    // First pass: ask for the required output size in wide characters.
    int reslen = MultiByteToWideChar(CP_ACP, 0, buf, srclen, NULL, 0);
    if (reslen == 0) throw windows_error();

    // Second pass: convert straight into the string's own storage. reslen is
    // non-zero here, so &result[0] is a valid writable pointer.
    ustring result(std::size_t(reslen), uchar_t(0));
    reslen = MultiByteToWideChar(CP_ACP, 0, buf, srclen,
                                 reinterpret_cast<wchar_t*>(&result[0]), reslen);
    if (reslen == 0) throw windows_error();
    return result;
}
////////////////////////////////////////////////////////////////////////////////
// stateful encoder
class TextEncoderSimple : public TextEncoder | |
{ | |
public: | |
TextEncoderSimple(const TextEncodingSimple *enc, TextErrorCallback *callback = 0): | |
TextEncoder(enc, callback) | |
{} | |
virtual std::string operator()(const uchar_t *buf, std::size_t len); | |
virtual std::string operator()(ustring src) | |
{ | |
return operator()(src.data(), src.length()); | |
} | |
virtual TextEncoder *copy() const { return new TextEncoderShiftJIS(*this); } | |
protected: | |
const TextEncodingSimple *encoding() const | |
{ | |
return reinterpret_cast<const TextEncodingSimple*>(encoding_); | |
} | |
}; | |
std::string TextEncoderSimple::operator()(const uchar_t *buf, std::size_t len) | |
{ | |
std::string result; | |
reversemap_t map = reversemap[encoding()]; | |
while (len--) { | |
if (*buf < 128) { | |
result.push_back(static_cast<char>(*buf++)); | |
} else { | |
reversemap_t::iterator it = map.find(*buf++); | |
if (it == map.end()) { | |
result.append(callback_->invalid(encoding_, buf - 1, buf)); | |
} else { | |
result.push_back((*it).second); | |
} | |
} | |
} | |
return result; | |
} | |
// Allocates a TextEncoderSimple bound to this encoding and the given error
// callback, and returns it through the TextEncoder base pointer.
// NOTE(review): the object comes from plain `new`; presumably the caller is
// responsible for deleting it -- confirm against the TextEncoding contract.
TextEncoder *TextEncodingSimple::makeEncoder(TextErrorCallback *callback) const
{
    TextEncoder *encoder = new TextEncoderSimple(this, callback);
    return encoder;
}
////////////////////////////////////////////////////////////////////////////////
// stateful decoder
class TextDecoderSimple : public TextDecoder | |
{ | |
public: | |
TextDecoderSimple(const TextEncodingSimple *enc, TextErrorCallback *callback = 0): | |
TextDecoder(enc, callback) | |
{} | |
virtual ustring operator()(const char *buf, std::size_t len); | |
virtual ustring operator()(std::string src) | |
{ | |
return operator()(src.data(), src.length()); | |
} | |
virtual TextDecoder *copy() const { return new TextDecoderShiftJIS(*this); } | |
protected: | |
const TextEncodingSimple *encoding() const | |
{ | |
return reinterpret_cast<const TextEncodingSimple*>(encoding_); | |
} | |
}; | |
// Decodes `len` bytes starting at `buf` into a ustring.
//
// Bytes below 128 are widened and copied through unchanged. Every other byte
// indexes the encoding's charmap table at (byte - 128); a table entry of
// 0xfffd marks the byte as unmapped, in which case the output of
// callback_->invalid() for that single byte is appended instead.
ustring TextDecoderSimple::operator()(const char *buf, std::size_t len)
{
    ustring decoded;
    const uchar_t *table = encoding()->getCharmap()->mapping;
    const char *const end = buf + len;

    for (const char *cur = buf; cur != end; ++cur) {
        const unsigned char byte = static_cast<unsigned char>(*cur);

        if (byte < 128) {
            decoded.push_back(static_cast<uchar_t>(*cur));
            continue;
        }

        const uchar_t mapped = table[byte - 128];
        if (mapped == 0xfffd) {
            // Report exactly the one offending byte: [cur, cur + 1).
            decoded.append(callback_->invalid(encoding_, cur, cur + 1));
        } else {
            decoded.push_back(mapped);
        }
    }
    return decoded;
}
// Allocates a TextDecoderSimple bound to this encoding and the given error
// callback, and returns it through the TextDecoder base pointer.
// NOTE(review): the object comes from plain `new`; presumably the caller is
// responsible for deleting it -- confirm against the TextEncoding contract.
TextDecoder *TextEncodingSimple::makeDecoder(TextErrorCallback *callback) const
{
    TextDecoder *decoder = new TextDecoderSimple(this, callback);
    return decoder;
}
////////////////////////////////////////////////////////////////////////////////
} // namespace libseit | |
} // namespace theseit |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment