Skip to content

Instantly share code, notes, and snippets.

@ptomulik
Created August 8, 2015 21:31
Show Gist options
  • Save ptomulik/b9e78807b2b243d229c6 to your computer and use it in GitHub Desktop.
Save ptomulik/b9e78807b2b243d229c6 to your computer and use it in GitHub Desktop.
wstream_convert
// clxx/common/wstring_convert.hpp
/** // doc: clxx/common/wstring_convert.hpp {{{
* \file clxx/common/wstring_convert.hpp
* \brief Provides clxx::wstring_convert
*/ // }}}
#ifndef CLXX_COMMON_WSTRING_CONVERT_HPP_INCLUDED
#define CLXX_COMMON_WSTRING_CONVERT_HPP_INCLUDED
#include <cstddef>
#include <locale>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace clxx {
/** // doc: wstring_convert {{{
* \brief String converter
*
* Class template clxx::wstring_convert performs conversion between byte string
* \c std::string and wide string \c std::basic_string<Elem>, using an
* individual code conversion facet \p Codecvt. clxx::wstring_convert assumes
* ownership of the conversion facet, and cannot use a facet managed by a
* locale.
*
* This is a local implementation of the standard class std::wstring_convert,
* which is not available in the libstdc++ shipped with gcc 4.9.2. It should
* be replaced with an alias to std::wstring_convert at some point.
*/ // }}}
template< class Codecvt,
          class Elem = wchar_t,
          class Wide_alloc = std::allocator<Elem>,
          class Byte_alloc = std::allocator<char> >
class wstring_convert
{
public:
  typedef std::basic_string<char, std::char_traits<char>, Byte_alloc> byte_string;
  typedef std::basic_string<Elem, std::char_traits<Elem>, Wide_alloc> wide_string;
  typedef typename Codecvt::state_type state_type;
  typedef typename wide_string::traits_type::int_type int_type;
public:
  /** // doc: wstring_convert(Codecvt*) {{{
   * \brief Constructs the wstring_convert object with a specified conversion
   *        facet, using default-constructed values for the shift state and
   *        no user-supplied error strings
   *
   * \param pcvt
   *    Conversion facet; this object takes ownership and deletes it in its
   *    destructor. Must not be null if any conversion is going to be
   *    performed.
   */ // }}}
  explicit wstring_convert(Codecvt* pcvt = new Codecvt)
    : cvt_(pcvt),
      state_(),
      byte_error_(),
      wide_error_(),
      converted_(0),
      has_state_(false),
      has_error_strings_(false)
  { }
  /** // doc: wstring_convert(Codecvt*, state_type) {{{
   * \brief Constructs the wstring_convert object with a specified conversion
   *        facet and specified shift state, with no user-supplied error
   *        strings
   *
   * Because an explicit state was given, the state is NOT reset at the
   * beginning of each conversion; it is carried from one call to the next.
   *
   * \param pcvt  Conversion facet; ownership is transferred to this object.
   * \param state Initial shift state.
   */ // }}}
  wstring_convert(Codecvt* pcvt, state_type state)
    : cvt_(pcvt),
      state_(state),
      byte_error_(),
      wide_error_(),
      converted_(0),
      has_state_(true),
      has_error_strings_(false)
  { }
  /** // doc: wstring_convert(const byte_string&, const wide_string&) {{{
   * \brief Constructs the wstring_convert object with specified error strings,
   *        using <tt>new Codecvt</tt> as the conversion facet and the
   *        default-constructed \c state_type as shift state
   *
   * \param byte_err String returned by #to_bytes() on conversion failure.
   * \param wide_err String returned by #from_bytes() on conversion failure.
   */ // }}}
  explicit wstring_convert(const byte_string& byte_err,
                           const wide_string& wide_err = wide_string())
    : cvt_(new Codecvt),       // owned at once, so a throwing copy below cannot leak it
      state_(),
      byte_error_(byte_err),
      wide_error_(wide_err),
      converted_(0),
      has_state_(false),
      has_error_strings_(true)
  { }
  /** // doc: wstring_convert(const wstring_convert&) {{{
   * \brief The copy constructor is deleted, wstring_convert is not CopyConstructible
   */ // }}}
  wstring_convert(const wstring_convert&) = delete;
  /** // doc: operator=(const wstring_convert&) {{{
   * \brief The copy assignment is deleted, wstring_convert is not CopyAssignable
   *
   * The object uniquely owns its conversion facet, so a member-wise copy
   * would end in a double delete.
   */ // }}}
  wstring_convert& operator=(const wstring_convert&) = delete;
  /** // doc: ~wstring_convert() {{{
   * \brief Destructor, deletes the owned conversion facet
   */ // }}}
  ~wstring_convert() = default;
  /** // doc: state() {{{
   * \brief Returns the current value of the conversion state, which is stored
   *        in this wstring_convert object. The conversion state may be
   *        explicitly set in the constructor and is updated by all conversion
   *        operations
   */ // }}}
  state_type state() const
  { return state_; }
  /** // doc: converted() {{{
   * \brief Returns the number of source characters that were processed by the
   *        most recent #from_bytes() or #to_bytes(), whether or not that
   *        conversion succeeded
   */ // }}}
  std::size_t converted() const noexcept
  { return converted_; }
  /** // doc: from_bytes(char) {{{
   * \brief Converts a byte as if it was a string of length 1 to wide_string
   *
   * Behaves exactly as #from_bytes(const char*, const char*) applied to the
   * one-element range holding \p byte.
   */ // }}}
  wide_string from_bytes(char byte)
  {
    return from_bytes(&byte, &byte + 1);
  }
  /** // doc: from_bytes(const char*) {{{
   * \brief Converts the null-terminated multibyte character sequence beginning
   *        at the character pointed to by \p ptr to wide_string
   *
   * Behaves exactly as #from_bytes(const char*, const char*) applied to the
   * range up to (not including) the terminating null character.
   */ // }}}
  wide_string from_bytes(const char* ptr)
  {
    return from_bytes(ptr, ptr + std::char_traits<char>::length(ptr));
  }
  /** // doc: from_bytes(const byte_string&) {{{
   * \brief Converts the narrow string \p str to wide_string
   *
   * Behaves exactly as #from_bytes(const char*, const char*) applied to the
   * whole contents of \p str.
   */ // }}}
  wide_string from_bytes(const byte_string& str)
  {
    return from_bytes(str.data(), str.data() + str.size());
  }
  /** // doc: from_bytes(const char*, const char*) {{{
   * \brief Converts the narrow multibyte character sequence
   *        <tt>[first, last)</tt> to wide_string
   *
   * The conversion begins in initial shift state, unless a starting state
   * was provided to this wstring_convert constructor. The number of
   * characters converted and the final value of the conversion state are
   * remembered and can be accessed with #state() and #converted().
   *
   * If the facet reports \c noconv and \c Elem is \c char, the input is
   * copied to the result unchanged.
   *
   * \returns
   *    A wide_string object containing the result of multibyte to wide
   *    conversion. If the conversion failed and there was a user-supplied
   *    wide-error string provided to the constructor of this wstring_convert,
   *    returns that wide-error string.
   *
   * \throw std::range_error
   *    On conversion failure (facet result \c error or \c partial), only if
   *    this wstring_convert object was constructed without user-supplied
   *    error strings.
   */ // }}}
  wide_string from_bytes(const char* first, const char* last)
  {
    if(!has_state_)
      state_ = state_type();

    // One wide element per input byte is the worst case; 4 spare elements
    // are kept as slack, as in the original sizing.
    wide_string wstr(4 + (last - first), static_cast<Elem>('\0'));
    Elem* const wbegin = &wstr[0];
    const char* next = first;
    Elem* wnext = wbegin;
    const std::codecvt_base::result r =
      cvt_->in(state_, first, last, next, wbegin, wbegin + wstr.size(), wnext);

    if(r == std::codecvt_base::noconv && same_char_type::value)
    {
      // Nothing was written by the facet: the input already is the output.
      copy_unconverted(first, last, wstr, same_char_type());
      converted_ = static_cast<std::size_t>(last - first);
      return wstr;
    }

    converted_ = static_cast<std::size_t>(next - first);
    if(r == std::codecvt_base::ok || r == std::codecvt_base::noconv)
    {
      wstr.resize(wnext - wbegin);
      return wstr;
    }
    if(has_error_strings_)
      return wide_error_;
    throw std::range_error("wstring_convert");
  }
  /** // doc: to_bytes(Elem) {{{
   * \brief Converts \p wchar as if it was a string of length 1, to byte_string
   *
   * Behaves exactly as #to_bytes(const Elem*, const Elem*) applied to the
   * one-element range holding \p wchar.
   */ // }}}
  byte_string to_bytes(Elem wchar)
  {
    return to_bytes(&wchar, &wchar + 1);
  }
  /** // doc: to_bytes(const Elem*) {{{
   * \brief Converts the null-terminated wide character sequence beginning at
   *        the wide character pointed to by \p wptr, to byte_string
   *
   * Behaves exactly as #to_bytes(const Elem*, const Elem*) applied to the
   * range up to (not including) the terminating null character.
   */ // }}}
  byte_string to_bytes(const Elem* wptr)
  {
    return to_bytes(wptr, wptr + std::char_traits<Elem>::length(wptr));
  }
  /** // doc: to_bytes(const wide_string&) {{{
   * \brief Converts the wide string \p wstr to byte_string
   *
   * Behaves exactly as #to_bytes(const Elem*, const Elem*) applied to the
   * whole contents of \p wstr.
   */ // }}}
  byte_string to_bytes(const wide_string& wstr)
  {
    return to_bytes(wstr.data(), wstr.data() + wstr.size());
  }
  /** // doc: to_bytes(const Elem*, const Elem*) {{{
   * \brief Converts the wide character sequence <tt>[first, last)</tt> to
   *        byte_string
   *
   * The conversion begins in initial shift state, unless a starting state
   * was provided to this wstring_convert constructor. The number of
   * characters converted and the final value of the conversion state are
   * remembered and can be accessed with #state() and #converted().
   *
   * If the facet reports \c noconv and \c Elem is \c char, the input is
   * copied to the result unchanged.
   *
   * \returns
   *    A byte_string object containing the results of the wide to multibyte
   *    conversion. If the conversion failed and there was a user-supplied
   *    byte-error string provided to the constructor of this wstring_convert,
   *    returns that byte-error string.
   *
   * \throw std::range_error
   *    On conversion failure (facet result \c error or \c partial), only if
   *    this wstring_convert object was constructed without user-supplied
   *    error strings.
   */ // }}}
  byte_string to_bytes(const Elem* first, const Elem* last)
  {
    if(!has_state_)
      state_ = state_type();

    // max_length() bytes per wide element is the worst case; 4 spare bytes
    // are kept as slack, as in the original sizing.
    byte_string str(4 + (last - first) * cvt_->max_length(), '\0');
    char* const begin = &str[0];
    char* next = begin;
    const Elem* wnext = first;
    const std::codecvt_base::result r =
      cvt_->out(state_, first, last, wnext, begin, begin + str.size(), next);

    if(r == std::codecvt_base::noconv && same_char_type::value)
    {
      // Nothing was written by the facet: the input already is the output.
      copy_unconverted(first, last, str, same_char_type());
      converted_ = static_cast<std::size_t>(last - first);
      return str;
    }

    converted_ = static_cast<std::size_t>(wnext - first);
    if(r == std::codecvt_base::ok || r == std::codecvt_base::noconv)
    {
      str.resize(next - begin);
      return str;
    }
    if(has_error_strings_)
      return byte_error_;
    throw std::range_error("wstring_convert");
  }
private:
  /// True when the wide and the byte character types are the same type.
  typedef std::is_same<Elem, char> same_char_type;

  /// Copies <tt>[first, last)</tt> to \p out; chosen when both character
  /// types are identical, so no element conversion is involved.
  template<class String, class Char>
  static void copy_unconverted(const Char* first, const Char* last,
                               String& out, std::true_type)
  { out.assign(first, last); }

  /// Never called at run time; exists only so that the call compiles when
  /// the character types differ, without instantiating a mixed-type assign.
  template<class String, class Char>
  static void copy_unconverted(const Char*, const Char*,
                               String&, std::false_type)
  { }
private:
  std::unique_ptr<Codecvt> cvt_;  ///< owned conversion facet
  state_type  state_;             ///< current shift state
  byte_string byte_error_;        ///< returned by to_bytes() on failure
  wide_string wide_error_;        ///< returned by from_bytes() on failure
  std::size_t converted_;         ///< source elements consumed by last call
  bool        has_state_;         ///< state supplied by user: do not reset
  bool        has_error_strings_; ///< error strings supplied by user
};
} // end namespace clxx
#endif /* CLXX_COMMON_WSTRING_CONVERT_HPP_INCLUDED */
// vim: set expandtab tabstop=2 shiftwidth=2:
// vim: set foldmethod=marker foldcolumn=4:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment