Fix UTF-16 conversion

Follow the surrogate-pair encoding and decoding rules used in:
https://github.com/brofield/simpleini/blob/master/ConvertUTF.c
This commit is contained in:
Benau 2019-06-11 08:29:48 +08:00
parent 5af761b841
commit 0fdbe5d34f
2 changed files with 43 additions and 19 deletions

View File

@ -209,17 +209,22 @@ namespace utf8
while (start != end) {
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
if (internal::is_lead_surrogate(cp)) {
if (start != end) {
if (internal::is_lead_surrogate(cp))
{
uint32_t lead_surrogate = cp;
if (start != end)
{
uint32_t trail_surrogate = internal::mask16(*start++);
if (internal::is_trail_surrogate(trail_surrogate))
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
{
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
}
else
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
}
else
throw invalid_utf16(static_cast<uint16_t>(cp));
}
// Lone trail surrogate
else if (internal::is_trail_surrogate(cp))
@ -239,17 +244,20 @@ namespace utf8
// Take care of surrogate pairs first
if (internal::is_lead_surrogate(cp))
{
uint32_t lead_surrogate = cp;
if (start != end)
{
uint32_t trail_surrogate = internal::mask16(*start++);
if (internal::is_trail_surrogate(trail_surrogate))
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
{
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10 ) +
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
}
else
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
}
else
throw invalid_utf16(static_cast<uint16_t>(cp));
}
// Lone trail surrogate
else if (internal::is_trail_surrogate(cp))
@ -263,10 +271,14 @@ namespace utf8
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
while (start != end) {
while (start != end)
{
uint32_t cp = next(start, end);
if (cp > 0xffff) { //make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
if (cp > 0xffff)
{
cp -= 0x10000;
//make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
}
else
@ -292,8 +304,9 @@ namespace utf8
uint32_t cp = *start++;
if (cp > 0xffff)
{
cp -= 0x10000;
//make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
}
else

View File

@ -129,12 +129,16 @@ namespace utf8
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
while (start != end) {
while (start != end)
{
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
if (internal::is_lead_surrogate(cp)) {
// Take care of surrogate pairs first
if (internal::is_lead_surrogate(cp))
{
uint32_t lead_surrogate = cp;
uint32_t trail_surrogate = internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
}
result = append(cp, result);
}
@ -150,8 +154,10 @@ namespace utf8
// Take care of surrogate pairs first
if (internal::is_lead_surrogate(cp))
{
uint32_t lead_surrogate = cp;
uint32_t trail_surrogate = internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
}
(*result++) = cp;
}
@ -161,10 +167,14 @@ namespace utf8
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
while (start < end) {
while (start < end)
{
uint32_t cp = next(start);
if (cp > 0xffff) { //make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
if (cp > 0xffff)
{
cp -= 0x10000;
//make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
}
else
@ -190,8 +200,9 @@ namespace utf8
uint32_t cp = *start++;
if (cp > 0xffff)
{
cp -= 0x10000;
//make a surrogate pair
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
}
else