Fix UTF-16 conversion
Follow the surrogate-pair encoding/decoding rules in: https://github.com/brofield/simpleini/blob/master/ConvertUTF.c
This commit is contained in:
parent
5af761b841
commit
0fdbe5d34f
@@ -209,17 +209,22 @@ namespace utf8
|
||||
while (start != end) {
|
||||
uint32_t cp = internal::mask16(*start++);
|
||||
// Take care of surrogate pairs first
|
||||
if (internal::is_lead_surrogate(cp)) {
|
||||
if (start != end) {
|
||||
if (internal::is_lead_surrogate(cp))
|
||||
{
|
||||
uint32_t lead_surrogate = cp;
|
||||
if (start != end)
|
||||
{
|
||||
uint32_t trail_surrogate = internal::mask16(*start++);
|
||||
if (internal::is_trail_surrogate(trail_surrogate))
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
{
|
||||
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
|
||||
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
|
||||
}
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
|
||||
}
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(cp));
|
||||
|
||||
}
|
||||
// Lone trail surrogate
|
||||
else if (internal::is_trail_surrogate(cp))
|
||||
@@ -239,17 +244,20 @@ namespace utf8
|
||||
// Take care of surrogate pairs first
|
||||
if (internal::is_lead_surrogate(cp))
|
||||
{
|
||||
uint32_t lead_surrogate = cp;
|
||||
if (start != end)
|
||||
{
|
||||
uint32_t trail_surrogate = internal::mask16(*start++);
|
||||
if (internal::is_trail_surrogate(trail_surrogate))
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
{
|
||||
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10 ) +
|
||||
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
|
||||
}
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
|
||||
}
|
||||
else
|
||||
throw invalid_utf16(static_cast<uint16_t>(cp));
|
||||
|
||||
}
|
||||
// Lone trail surrogate
|
||||
else if (internal::is_trail_surrogate(cp))
|
||||
@@ -263,10 +271,14 @@ namespace utf8
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
|
||||
{
|
||||
while (start != end) {
|
||||
while (start != end)
|
||||
{
|
||||
uint32_t cp = next(start, end);
|
||||
if (cp > 0xffff) { //make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
if (cp > 0xffff)
|
||||
{
|
||||
cp -= 0x10000;
|
||||
//make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
@@ -292,8 +304,9 @@ namespace utf8
|
||||
uint32_t cp = *start++;
|
||||
if (cp > 0xffff)
|
||||
{
|
||||
cp -= 0x10000;
|
||||
//make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
|
@@ -129,12 +129,16 @@ namespace utf8
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
|
||||
{
|
||||
while (start != end) {
|
||||
while (start != end)
|
||||
{
|
||||
uint32_t cp = internal::mask16(*start++);
|
||||
// Take care of surrogate pairs first
|
||||
if (internal::is_lead_surrogate(cp)) {
|
||||
// Take care of surrogate pairs first
|
||||
if (internal::is_lead_surrogate(cp))
|
||||
{
|
||||
uint32_t lead_surrogate = cp;
|
||||
uint32_t trail_surrogate = internal::mask16(*start++);
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
|
||||
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
|
||||
}
|
||||
result = append(cp, result);
|
||||
}
|
||||
@@ -150,8 +154,10 @@ namespace utf8
|
||||
// Take care of surrogate pairs first
|
||||
if (internal::is_lead_surrogate(cp))
|
||||
{
|
||||
uint32_t lead_surrogate = cp;
|
||||
uint32_t trail_surrogate = internal::mask16(*start++);
|
||||
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
|
||||
cp = ((lead_surrogate - internal::LEAD_SURROGATE_MIN) << 10) +
|
||||
(trail_surrogate - internal::TRAIL_SURROGATE_MIN) + 0x10000;
|
||||
}
|
||||
(*result++) = cp;
|
||||
}
|
||||
@@ -161,10 +167,14 @@ namespace utf8
|
||||
template <typename u16bit_iterator, typename octet_iterator>
|
||||
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
|
||||
{
|
||||
while (start < end) {
|
||||
while (start < end)
|
||||
{
|
||||
uint32_t cp = next(start);
|
||||
if (cp > 0xffff) { //make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
if (cp > 0xffff)
|
||||
{
|
||||
cp -= 0x10000;
|
||||
//make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
@@ -190,8 +200,9 @@ namespace utf8
|
||||
uint32_t cp = *start++;
|
||||
if (cp > 0xffff)
|
||||
{
|
||||
cp -= 0x10000;
|
||||
//make a surrogate pair
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
|
||||
*result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_SURROGATE_MIN);
|
||||
*result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
|
||||
}
|
||||
else
|
||||
|
Loading…
Reference in New Issue
Block a user