1651fcd980
git-svn-id: http://mc-server.googlecode.com/svn/trunk@800 0a769ca7-a7f5-676a-18bf-c427514a06d6
467 lines
10 KiB
C++
467 lines
10 KiB
C++
|
|
// StringUtils.cpp
|
|
|
|
// Implements the various string helper functions:
|
|
|
|
#include "Globals.h"
|
|
|
|
#if defined(ANDROID_NDK)
|
|
#include <ctype.h>
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Formats the printf-style arguments and APPENDS the result to str; existing contents of str are kept.
// str    - the string to append to
// format - printf-style format string, must not be NULL
// args   - the arguments for the format string; the caller remains responsible for va_end()
// Returns a reference to str. If the formatting itself fails, str is returned unchanged.
AString & AppendVPrintf(AString & str, const char *format, va_list args)
{
	ASSERT(format != NULL);

	// Most results are short; try a stack buffer first to avoid touching the heap:
	char buffer[2048];

#ifdef _MSC_VER

	// MS CRT provides secure printf that doesn't behave like in the C99 standard:
	// _vsnprintf_s with _TRUNCATE returns -1 when the output doesn't fit.
	// With MSVC the va_list is a plain pointer passed by value, so args may be reused for the subsequent calls.
	int len = _vsnprintf_s(buffer, ARRAYCOUNT(buffer), _TRUNCATE, format, args);
	if (len >= 0)
	{
		// The result did fit into the static buffer
		str.append(buffer, (size_t)len);
		return str;
	}

	// The result did not fit into the static buffer; for MS CRT, we need to calculate the result length:
	len = _vscprintf(format, args);
	if (len < 0)
	{
		return str;
	}

	// Grow the string by the needed amount (plus the terminating NUL) and printf directly behind the old contents:
	const size_t oldSize = str.size();
	str.resize(oldSize + (size_t)len + 1);
	vsprintf_s(&str[oldSize], (size_t)len + 1, format, args);
	str.resize(oldSize + (size_t)len);  // Cut off the terminating NUL
	return str;

#else  // _MSC_VER

	// A va_list must not be reused after it has been handed to a v*printf function,
	// therefore each formatting pass works on its own copy:
	va_list argsCopy;
	va_copy(argsCopy, args);
	int len = vsnprintf(buffer, ARRAYCOUNT(buffer), format, argsCopy);
	va_end(argsCopy);

	if (len < 0)
	{
		// Output / encoding error reported by vsnprintf, nothing sensible to append
		return str;
	}
	if ((size_t)len < ARRAYCOUNT(buffer))
	{
		// The result did fit into the static buffer
		str.append(buffer, (size_t)len);
		return str;
	}

	// The result did not fit into the static buffer; len is the full length needed (without the terminating NUL).
	// Grow the string by the needed amount (plus the terminating NUL) and printf directly behind the old contents.
	// HACK: This writes into AString's internal buffer, which relies on the storage being contiguous.
	const size_t oldSize = str.size();
	str.resize(oldSize + (size_t)len + 1);
	va_copy(argsCopy, args);
	vsnprintf(&str[oldSize], (size_t)len + 1, format, argsCopy);
	va_end(argsCopy);
	str.resize(oldSize + (size_t)len);  // Cut off the terminating NUL
	return str;

#endif  // else _MSC_VER
}
|
|
|
|
|
|
|
|
|
|
|
|
// Replaces the contents of str with the printf-style formatted text.
// Note that str is emptied before the arguments are formatted, so an argument
// that points into str's own buffer is read only after str has been cleared.
// Returns a reference to str.
AString & Printf(AString & str, const char *format, ...)
{
	va_list varArgs;

	str.clear();
	va_start(varArgs, format);
	AString & formatted = AppendVPrintf(str, format, varArgs);
	va_end(varArgs);

	return formatted;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Appends the printf-style formatted text to str; forwards the variadic arguments to AppendVPrintf().
// Returns the reference handed back by AppendVPrintf().
AString & AppendPrintf(AString &str, const char *format, ...)
{
	va_list varArgs;

	va_start(varArgs, format);
	AString & formatted = AppendVPrintf(str, format, varArgs);
	va_end(varArgs);

	return formatted;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Splits str at every character that is listed in delim (delim is a SET of single-character delimiters).
// Empty pieces at the start and between adjacent delimiters are kept;
// an empty piece after the very last delimiter is not emitted.
// An empty delim never matches, so the whole (non-empty) str is returned as a single piece.
AStringVector StringSplit(const AString & str, const AString & delim)
{
	AStringVector results;
	size_t Prev = 0;
	size_t cutAt;
	while ((cutAt = str.find_first_of(delim, Prev)) != AString::npos)
	{
		results.push_back(str.substr(Prev, cutAt - Prev));
		// find_first_of() matched exactly one character, so skip exactly one character.
		// (Skipping delim.length() characters would swallow regular text whenever delim lists more than one character.)
		Prev = cutAt + 1;
	}
	if (Prev < str.length())
	{
		// The remainder after the last delimiter:
		results.push_back(str.substr(Prev));
	}
	return results;
}
|
|
|
|
|
|
|
|
|
|
// Returns a copy of str with all leading and trailing characters of value 32 (space) or lower removed.
// Characters are examined as unsigned values, so bytes >= 0x80 (such as UTF-8 sequences) are never trimmed,
// regardless of whether plain char is signed on the target platform.
// Returns an empty string if str consists only of such whitespace / control characters.
AString TrimString(const AString & str)
{
	const size_t len = str.length();

	// Skip the leading whitespace:
	size_t start = 0;
	while ((start < len) && ((unsigned char)str[start] <= 32))
	{
		++start;
	}
	if (start == len)
	{
		// Nothing but whitespace (or an empty string)
		return AString();
	}

	// Skip the trailing whitespace; "end" is one past the last kept character.
	// The loop is guaranteed to stop at start, because str[start] is known not to be whitespace.
	size_t end = len;
	while ((unsigned char)str[end - 1] <= 32)
	{
		--end;
	}

	return str.substr(start, end - start);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Converts s to uppercase in-place, character by character, using toupper().
// Returns a reference to s.
AString & StrToUpper(AString & s)
{
	for (AString::iterator itr = s.begin(), end = s.end(); itr != end; ++itr)
	{
		// toupper() requires a value representable as unsigned char; a negative plain char is undefined behavior
		*itr = (char)toupper((unsigned char)(*itr));
	}
	return s;
}
|
|
|
|
|
|
|
|
|
|
|
|
int NoCaseCompare(const AString & s1, const AString & s2)
|
|
{
|
|
#ifdef _MSC_VER
|
|
// MSVC has stricmp that compares case-insensitive:
|
|
return _stricmp(s1.c_str(), s2.c_str());
|
|
#else
|
|
// Do it the hard way:
|
|
AString s1Copy(s1);
|
|
AString s2Copy(s2);
|
|
return StrToUpper(s1Copy).compare(StrToUpper(s2Copy));
|
|
#endif // else _MSC_VER
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int RateCompareString(const AString & s1, const AString & s2 )
|
|
{
|
|
unsigned int MatchedLetters = 0;
|
|
unsigned int s1Length = s1.length();
|
|
|
|
if( s1Length > s2.length() ) return 0; // Definitely not a match
|
|
|
|
for (unsigned int i = 0; i < s1Length; i++)
|
|
{
|
|
char c1 = (char)toupper( s1[i] );
|
|
char c2 = (char)toupper( s2[i] );
|
|
if( c1 == c2 )
|
|
{
|
|
++MatchedLetters;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
return MatchedLetters;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Replaces each occurrence of iNeedle in iHayStack with iReplaceWith, in-place.
// The inserted replacement text is not searched again, so the function terminates
// even when iReplaceWith itself contains iNeedle.
// An empty iNeedle is a no-op (it would otherwise match at every position).
void ReplaceString(AString & iHayStack, const AString & iNeedle, const AString & iReplaceWith)
{
	if (iNeedle.empty())
	{
		return;
	}

	size_t pos1 = iHayStack.find(iNeedle);
	while (pos1 != AString::npos)
	{
		iHayStack.replace(pos1, iNeedle.size(), iReplaceWith);
		// Continue searching behind the text that has just been inserted:
		pos1 = iHayStack.find(iNeedle, pos1 + iReplaceWith.size());
	}
}
|
|
|
|
|
|
|
|
|
|
// Returns the names of all entries found in the specified directory; no entries are filtered out.
// On Windows the "*.*" mask is appended directly to a_Directory, so the path needs to end with a path separator (or be empty).
// On other platforms an empty a_Directory means the current directory (".").
// If the directory cannot be opened, an empty list is returned (non-Windows platforms additionally log an error).
AStringList GetDirectoryContents(const char * a_Directory)
{
	AStringList AllFiles;

#ifdef _WIN32

	const AString FileFilter = AString(a_Directory) + "*.*";
	WIN32_FIND_DATA FindFileData;
	HANDLE hFind = FindFirstFile(FileFilter.c_str(), &FindFileData);
	if (hFind == INVALID_HANDLE_VALUE)
	{
		return AllFiles;
	}
	do
	{
		AllFiles.push_back(FindFileData.cFileName);
	} while (FindNextFile(hFind, &FindFileData));
	FindClose(hFind);

#else  // _WIN32

	if (*a_Directory == 0)
	{
		a_Directory = ".";
	}
	DIR * dp = opendir(a_Directory);
	if (dp == NULL)
	{
		LOGERROR("Error (%i) opening directory \"%s\"\n", errno, a_Directory );
		return AllFiles;
	}
	for (struct dirent * entry = readdir(dp); entry != NULL; entry = readdir(dp))
	{
		AllFiles.push_back(entry->d_name);
	}
	closedir(dp);

#endif  // else _WIN32

	return AllFiles;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Converts a stream of BE shorts into UTF-8 string; returns a ref to a_UTF8
|
|
AString & RawBEToUTF8(short * a_RawData, int a_NumShorts, AString & a_UTF8)
|
|
{
|
|
a_UTF8.clear();
|
|
a_UTF8.reserve(3 * a_NumShorts / 2); // a quick guess of the resulting size
|
|
for (int i = 0; i < a_NumShorts; i++)
|
|
{
|
|
int c = ntohs(*(a_RawData + i));
|
|
if (c < 0x80)
|
|
{
|
|
a_UTF8.push_back((char)c);
|
|
}
|
|
else if (c < 0x800)
|
|
{
|
|
a_UTF8.push_back((char)(192 + c / 64));
|
|
a_UTF8.push_back((char)(128 + c % 64));
|
|
}
|
|
else if (c - 0xd800u < 0x800)
|
|
{
|
|
// Error, silently drop
|
|
}
|
|
else if (c < 0x10000)
|
|
{
|
|
a_UTF8.push_back((char)(224 + c / 4096));
|
|
a_UTF8.push_back((char)(128 + c / 64 % 64));
|
|
a_UTF8.push_back((char)(128 + c % 64));
|
|
}
|
|
else if (c < 0x110000)
|
|
{
|
|
a_UTF8.push_back((char)(240 + c / 262144));
|
|
a_UTF8.push_back((char)(128 + c / 4096 % 64));
|
|
a_UTF8.push_back((char)(128 + c / 64 % 64));
|
|
a_UTF8.push_back((char)(128 + c % 64));
|
|
}
|
|
else
|
|
{
|
|
// Error, silently drop
|
|
}
|
|
}
|
|
return a_UTF8;
|
|
}
|
|
|
|
|
|
|
|
|
|
// UTF-8 conversion code adapted from:
|
|
// http://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c
|
|
|
|
// Code point limits:
#define UNI_MAX_BMP          0xFFFF      // Highest value that is written as a single 16-bit unit
#define UNI_MAX_UTF16        0x10FFFF    // Highest value that can be written as a surrogate pair
#define UNI_MAX_UTF32        0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32  0x10FFFF

// Surrogate ranges:
#define UNI_SUR_HIGH_START   0xD800
#define UNI_SUR_HIGH_END     0xDBFF
#define UNI_SUR_LOW_START    0xDC00
#define UNI_SUR_LOW_END      0xDFFF
|
|
|
|
|
|
|
|
|
|
|
|
// Indexed by the first byte of a UTF-8 sequence; gives the number of bytes that follow that first byte.
// Bytes 0x80 - 0xBF are listed with 0 trailing bytes here; isLegalUTF8() is what rejects them as a first byte.
// The values 4 and 5 (first bytes 0xF8 - 0xFF) describe sequences that are not legal UTF-8.
static const char trailingBytesForUTF8[256] =
{
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x20 - 0x2F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x30 - 0x3F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x40 - 0x4F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x50 - 0x5F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x60 - 0x6F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x70 - 0x7F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xA0 - 0xAF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xB0 - 0xBF
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xC0 - 0xCF
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xD0 - 0xDF
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xE0 - 0xEF
	3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5   // 0xF0 - 0xFF
};
|
|
|
|
|
|
|
|
|
|
|
|
// Indexed by the number of trailing bytes of a UTF-8 sequence.
// The entry is subtracted from the value accumulated by adding up the raw bytes of the sequence (each shifted by 6 bits);
// it equals the sum of the marker bits of the first byte and of the trailing bytes at their shifted positions.
static const unsigned int offsetsFromUTF8[6] =
{
	0x00000000U,  // 0 trailing bytes
	0x00003080U,  // 1 trailing byte:  (0xC0 << 6)  + 0x80
	0x000E2080U,  // 2 trailing bytes: (0xE0 << 12) + (0x80 << 6)  + 0x80
	0x03C82080U,  // 3 trailing bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
	0xFA082080U,  // 4 trailing bytes (not legal UTF-8)
	0x82082080U   // 5 trailing bytes (not legal UTF-8)
};
|
|
|
|
|
|
|
|
|
|
|
|
// Checks whether the bytes source[0] .. source[length - 1] are acceptable as one UTF-8 sequence.
// source - pointer to the first byte of the sequence; at least length bytes must be readable
// length - the total number of bytes in the sequence (first byte + trailing bytes)
// Returns false for any length outside 1 .. 4, for trailing bytes outside 0x80 - 0xBF,
// for the narrowed second-byte ranges of the first bytes 0xE0, 0xED, 0xF0 and 0xF4,
// and for first bytes 0x80 - 0xC1 or above 0xF4.
// Note that the function does not check that length is the length implied by the first byte.
static bool isLegalUTF8(const unsigned char * source, int length)
{
	if ((length < 1) || (length > 4))
	{
		return false;
	}
	const unsigned char lead = source[0];

	// The third and fourth bytes (when present) must be in the range 0x80 - 0xBF:
	for (int idx = length - 1; idx >= 2; --idx)
	{
		if ((source[idx] < 0x80) || (source[idx] > 0xBF))
		{
			return false;
		}
	}

	// The second byte (when present) has a range that depends on the first byte.
	// Both bounds are always checked, including the lower bound for first bytes 0xED and 0xF4.
	if (length >= 2)
	{
		unsigned char lowest  = 0x80;
		unsigned char highest = 0xBF;
		switch (lead)
		{
			case 0xE0: lowest  = 0xA0; break;
			case 0xED: highest = 0x9F; break;
			case 0xF0: lowest  = 0x90; break;
			case 0xF4: highest = 0x8F; break;
			default: break;
		}
		if ((source[1] < lowest) || (source[1] > highest))
		{
			return false;
		}
	}

	// The first byte must not be in 0x80 - 0xC1 and must not be above 0xF4:
	if ((lead >= 0x80) && (lead < 0xC2))
	{
		return false;
	}
	if (lead > 0xF4)
	{
		return false;
	}
	return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
AString & UTF8ToRawBEUTF16(const char * a_UTF8, size_t a_UTF8Length, AString & a_UTF16)
|
|
{
|
|
a_UTF16.clear();
|
|
a_UTF16.reserve(a_UTF8Length * 3);
|
|
|
|
const unsigned char * source = (const unsigned char*)a_UTF8;
|
|
const unsigned char * sourceEnd = source + a_UTF8Length;
|
|
const int halfShift = 10; // used for shifting by 10 bits
|
|
const unsigned int halfBase = 0x0010000UL;
|
|
const unsigned int halfMask = 0x3FFUL;
|
|
|
|
while (source < sourceEnd)
|
|
{
|
|
unsigned int ch = 0;
|
|
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
|
if (source + extraBytesToRead >= sourceEnd)
|
|
{
|
|
return a_UTF16;
|
|
}
|
|
// Do this check whether lenient or strict
|
|
if (!isLegalUTF8(source, extraBytesToRead + 1))
|
|
{
|
|
return a_UTF16;
|
|
break;
|
|
}
|
|
|
|
// The cases all fall through. See "Note A" below.
|
|
switch (extraBytesToRead)
|
|
{
|
|
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
|
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
|
|
case 3: ch += *source++; ch <<= 6;
|
|
case 2: ch += *source++; ch <<= 6;
|
|
case 1: ch += *source++; ch <<= 6;
|
|
case 0: ch += *source++;
|
|
}
|
|
ch -= offsetsFromUTF8[extraBytesToRead];
|
|
|
|
if (ch <= UNI_MAX_BMP)
|
|
{
|
|
// Target is a character <= 0xFFFF
|
|
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
|
|
{
|
|
// UTF-16 surrogate values are illegal in UTF-32
|
|
ch = ' ';
|
|
}
|
|
unsigned short v = htons((unsigned short)ch);
|
|
a_UTF16.append((const char *)&v, 2);
|
|
}
|
|
else if (ch > UNI_MAX_UTF16)
|
|
{
|
|
// Invalid value, replace with a space
|
|
unsigned short v = htons(' ');
|
|
a_UTF16.append((const char *)&v, 2);
|
|
}
|
|
else
|
|
{
|
|
// target is a character in range 0xFFFF - 0x10FFFF.
|
|
ch -= halfBase;
|
|
unsigned short v1 = htons((ch >> halfShift) + UNI_SUR_HIGH_START);
|
|
unsigned short v2 = htons((ch & halfMask) + UNI_SUR_LOW_START);
|
|
a_UTF16.append((const char *)&v1, 2);
|
|
a_UTF16.append((const char *)&v2, 2);
|
|
}
|
|
}
|
|
return a_UTF16;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------

	Note A.
	The fall-through switches in UTF-8 reading code save a
	temp variable, some decrements & conditionals.  The switches
	are equivalent to the following loop:
		{
			int tmpBytesToRead = extraBytesToRead + 1;
			do {
				ch += *source++;
				--tmpBytesToRead;
				if (tmpBytesToRead) ch <<= 6;
			} while (tmpBytesToRead > 0);
		}

--------------------------------------------------------------------- */