Refactored to put URL Encoding / Decoding in a single place. (#3491)

2016-12-25 18:29:21 +01:00 · 2016-12-25 18:29:21 +01:00 · b3b723b453
commit b3b723b453
parent efc7fed05b
7 changed files with 297 additions and 106 deletions
--- a/Server/Plugins/APIDump/APIDesc.lua
+++ b/Server/Plugins/APIDump/APIDesc.lua
@ -13534,6 +13534,44 @@ local CompressedString = cStringCompression.CompressStringGZIP("DataToCompress")
 					},
 					Notes = "Parses the Authority part of the URL. Parts that are not explicitly specified in the AuthPart are returned empty, the port is returned zero. If parsing fails, the function returns nil and an error message.",
 				},
+				UrlDecode =
+				{
+					IsStatic = true,
+					Params =
+					{
+						{
+							Name = "Text",
+							Type = "string",
+						},
+					},
+					Returns =
+					{
+						{
+							Name = "Decoded",
+							Type = "string",
+						},
+					},
+					Notes = "Returns the Text, URL-decoded. Returns nil if there is a problem while decoding (invalid input).",
+				},
+				UrlEncode =
+				{
+					IsStatic = true,
+					Params =
+					{
+						{
+							Name = "Text",
+							Type = "string",
+						},
+					},
+					Returns =
+					{
+						{
+							Name = "Encoded",
+							Type = "string",
+						},
+					},
+					Notes = "Returns the Text, URL-encoded.",
+				},
 			},
 			AdditionalInfo =
 			{
--- a/Server/Plugins/APIDump/Classes/WebAdmin.lua
+++ b/Server/Plugins/APIDump/Classes/WebAdmin.lua
@ -121,6 +121,7 @@ return
 			GetURLEncodedString =
 			{
 				IsStatic = true,
+				ObsoletedBy = "cUrlParser:UrlEncode",
 				Params =
 				{
 					{
@ -134,7 +135,7 @@ return
 						Type = "string",
 					},
 				},
-				Notes = "Returns the string given to it escaped by URL encoding, which makes the string suitable for transmission in an URL. Invalid characters are turned into \"%xy\" values.",
+				Notes = "<b>OBSOLETE</b> - use {{cUrlParser}}:UrlEncode() instead.<br/>Returns the string given to it escaped by URL encoding, which makes the string suitable for transmission in an URL. Invalid characters are turned into \"%xy\" values.",
 			},
 			Reload =
 			{
--- a/src/Bindings/ManualBindings.cpp
+++ b/src/Bindings/ManualBindings.cpp
@ -2112,6 +2112,66 @@ static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState)



+static int tolua_cUrlParser_UrlDecode(lua_State * tolua_S)
+{
+	// Check the param types:
+	cLuaState S(tolua_S);
+	if (
+		// Don't care about the first param
+		!S.CheckParamString(2) ||
+		!S.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Get the parameters:
+	AString Input;
+	S.GetStackValue(2, Input);
+
+	// Convert and return:
+	auto res = URLDecode(Input);
+	if (res.first)
+	{
+		S.Push(res.second);
+	}
+	else
+	{
+		S.Push(cLuaState::Nil);
+	}
+	return 1;
+}
+
+
+
+
+
+static int tolua_cUrlParser_UrlEncode(lua_State * tolua_S)
+{
+	// Check the param types:
+	cLuaState S(tolua_S);
+	if (
+		// Don't care about the first param
+		!S.CheckParamString(2) ||
+		!S.CheckParamEnd(3)
+	)
+	{
+		return 0;
+	}
+
+	// Get the parameters:
+	AString Input;
+	S.GetStackValue(2, Input);
+
+	// Convert and return:
+	S.Push(URLEncode(Input));
+	return 1;
+}
+
+
+
+
+
 static int tolua_cWebAdmin_AddWebTab(lua_State * tolua_S)
 {
 	// Function signatures:
@ -2324,28 +2384,15 @@ static int tolua_cWebAdmin_GetPage(lua_State * tolua_S)



-/** Binding for cWebAdmin::GetURLEncodedString.
-Manual code required because ToLua generates an extra return value */
+/** Binding for cWebAdmin::GetURLEncodedString. */
 static int tolua_cWebAdmin_GetURLEncodedString(lua_State * tolua_S)
 {
-	// Check the param types:
+	// Emit the obsoletion warning:
 	cLuaState S(tolua_S);
-	if (
-		// Don't care whether the first param is a cWebAdmin instance or class
-		!S.CheckParamString(2) ||
-		!S.CheckParamEnd(3)
-	)
-	{
-		return 0;
-	}
+	LOGWARNING("cWebAdmin:GetURLEncodedString() is obsolete, use cUrlParser:UrlEncode() instead.");
+	S.LogStackTrace();

-	// Get the parameters:
-	AString Input;
-	S.GetStackValue(2, Input);
-
-	// Convert and return:
-	S.Push(cWebAdmin::GetURLEncodedString(Input));
-	return 1;
+	return tolua_cUrlParser_UrlEncode(tolua_S);
 }


@ -4042,6 +4089,8 @@ void cManualBindings::Bind(lua_State * tolua_S)
 			tolua_function(tolua_S, "IsKnownScheme",      tolua_cUrlParser_IsKnownScheme);
 			tolua_function(tolua_S, "Parse",              tolua_cUrlParser_Parse);
 			tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart);
+			tolua_function(tolua_S, "UrlDecode",          tolua_cUrlParser_UrlDecode);
+			tolua_function(tolua_S, "UrlEncode",          tolua_cUrlParser_UrlEncode);
 		tolua_endmodule(tolua_S);

 		tolua_beginmodule(tolua_S, "cWebAdmin");
--- a/src/HTTP/HTTPFormParser.cpp
+++ b/src/HTTP/HTTPFormParser.cpp
@ -167,13 +167,22 @@ void cHTTPFormParser::ParseFormUrlEncoded(void)
 			case 1:
 			{
 				// Only name present
-				(*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = "";
+				auto name = URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '));
+				if (name.first)
+				{
+					(*this)[name.second] = "";
+				}
 				break;
 			}
 			case 2:
 			{
 				// name=value format:
-				(*this)[URLDecode(ReplaceAllCharOccurrences(Components[0], '+', ' '))] = URLDecode(ReplaceAllCharOccurrences(Components[1], '+', ' '));
+				auto name = URLDecode(Components[0]);
+				auto value = URLDecode(Components[1]);
+				if (name.first && value.first)
+				{
+					(*this)[name.second] = value.second;
+				}
 				break;
 			}
 		}
--- a/src/StringUtils.cpp
+++ b/src/StringUtils.cpp
@ -14,6 +14,42 @@



+/** Returns the numeric value (0 - 15) of a single hex digit; both lowercase and uppercase letters are accepted.
+Returns 0xff if a_HexChar is not a valid hex digit. */
+static unsigned char HexToDec(char a_HexChar)
+{
+	switch (a_HexChar)
+	{
+		case '0': return 0;
+		case '1': return 1;
+		case '2': return 2;
+		case '3': return 3;
+		case '4': return 4;
+		case '5': return 5;
+		case '6': return 6;
+		case '7': return 7;
+		case '8': return 8;
+		case '9': return 9;
+		case 'a': return 10;
+		case 'b': return 11;
+		case 'c': return 12;
+		case 'd': return 13;
+		case 'e': return 14;
+		case 'f': return 15;
+		case 'A': return 10;
+		case 'B': return 11;
+		case 'C': return 12;
+		case 'D': return 13;
+		case 'E': return 14;
+		case 'F': return 15;
+	}
+	return 0xff;
+}
+
+
+
+
+
 AString & AppendVPrintf(AString & str, const char * format, va_list args)
 {
 	ASSERT(format != nullptr);
@ -353,37 +389,7 @@ AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UT
 	a_UTF8.reserve(3 * a_NumShorts / 2);  // a quick guess of the resulting size
 	for (size_t i = 0; i < a_NumShorts; i++)
 	{
-		int c = GetBEShort(&a_RawData[i * 2]);
-		if (c < 0x80)
-		{
-			a_UTF8.push_back(static_cast<char>(c));
-		}
-		else if (c < 0x800)
-		{
-			a_UTF8.push_back(static_cast<char>(192 + c / 64));
-			a_UTF8.push_back(static_cast<char>(128 + c % 64));
-		}
-		else if (c - 0xd800 < 0x800)
-		{
-			// Error, silently drop
-		}
-		else if (c < 0x10000)
-		{
-			a_UTF8.push_back(static_cast<char>(224 + c / 4096));
-			a_UTF8.push_back(static_cast<char>(128 + (c / 64) % 64));
-			a_UTF8.push_back(static_cast<char>(128 + c % 64));
-		}
-		else if (c < 0x110000)
-		{
-			a_UTF8.push_back(static_cast<char>(240 + c / 262144));
-			a_UTF8.push_back(static_cast<char>(128 + (c / 4096) % 64));
-			a_UTF8.push_back(static_cast<char>(128 + (c / 64) % 64));
-			a_UTF8.push_back(static_cast<char>(128 + c % 64));
-		}
-		else
-		{
-			// Error, silently drop
-		}
+		a_UTF8.append(UnicodeCharToUtf8(GetBEUShort(&a_RawData[i * 2])));
 	}
 	return a_UTF8;
 }
@ -391,6 +397,55 @@ AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UT



+AString UnicodeCharToUtf8(unsigned a_UnicodeChar)
+{
+	if (a_UnicodeChar < 0x80)
+	{
+		return AString{static_cast<char>(a_UnicodeChar)};
+	}
+	else if (a_UnicodeChar < 0x800)
+	{
+		return AString
+		{
+			static_cast<char>(192 + a_UnicodeChar / 64),
+			static_cast<char>(128 + a_UnicodeChar % 64),
+		};
+	}
+	else if (a_UnicodeChar - 0xd800 < 0x800)
+	{
+		// Error
+		return AString();
+	}
+	else if (a_UnicodeChar < 0x10000)
+	{
+		return AString
+		{
+			static_cast<char>(224 + a_UnicodeChar / 4096),
+			static_cast<char>(128 + (a_UnicodeChar / 64) % 64),
+			static_cast<char>(128 + a_UnicodeChar % 64)
+		};
+	}
+	else if (a_UnicodeChar < 0x110000)
+	{
+		return AString
+		{
+			static_cast<char>(240 + a_UnicodeChar / 262144),
+			static_cast<char>(128 + (a_UnicodeChar / 4096) % 64),
+			static_cast<char>(128 + (a_UnicodeChar / 64) % 64),
+			static_cast<char>(128 + a_UnicodeChar % 64),
+		};
+	}
+	else
+	{
+		// Error
+		return AString();
+	}
+}
+
+
+
+
+
 // UTF-8 conversion code adapted from:
 //  https://stackoverflow.com/questions/2867123/convert-utf-16-to-utf-8-under-windows-and-linux-in-c

@ -708,58 +763,99 @@ AString StripColorCodes(const AString & a_Message)



-AString URLDecode(const AString & a_String)
+std::pair<bool, AString> URLDecode(const AString & a_Text)
 {
 	AString res;
-	size_t len = a_String.length();
+	auto len = a_Text.size();
 	res.reserve(len);
 	for (size_t i = 0; i < len; i++)
 	{
-		char ch = a_String[i];
-		if ((ch != '%') || (i > len - 3))
+		if (a_Text[i] == '+')
 		{
-			res.push_back(ch);
+			res.push_back(' ');
 			continue;
 		}
-		// Decode the hex value:
-		char hi = a_String[i + 1], lo = a_String[i + 2];
-		if ((hi >= '0') && (hi <= '9'))
+		if (a_Text[i] != '%')
 		{
-			hi = hi - '0';
+			res.push_back(a_Text[i]);
+			continue;
 		}
-		else if ((hi >= 'a') && (hi <= 'f'))
+		if (i + 1 >= len)
 		{
-			hi = hi - 'a' + 10;
+			// String too short for an encoded value
+			return std::make_pair(false, AString());
 		}
-		else if ((hi >= 'A') && (hi <= 'F'))
+		if ((a_Text[i + 1] == 'u') || (a_Text[i + 1] == 'U'))
 		{
-			hi = hi - 'F' + 10;
+			// Unicode char "%u0xxxx"
+			if (i + 6 >= len)
+			{
+				return std::make_pair(false, AString());
+			}
+			if (a_Text[i + 2] != '0')
+			{
+				return std::make_pair(false, AString());
+			}
+			unsigned v1 = HexToDec(a_Text[i + 3]);
+			unsigned v2 = HexToDec(a_Text[i + 4]);
+			unsigned v3 = HexToDec(a_Text[i + 5]);
+			unsigned v4 = HexToDec(a_Text[i + 6]);
+			if ((v1 == 0xff) || (v2 == 0xff) || (v4 == 0xff) || (v3 == 0xff))
+			{
+				// Invalid hex numbers
+				return std::make_pair(false, AString());
+			}
+			res.append(UnicodeCharToUtf8((v1 << 12) | (v2 << 8) | (v3 << 4) | v4));
+			i = i + 6;
 		}
 		else
 		{
-			res.push_back(ch);
-			continue;
+			// Regular char "%xx":
+			if (i + 2 >= len)
+			{
+				return std::make_pair(false, AString());
+			}
+			auto v1 = HexToDec(a_Text[i + 1]);
+			auto v2 = HexToDec(a_Text[i + 2]);
+			if ((v1 == 0xff) || (v2 == 0xff))
+			{
+				// Invalid hex numbers
+				return std::make_pair(false, AString());
+			}
+			res.push_back(static_cast<char>((v1 << 4) | v2));
+			i = i + 2;
 		}
-		if ((lo >= '0') && (lo <= '9'))
+	}  // for i - a_Text[i]
+	return std::make_pair(true, res);
+}
+
+
+
+
+
+AString URLEncode(const AString & a_Text)
+{
+	AString res;
+	auto len = a_Text.size();
+	res.reserve(len);
+	static const char HEX[] = "0123456789abcdef";
+	for (size_t i = 0; i < len; ++i)
+	{
+		if (isalnum(a_Text[i]))
 		{
-			lo = lo - '0';
+			res.push_back(a_Text[i]);
 		}
-		else if ((lo >= 'a') && (lo <= 'f'))
+		else if (a_Text[i] == ' ')
 		{
-			lo = lo - 'a' + 10;
-		}
-		else if ((lo >= 'A') && (lo <= 'F'))
-		{
-			lo = lo - 'A' + 10;
+			res.push_back('+');
 		}
 		else
 		{
-			res.push_back(ch);
-			continue;
+			res.push_back('%');
+			res.push_back(HEX[static_cast<unsigned char>(a_Text[i]) >> 4]);
+			res.push_back(HEX[static_cast<unsigned char>(a_Text[i]) & 0x0f]);
 		}
-		res.push_back(static_cast<char>((hi << 4) | lo));
-		i += 2;
-	}  // for i - a_String[]
+	}
 	return res;
 }

@ -907,6 +1003,16 @@ short GetBEShort(const char * a_Mem)



+unsigned short GetBEUShort(const char * a_Mem)
+{
+	const Byte * Bytes = reinterpret_cast<const Byte *>(a_Mem);
+	return static_cast<unsigned short>((Bytes[0] << 8) | Bytes[1]);
+}
+
+
+
+
+
 int GetBEInt(const char * a_Mem)
 {
 	const Byte * Bytes = reinterpret_cast<const Byte *>(a_Mem);
--- a/src/StringUtils.h
+++ b/src/StringUtils.h
@ -85,6 +85,9 @@ extern void ReplaceString(AString & iHayStack, const AString & iNeedle, const AS
 /** Converts a stream of BE shorts into UTF-8 string; returns a_UTF8. */
 extern AString & RawBEToUTF8(const char * a_RawData, size_t a_NumShorts, AString & a_UTF8);

+/** Converts a Unicode code point to its UTF-8 representation. Returns an empty string for surrogates (0xd800 - 0xdfff) and for values of 0x110000 or above. */
+extern AString UnicodeCharToUtf8(unsigned a_UnicodeChar);
+
 /** Converts a UTF-8 string into a UTF-16 BE string. */
 extern std::u16string UTF8ToRawBEUTF16(const AString & a_String);

@ -98,8 +101,13 @@ extern AString EscapeString(const AString & a_Message);  // tolua_export
 /** Removes all control codes used by MC for colors and styles. */
 extern AString StripColorCodes(const AString & a_Message);  // tolua_export

-/** URL-Decodes the given string, replacing all "%HH" into the correct characters. Invalid % sequences are left intact */
-extern AString URLDecode(const AString & a_String);  // Cannot export to Lua automatically - would generated an extra return value
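+/** URL-decodes the given string: '+' becomes a space, "%xx" becomes the byte with that hex value, "%u0xxxx" becomes the UTF-8 form of that Unicode character.
+The first value specifies whether the decoding was successful; it is false for a truncated or malformed '%' sequence.
+The second value is the decoded string if successful, an empty string otherwise. */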
+extern std::pair<bool, AString> URLDecode(const AString & a_String);  // Exported to Lua as cUrlParser::UrlDecode()
+
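+/** URL-encodes the given string: alphanumeric characters are kept, a space becomes '+', every other character becomes "%xx" (lowercase hex). */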
+extern AString URLEncode(const AString & a_Text);

 /** Replaces all occurrences of char a_From inside a_String with char a_To. */
 extern AString ReplaceAllCharOccurrences(const AString & a_String, char a_From, char a_To);  // Needn't export to Lua, since Lua doesn't have chars anyway
@ -113,6 +121,9 @@ extern AString Base64Encode(const AString & a_Input);  // Exported manually due
 /** Reads two bytes from the specified memory location and interprets them as BigEndian short */
 extern short GetBEShort(const char * a_Mem);

+/** Reads two bytes from the specified memory location and interprets them as BigEndian unsigned short */
+extern unsigned short GetBEUShort(const char * a_Mem);
+
 /** Reads four bytes from the specified memory location and interprets them as BigEndian int */
 extern int GetBEInt(const char * a_Mem);

--- a/src/WebAdmin.cpp
+++ b/src/WebAdmin.cpp
@ -602,30 +602,7 @@ AString cWebAdmin::GetHTMLEscapedString(const AString & a_Input)

 AString cWebAdmin::GetURLEncodedString(const AString & a_Input)
 {
-	// Translation table from nibble to hex:
-	static const char Hex[] = "0123456789abcdef";
-
-	// Preallocate the output to match input:
-	AString dst;
-	size_t len = a_Input.length();
-	dst.reserve(len);
-
-	// Loop over input and substitute whatever is needed:
-	for (size_t i = 0; i < len; i++)
-	{
-		char ch = a_Input[i];
-		if (isalnum(ch) || (ch == '-') || (ch == '_') || (ch == '.') || (ch == '~'))
-		{
-			dst.push_back(ch);
-		}
-		else
-		{
-			dst.push_back('%');
-			dst.push_back(Hex[(ch >> 4) & 0x0f]);
-			dst.push_back(Hex[ch & 0x0f]);
-		}
-	}  // for i - a_Input[]
-	return dst;
+	return URLEncode(a_Input);
 }