1
0

Merge pull request #2820 from cuberite/UrlParser

Added the cUrlParser class, exported to Lua API.
This commit is contained in:
worktycho 2015-12-26 19:01:37 +00:00
commit b99b60720a
8 changed files with 515 additions and 4 deletions

View File

@ -2286,7 +2286,50 @@ local CompressedString = cStringCompression.CompressStringGZIP("DataToCompress")
SetFuseTicks = { Return = "number", Notes = "Set the fuse ticks until the tnt will explode." },
},
Inherits = "cEntity",
}, -- cTNTEntity
-- API documentation entry for the cUrlParser class (static URL-parsing helpers exported to Lua).
-- The code example checks the same variables that receive the results of cUrlParser:Parse():
-- on failure the first result is nil and the second one carries the error message.
cUrlParser =
{
Desc = [[
Provides a parser for generic URLs that returns the individual components of the URL.</p>
<p>
Note that all functions are static. Call them by using "cUrlParser:Parse(...)" etc.
]],
Functions =
{
GetDefaultPort = { Params = "Scheme", Return = "number", Notes = "(STATIC) Returns the default port that should be used for the given scheme (protocol). Returns zero if the scheme is not known." },
IsKnownScheme = { Params = "Scheme", Return = "bool", Notes = "(STATIC) Returns true if the scheme (protocol) is recognized by the parser." },
Parse = { Params = "URL", Return = "Scheme, Username, Password, Host, Port, Path, Query, Fragment", Notes = "(STATIC) Returns the individual parts of the URL. Parts that are not explicitly specified in the URL are empty, the default port for the scheme is used. If parsing fails, the function returns nil and an error message." },
ParseAuthorityPart = { Params = "AuthPart", Return = "Username, Password, Host, Port", Notes = "(STATIC) Parses the Authority part of the URL. Parts that are not explicitly specified in the AuthPart are returned empty, the port is returned zero. If parsing fails, the function returns nil and an error message." },
},
AdditionalInfo =
{
{
Header = "Code example",
Contents = [==[
The following code fragment uses the cUrlParser to parse an URL string into its components, and
prints those components out:
<pre class="prettyprint lang-lua">
local Scheme, Username, Password, Host, Port, Path, Query, Fragment = cUrlParser:Parse(
"http://anonymous:user@example.com@ftp.cuberite.org:9921/releases/2015/?sort=date#files"
)
if not(Scheme) then
LOG(" Error: " .. (Username or "<nil>"))
else
LOG(" Scheme = " .. Scheme) -- "http"
LOG(" Username = " .. Username) -- "anonymous"
LOG(" Password = " .. Password) -- "user@example.com"
LOG(" Host = " .. Host) -- "ftp.cuberite.org"
LOG(" Port = " .. Port) -- 9921
LOG(" Path = " .. Path) -- "/releases/2015/"
LOG(" Query = " .. Query) -- "sort=date"
LOG(" Fragment = " .. Fragment) -- "files"
end
</pre>
]==],
},
},
}, -- cUrlParser
cWebPlugin =
{

View File

@ -2030,6 +2030,47 @@ end
--- Console command handler that runs cUrlParser:Parse() over a fixed list of sample URLs
-- and logs either the parsed components or the reported error for each of them.
-- Always returns true.
function HandleConsoleTestUrlParser(a_Split, a_EntireCmd)
	LOG("Testing cUrlParser...")

	-- Sample inputs: one unparseable string, then URLs exercising credentials, ports, paths, queries and fragments
	local sampleUrls =
	{
		"invalid URL",
		"https://github.com",
		"ftp://anonymous:user@example.com@ftp.cuberite.org:9921/releases/2015/2015-12-25.zip",
		"ftp://anonymous:user:name:with:colons@example.com@ftp.cuberite.org:9921",
		"http://google.com/",
		"http://google.com/?q=cuberite",
		"http://google.com/search?q=cuberite",
		"http://google.com/some/search?q=cuberite#results",
		"http://google.com/?q=cuberite#results",
		"http://google.com/#results",
		"ftp://cuberite.org:9921/releases/2015/2015-12-25.zip",
		"mailto:support@cuberite.org",
	}

	for idx = 1, #sampleUrls do
		local url = sampleUrls[idx]
		LOG("URL: " .. url)
		local scheme, username, password, host, port, path, query, fragment = cUrlParser:Parse(url)
		if (scheme) then
			-- Success: log each component under its label, in the order they are returned
			local labelled =
			{
				{ " Scheme = ",   scheme },
				{ " Username = ", username },
				{ " Password = ", password },
				{ " Host = ",     host },
				{ " Port = ",     port },
				{ " Path = ",     path },
				{ " Query = ",    query },
				{ " Fragment = ", fragment },
			}
			for i = 1, #labelled do
				LOG(labelled[i][1] .. labelled[i][2])
			end
		else
			-- Failure: the second return value carries the error message
			LOG(" Error: " .. (username or "<nil>"))
		end
	end

	LOG("cUrlParser test complete")
	return true
end
function HandleConsoleBBox(a_Split)
local bbox = cBoundingBox(0, 10, 0, 10, 0, 10)
local v1 = Vector3d(1, 1, 1)

View File

@ -253,6 +253,12 @@ g_PluginInfo =
Handler = HandleConsoleTestTracer,
HelpString = "Tests the cLineBlockTracer",
},
-- Console command "testurlparser": dispatches to HandleConsoleTestUrlParser
["testurlparser"] =
{
Handler = HandleConsoleTestUrlParser,
HelpString = "Tests the cUrlParser",
},
}, -- ConsoleCommands
} -- g_PluginInfo

View File

@ -154,5 +154,5 @@ endif()
# For non-MSVC builds, create the Bindings library and declare the libraries it links against.
if(NOT MSVC)
add_library(Bindings ${SRCS} ${HDRS})
target_link_libraries(Bindings lua sqlite tolualib mbedtls)
# Same dependency list with HTTPServer added (presumably needed for the cUrlParser bindings - confirm)
target_link_libraries(Bindings lua sqlite tolualib mbedtls HTTPServer)
endif()

View File

@ -36,6 +36,7 @@
#include "../StringCompression.h"
#include "../CommandOutput.h"
#include "../BuildInfo.h"
#include "../HTTPServer/UrlParser.h"
@ -1956,6 +1957,155 @@ static int tolua_get_HTTPRequest_FormData(lua_State* tolua_S)
/** Lua binding for the static API function:
cUrlParser:GetDefaultPort("scheme") -> number
Returns the number of values pushed onto the Lua stack (0 if the parameter check fails). */
static int tolua_cUrlParser_GetDefaultPort(lua_State * a_LuaState)
{
	cLuaState L(a_LuaState);

	// Expect the cUrlParser class table, a single string, and nothing after it:
	const bool areParamsValid = (
		L.CheckParamUserTable(1, "cUrlParser") &&
		L.CheckParamString(2) &&
		L.CheckParamEnd(3)
	);
	if (!areParamsValid)
	{
		return 0;
	}

	// Fetch the scheme from the Lua stack and push its default port as the single result:
	AString schemeName;
	L.GetStackValue(2, schemeName);
	L.Push(cUrlParser::GetDefaultPort(schemeName));
	return 1;
}
/** Lua binding for the static API function:
cUrlParser:IsKnownScheme("scheme") -> bool
Returns the number of values pushed onto the Lua stack (0 if the parameter check fails). */
static int tolua_cUrlParser_IsKnownScheme(lua_State * a_LuaState)
{
	cLuaState L(a_LuaState);

	// Expect the cUrlParser class table, a single string, and nothing after it:
	const bool areParamsValid = (
		L.CheckParamUserTable(1, "cUrlParser") &&
		L.CheckParamString(2) &&
		L.CheckParamEnd(3)
	);
	if (!areParamsValid)
	{
		return 0;
	}

	// Fetch the scheme from the Lua stack:
	AString schemeName;
	L.GetStackValue(2, schemeName);

	// Query the parser and hand the answer back to Lua as the single result:
	const auto isKnown = cUrlParser::IsKnownScheme(schemeName);
	L.Push(isKnown);
	return 1;
}
/** Lua binding for the static API function:
cUrlParser:Parse("url") -> "scheme", "user", "password", "host", portnum, "path", "query", "fragment"
When parsing fails, nil and the error message are returned to Lua instead.
Returns the number of values pushed onto the Lua stack (0 if the parameter check fails). */
static int tolua_cUrlParser_Parse(lua_State * a_LuaState)
{
	cLuaState L(a_LuaState);

	// Expect the cUrlParser class table, a single string, and nothing after it:
	const bool areParamsValid = (
		L.CheckParamUserTable(1, "cUrlParser") &&
		L.CheckParamString(2) &&
		L.CheckParamEnd(3)
	);
	if (!areParamsValid)
	{
		return 0;
	}

	// Fetch the URL from the Lua stack:
	AString urlText;
	L.GetStackValue(2, urlText);

	// Run the parser:
	AString scheme, username, password, host, path, query, fragment;
	UInt16 port;
	auto parseResult = cUrlParser::Parse(urlText, scheme, username, password, host, port, path, query, fragment);
	if (parseResult.first)
	{
		// Success, push all eight components in the documented order:
		L.Push(scheme);
		L.Push(username);
		L.Push(password);
		L.Push(host);
		L.Push(port);
		L.Push(path);
		L.Push(query);
		L.Push(fragment);
		return 8;
	}

	// Failure, report nil and the error message:
	L.PushNil();
	L.Push(parseResult.second);
	return 2;
}
/** Lua binding for the static API function:
cUrlParser:ParseAuthorityPart("authority") -> "user", "password", "host", portnum
When parsing fails, nil and the error message are returned to Lua instead.
Parts not specified in the "authority" are left empty / zero.
Returns the number of values pushed onto the Lua stack (0 if the parameter check fails). */
static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState)
{
	// Check params:
	cLuaState L(a_LuaState);
	if (
		!L.CheckParamUserTable(1, "cUrlParser") ||
		!L.CheckParamString(2) ||
		!L.CheckParamEnd(3)
	)
	{
		return 0;
	}

	// Read params from Lua:
	AString authPart;
	L.GetStackValue(2, authPart);

	// Execute and push result:
	AString username, password, host;
	// The parser only writes the port when the authority contains one, so it must start at zero;
	// otherwise an indeterminate value would be pushed to Lua for port-less authorities.
	UInt16 port = 0;
	auto res = cUrlParser::ParseAuthorityPart(authPart, username, password, host, port);
	if (!res.first)
	{
		// Error, return nil and error msg:
		L.PushNil();
		L.Push(res.second);
		return 2;
	}
	L.Push(username);
	L.Push(password);
	L.Push(host);
	L.Push(port);
	return 4;
}
static int tolua_cWebAdmin_GetPlugins(lua_State * tolua_S)
{
cWebAdmin * self = reinterpret_cast<cWebAdmin *>(tolua_tousertype(tolua_S, 1, nullptr));
@ -3224,9 +3374,11 @@ void cManualBindings::Bind(lua_State * tolua_S)
tolua_usertype(tolua_S, "cCryptoHash");
tolua_usertype(tolua_S, "cLineBlockTracer");
tolua_usertype(tolua_S, "cStringCompression");
tolua_usertype(tolua_S, "cUrlParser");
tolua_cclass(tolua_S, "cCryptoHash", "cCryptoHash", "", nullptr);
tolua_cclass(tolua_S, "cLineBlockTracer", "cLineBlockTracer", "", nullptr);
tolua_cclass(tolua_S, "cStringCompression", "cStringCompression", "", nullptr);
tolua_cclass(tolua_S, "cUrlParser", "cUrlParser", "", nullptr);
// Globals:
tolua_function(tolua_S, "Clamp", tolua_Clamp);
@ -3390,6 +3542,13 @@ void cManualBindings::Bind(lua_State * tolua_S)
tolua_function(tolua_S, "InflateString", tolua_InflateString);
tolua_endmodule(tolua_S);
tolua_beginmodule(tolua_S, "cUrlParser");
tolua_function(tolua_S, "GetDefaultPort", tolua_cUrlParser_GetDefaultPort);
tolua_function(tolua_S, "IsKnownScheme", tolua_cUrlParser_IsKnownScheme);
tolua_function(tolua_S, "Parse", tolua_cUrlParser_Parse);
tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart);
tolua_endmodule(tolua_S);
tolua_beginmodule(tolua_S, "cWebAdmin");
tolua_function(tolua_S, "GetHTMLEscapedString", tolua_AllToLua_cWebAdmin_GetHTMLEscapedString);
tolua_function(tolua_S, "GetPlugins", tolua_cWebAdmin_GetPlugins);

View File

@ -12,7 +12,9 @@ SET (SRCS
HTTPServer.cpp
MultipartParser.cpp
NameValueParser.cpp
SslHTTPConnection.cpp)
SslHTTPConnection.cpp
UrlParser.cpp
)
SET (HDRS
EnvelopeParser.h
@ -22,7 +24,9 @@ SET (HDRS
HTTPServer.h
MultipartParser.h
NameValueParser.h
SslHTTPConnection.h)
SslHTTPConnection.h
UrlParser.h
)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set_source_files_properties(HTTPServer.cpp PROPERTIES COMPILE_FLAGS "-Wno-error=global-constructors ")

View File

@ -0,0 +1,200 @@
// UrlParser.cpp
// Implements the cUrlParser class that parses string URL into individual parts
#include "Globals.h"
#include "UrlParser.h"
/** Looks up the default port for the given scheme.
The comparison is an exact, case-sensitive match; unrecognized schemes yield 0. */
UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme)
{
	// The recognized schemes, each paired with the port it uses by default:
	static const struct
	{
		const char * m_Scheme;
		UInt16 m_Port;
	} knownSchemes[] =
	{
		{"http",   80},
		{"https",  443},
		{"ftp",    21},
		{"mailto", 25},
	};

	for (const auto & entry: knownSchemes)
	{
		if (a_Scheme == entry.m_Scheme)
		{
			return entry.m_Port;
		}
	}

	// Not a recognized scheme:
	return 0;
}
/** Splits the Authority part of an URL into the username, password, host and port.
Returns {true, ""} on success, {false, error message} on failure.
a_Username, a_Password are always assigned (empty when not present).
a_Host is assigned whenever a host is parsed; an [IPv6] host is stored without its enclosing brackets.
a_Port is written only when the authority contains a port; otherwise the caller's value is kept,
which lets Parse() pre-fill it with the scheme's default port. */
std::pair<bool, AString> cUrlParser::ParseAuthorityPart(
	const AString & a_AuthorityPart,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port
)
{
	/*
	a_AuthorityPart format:
	[user:password@]host[:port]
	host can be an IPv4, hostname, or an IPv6 enclosed in brackets
	Assume only the password can contain an additional at-sign
	*/

	// Split the authority on the last at-sign, if present:
	auto idxLastAtSign = a_AuthorityPart.find_last_of('@');
	auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign);
	auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1);

	// User credentials are completely optional; the username ends at the first colon:
	auto idxCredColon = credPart.find(':');
	a_Username = credPart.substr(0, idxCredColon);
	a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1);

	// Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first
	if (srvrPart.empty())
	{
		// No host information at all. Bail out with success
		a_Host.clear();
		return std::make_pair(true, AString());
	}
	if (srvrPart[0] == '[')
	{
		// [IPv6] host, search for the closing bracket
		auto idxClosingBracket = srvrPart.find(']');
		if (idxClosingBracket == AString::npos)
		{
			return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket");
		}

		// Store the address between the brackets (the opening bracket is at index 0):
		a_Host = srvrPart.substr(1, idxClosingBracket - 1);
		auto portPart = srvrPart.substr(idxClosingBracket + 1);
		if (portPart.empty())
		{
			// No port was specified, return success
			return std::make_pair(true, AString());
		}
		if (portPart[0] != ':')
		{
			return std::make_pair(false, "Invalid port format after IPv6 address, mising colon");
		}

		// Skip only the leading colon, so that the first digit of the port is not lost:
		if (!StringToInteger(portPart.substr(1), a_Port))
		{
			return std::make_pair(false, "Failed to parse port number after IPv6 address");
		}
		return std::make_pair(true, AString());
	}

	// Not an [IPv6] address, split on the last colon:
	auto idxLastColon = srvrPart.find_last_of(':');
	a_Host = srvrPart.substr(0, idxLastColon);
	if (idxLastColon == AString::npos)
	{
		// No port was specified, return success
		return std::make_pair(true, AString());
	}
	auto portPart = srvrPart.substr(idxLastColon + 1);
	if (!StringToInteger(portPart, a_Port))
	{
		return std::make_pair(false, "Failed to parse port number after hostname");
	}
	return std::make_pair(true, AString());
}
/** Splits the URL into the scheme, user credentials, host, port, path, query and fragment.
Returns {true, ""} on success, {false, error message} on failure.
The scheme is lowercased and must be one known to GetDefaultPort(), otherwise parsing fails.
a_Port is pre-filled with the scheme's default port and overwritten only if the URL specifies one.
The path, when present, includes its leading slash; the query and fragment exclude their '?' / '#'. */
std::pair<bool, AString> cUrlParser::Parse(
	const AString & a_Url,
	AString & a_Scheme,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port,
	AString & a_Path,
	AString & a_Query,
	AString & a_Fragment
)
{
	// Find the scheme - the text before the first colon:
	auto idxColon = a_Url.find(':');
	if (idxColon == AString::npos)
	{
		return std::make_pair(false, "Cannot parse the Scheme part of the URL");
	}
	a_Scheme = StrToLower(a_Url.substr(0, idxColon));
	a_Port = GetDefaultPort(a_Scheme);
	if (a_Port == 0)
	{
		return std::make_pair(false, Printf("Unknown URL scheme: \"%s\"", a_Scheme.c_str()));
	}

	// If the next two chars are a double-slash, skip them:
	auto authStart = idxColon + 1;
	if (a_Url.substr(authStart, 2) == "//")
	{
		authStart += 2;
	}

	// The Authority part follows the Scheme, until the first slash.
	// The search starts right at authStart so that an empty authority ("scheme:///path")
	// doesn't swallow the first path segment into the host:
	auto idxFirstSlash = a_Url.find('/', authStart);
	if (idxFirstSlash == AString::npos)
	{
		// No slash, the whole end of the Url is the authority part
		idxFirstSlash = a_Url.size();
	}

	// Parse the Authority part into individual components:
	auto res = ParseAuthorityPart(
		a_Url.substr(authStart, idxFirstSlash - authStart),
		a_Username, a_Password,
		a_Host, a_Port
	);
	if (!res.first)
	{
		return res;
	}

	// Parse the rest into a path, query and fragment:
	a_Path.clear();
	a_Query.clear();
	a_Fragment.clear();
	if (idxFirstSlash == a_Url.size())
	{
		// No additional data, bail out with success
		return std::make_pair(true, AString());
	}

	// The path runs from the first slash up to the first '?' or '#', whichever comes first:
	auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1);
	if (idxPathEnd == AString::npos)
	{
		a_Path = a_Url.substr(idxFirstSlash);
		return std::make_pair(true, AString());
	}
	a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash);

	// Everything after the path and before the first '#' is the query, the rest is the fragment:
	auto idxHash = a_Url.find('#', idxPathEnd);
	if (idxHash == AString::npos)
	{
		a_Query = a_Url.substr(idxPathEnd + 1);
		return std::make_pair(true, AString());
	}
	if (idxHash > idxPathEnd)
	{
		// The path ended with a '?', so there is a query between it and the '#'
		a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1);
	}
	a_Fragment = a_Url.substr(idxHash + 1);
	return std::make_pair(true, AString());
}

View File

@ -0,0 +1,58 @@
// UrlParser.h
// Declares the cUrlParser class that parses string URL into individual parts
#pragma once
/** Collection of static helpers that split an URL string into its individual components. */
class cUrlParser
{
public:

	/** Returns the port that the given scheme / protocol uses by default.
	Yields 0 for a scheme that the parser doesn't know. */
	static UInt16 GetDefaultPort(const AString & a_Scheme);


	/** Tells whether the URL parser recognizes the given scheme (http, ftp, mailto, ...).
	A scheme is recognized when it has a non-zero default port.
	The check is case sensitive, known schemes are always lowercase. */
	static bool IsKnownScheme(const AString & a_Scheme)
	{
		return (GetDefaultPort(a_Scheme) != 0);
	}


	/** Splits the Authority part of an URL into the user credentials, host and port.
	The first member of the returned pair is true on success;
	on failure it is false and the second member carries the error message. */
	static std::pair<bool, AString> ParseAuthorityPart(
		const AString & a_AuthorityPart,
		AString & a_Username,
		AString & a_Password,
		AString & a_Host,
		UInt16 & a_Port
	);


	/** Splits the whole URL into its individual components.
	The first member of the returned pair is true on success;
	on failure it is false and the second member carries the error message.
	An unknown scheme (protocol) is reported as a failure.
	When the URL has no port, the default port of its scheme is used. */
	static std::pair<bool, AString> Parse(
		const AString & a_Url,
		AString & a_Scheme,
		AString & a_Username,
		AString & a_Password,
		AString & a_Host,
		UInt16 & a_Port,
		AString & a_Path,
		AString & a_Query,
		AString & a_Fragment
	);
};