From 958df6ad9192285e0a19caa362f9f0c9e63422d4 Mon Sep 17 00:00:00 2001 From: Mattes D Date: Fri, 25 Dec 2015 18:50:25 +0100 Subject: Added the cUrlParser class, exported to Lua API. --- Server/Plugins/APIDump/APIDesc.lua | 45 +++++++- Server/Plugins/Debuggers/Debuggers.lua | 41 +++++++ Server/Plugins/Debuggers/Info.lua | 6 + src/Bindings/CMakeLists.txt | 2 +- src/Bindings/ManualBindings.cpp | 159 ++++++++++++++++++++++++++ src/HTTPServer/CMakeLists.txt | 8 +- src/HTTPServer/UrlParser.cpp | 200 +++++++++++++++++++++++++++++++++ src/HTTPServer/UrlParser.h | 58 ++++++++++ 8 files changed, 515 insertions(+), 4 deletions(-) create mode 100644 src/HTTPServer/UrlParser.cpp create mode 100644 src/HTTPServer/UrlParser.h diff --git a/Server/Plugins/APIDump/APIDesc.lua b/Server/Plugins/APIDump/APIDesc.lua index 6be3795a8..ad7cf6403 100644 --- a/Server/Plugins/APIDump/APIDesc.lua +++ b/Server/Plugins/APIDump/APIDesc.lua @@ -2286,7 +2286,50 @@ local CompressedString = cStringCompression.CompressStringGZIP("DataToCompress") SetFuseTicks = { Return = "number", Notes = "Set the fuse ticks until the tnt will explode." }, }, Inherits = "cEntity", - }, + }, -- cTNTEntity + + cUrlParser = + { + Desc = [[ + Provides a parser for generic URLs that returns the individual components of the URL.

+

+ Note that all functions are static. Call them by using "cUrlParser:Parse(...)" etc. + ]], + Functions = + { + GetDefaultPort = { Params = "Scheme", Return = "number", Notes = "(STATIC) Returns the default port that should be used for the given scheme (protocol). Returns zero if the scheme is not known." }, + IsKnownScheme = { Params = "Scheme", Return = "bool", Notes = "(STATIC) Returns true if the scheme (protocol) is recognized by the parser." }, + Parse = { Params = "URL", Return = "Scheme, Username, Password, Host, Port, Path, Query, Fragment", Notes = "(STATIC) Returns the individual parts of the URL. Parts that are not explicitly specified in the URL are empty, the default port for the scheme is used. If parsing fails, the function returns nil and an error message." }, + ParseAuthorityPart = { Params = "AuthPart", Return = "Username, Password, Host, Port", Notes = "(STATIC) Parses the Authority part of the URL. Parts that are not explicitly specified in the AuthPart are returned empty, the port is returned zero. If parsing fails, the function returns nil and an error message." }, + }, + AdditionalInfo = + { + { + Header = "Code example", + Contents = [==[ + The following code fragment uses the cUrlParser to parse an URL string into its components, and + prints those components out: +

+local Scheme, Username, Password, Host, Port, Path, Query, Fragment = cUrlParser:Parse(
+	"http://anonymous:user@example.com@ftp.cuberite.org:9921/releases/2015/?sort=date#files"
+)
+if not(Scheme) then  -- On failure, Parse returns nil followed by the error message (held in Username here)
+	LOG("  Error: " .. (Username or ""))
+else
+	LOG("  Scheme   = " .. Scheme)    -- "http"
+	LOG("  Username = " .. Username)  -- "anonymous"
+	LOG("  Password = " .. Password)  -- "user@example.com"
+	LOG("  Host     = " .. Host)      -- "ftp.cuberite.org"
+	LOG("  Port     = " .. Port)      -- 9921
+	LOG("  Path     = " .. Path)      -- "/releases/2015/"
+	LOG("  Query    = " .. Query)     -- "sort=date"
+	LOG("  Fragment = " .. Fragment)  -- "files"
+end
+
+ ]==], + }, + }, + }, -- cUrlParser cWebPlugin = { diff --git a/Server/Plugins/Debuggers/Debuggers.lua b/Server/Plugins/Debuggers/Debuggers.lua index 0559a4ef8..7058a5025 100644 --- a/Server/Plugins/Debuggers/Debuggers.lua +++ b/Server/Plugins/Debuggers/Debuggers.lua @@ -2030,6 +2030,47 @@ end +function HandleConsoleTestUrlParser(a_Split, a_EntireCmd) + LOG("Testing cUrlParser...") + local UrlsToTest = + { + "invalid URL", + "https://github.com", + "ftp://anonymous:user@example.com@ftp.cuberite.org:9921/releases/2015/2015-12-25.zip", + "ftp://anonymous:user:name:with:colons@example.com@ftp.cuberite.org:9921", + "http://google.com/", + "http://google.com/?q=cuberite", + "http://google.com/search?q=cuberite", + "http://google.com/some/search?q=cuberite#results", + "http://google.com/?q=cuberite#results", + "http://google.com/#results", + "ftp://cuberite.org:9921/releases/2015/2015-12-25.zip", + "mailto:support@cuberite.org", + } + for _, u in ipairs(UrlsToTest) do + LOG("URL: " .. u) + local scheme, username, password, host, port, path, query, fragment = cUrlParser:Parse(u) + if not(scheme) then + LOG(" Error: " .. (username or "")) + else + LOG(" Scheme = " .. scheme) + LOG(" Username = " .. username) + LOG(" Password = " .. password) + LOG(" Host = " .. host) + LOG(" Port = " .. port) + LOG(" Path = " .. path) + LOG(" Query = " .. query) + LOG(" Fragment = " .. 
fragment) + end + end + LOG("cUrlParser test complete") + return true +end + + + + + function HandleConsoleBBox(a_Split) local bbox = cBoundingBox(0, 10, 0, 10, 0, 10) local v1 = Vector3d(1, 1, 1) diff --git a/Server/Plugins/Debuggers/Info.lua b/Server/Plugins/Debuggers/Info.lua index f71ee5509..486cfd0d9 100644 --- a/Server/Plugins/Debuggers/Info.lua +++ b/Server/Plugins/Debuggers/Info.lua @@ -253,6 +253,12 @@ g_PluginInfo = Handler = HandleConsoleTestTracer, HelpString = "Tests the cLineBlockTracer", }, + + ["testurlparser"] = + { + Handler = HandleConsoleTestUrlParser, + HelpString = "Tests the cUrlParser", + }, }, -- ConsoleCommands } -- g_PluginInfo diff --git a/src/Bindings/CMakeLists.txt b/src/Bindings/CMakeLists.txt index a53e82581..702d38365 100644 --- a/src/Bindings/CMakeLists.txt +++ b/src/Bindings/CMakeLists.txt @@ -154,5 +154,5 @@ endif() if(NOT MSVC) add_library(Bindings ${SRCS} ${HDRS}) - target_link_libraries(Bindings lua sqlite tolualib mbedtls) + target_link_libraries(Bindings lua sqlite tolualib mbedtls HTTPServer) endif() diff --git a/src/Bindings/ManualBindings.cpp b/src/Bindings/ManualBindings.cpp index 3a595c1d2..42e7e9bd2 100644 --- a/src/Bindings/ManualBindings.cpp +++ b/src/Bindings/ManualBindings.cpp @@ -36,6 +36,7 @@ #include "../StringCompression.h" #include "../CommandOutput.h" #include "../BuildInfo.h" +#include "../HTTPServer/UrlParser.h" @@ -1956,6 +1957,155 @@ static int tolua_get_HTTPRequest_FormData(lua_State* tolua_S) +static int tolua_cUrlParser_GetDefaultPort(lua_State * a_LuaState) +{ + // API function signature: + // cUrlParser:GetDefaultPort("scheme") -> number + + // Check params: + cLuaState L(a_LuaState); + if ( + !L.CheckParamUserTable(1, "cUrlParser") || + !L.CheckParamString(2) || + !L.CheckParamEnd(3) + ) + { + return 0; + } + + // Read params from Lua: + AString scheme; + L.GetStackValue(2, scheme); + + // Execute and push result: + L.Push(cUrlParser::GetDefaultPort(scheme)); + return 1; +} + + + + + +static int 
tolua_cUrlParser_IsKnownScheme(lua_State * a_LuaState) +{ + // API function signature: + // cUrlParser:IsKnownScheme("scheme") -> bool + + // Check params: + cLuaState L(a_LuaState); + if ( + !L.CheckParamUserTable(1, "cUrlParser") || + !L.CheckParamString(2) || + !L.CheckParamEnd(3) + ) + { + return 0; + } + + // Read params from Lua: + AString scheme; + L.GetStackValue(2, scheme); + + // Execute and push result: + L.Push(cUrlParser::IsKnownScheme(scheme)); + return 1; +} + + + + + +static int tolua_cUrlParser_Parse(lua_State * a_LuaState) +{ + // API function signature: + // cUrlParser:Parse("url") -> "scheme", "user", "password", "host", portnum, "path", "query", "fragment" + // On error, returns nil and error message + + // Check params: + cLuaState L(a_LuaState); + if ( + !L.CheckParamUserTable(1, "cUrlParser") || + !L.CheckParamString(2) || + !L.CheckParamEnd(3) + ) + { + return 0; + } + + // Read params from Lua: + AString url; + L.GetStackValue(2, url); + + // Execute and push result: + AString scheme, username, password, host, path, query, fragment; + UInt16 port; + auto res = cUrlParser::Parse(url, scheme, username, password, host, port, path, query, fragment); + if (!res.first) + { + // Error, return nil and error msg: + L.PushNil(); + L.Push(res.second); + return 2; + } + L.Push(scheme); + L.Push(username); + L.Push(password); + L.Push(host); + L.Push(port); + L.Push(path); + L.Push(query); + L.Push(fragment); + return 8; +} + + + + + +static int tolua_cUrlParser_ParseAuthorityPart(lua_State * a_LuaState) +{ + // API function signature: + // cUrlParser:ParseAuthorityPart("authority") -> "user", "password", "host", portnum + // On error, returns nil and error message + // Parts not specified in the "authority" are left empty / zero + + // Check params: + cLuaState L(a_LuaState); + if ( + !L.CheckParamUserTable(1, "cUrlParser") || + !L.CheckParamString(2) || + !L.CheckParamEnd(3) + ) + { + return 0; + } + + // Read params from Lua: + AString authPart; + 
L.GetStackValue(2, authPart); + + // Execute and push result: + AString username, password, host; + UInt16 port; + auto res = cUrlParser::ParseAuthorityPart(authPart, username, password, host, port); + if (!res.first) + { + // Error, return nil and error msg: + L.PushNil(); + L.Push(res.second); + return 2; + } + L.Push(username); + L.Push(password); + L.Push(host); + L.Push(port); + return 4; +} + + + + + static int tolua_cWebAdmin_GetPlugins(lua_State * tolua_S) { cWebAdmin * self = reinterpret_cast(tolua_tousertype(tolua_S, 1, nullptr)); @@ -3224,9 +3374,11 @@ void cManualBindings::Bind(lua_State * tolua_S) tolua_usertype(tolua_S, "cCryptoHash"); tolua_usertype(tolua_S, "cLineBlockTracer"); tolua_usertype(tolua_S, "cStringCompression"); + tolua_usertype(tolua_S, "cUrlParser"); tolua_cclass(tolua_S, "cCryptoHash", "cCryptoHash", "", nullptr); tolua_cclass(tolua_S, "cLineBlockTracer", "cLineBlockTracer", "", nullptr); tolua_cclass(tolua_S, "cStringCompression", "cStringCompression", "", nullptr); + tolua_cclass(tolua_S, "cUrlParser", "cUrlParser", "", nullptr); // Globals: tolua_function(tolua_S, "Clamp", tolua_Clamp); @@ -3390,6 +3542,13 @@ void cManualBindings::Bind(lua_State * tolua_S) tolua_function(tolua_S, "InflateString", tolua_InflateString); tolua_endmodule(tolua_S); + tolua_beginmodule(tolua_S, "cUrlParser"); + tolua_function(tolua_S, "GetDefaultPort", tolua_cUrlParser_GetDefaultPort); + tolua_function(tolua_S, "IsKnownScheme", tolua_cUrlParser_IsKnownScheme); + tolua_function(tolua_S, "Parse", tolua_cUrlParser_Parse); + tolua_function(tolua_S, "ParseAuthorityPart", tolua_cUrlParser_ParseAuthorityPart); + tolua_endmodule(tolua_S); + tolua_beginmodule(tolua_S, "cWebAdmin"); tolua_function(tolua_S, "GetHTMLEscapedString", tolua_AllToLua_cWebAdmin_GetHTMLEscapedString); tolua_function(tolua_S, "GetPlugins", tolua_cWebAdmin_GetPlugins); diff --git a/src/HTTPServer/CMakeLists.txt b/src/HTTPServer/CMakeLists.txt index 6788d50bf..b875b9145 100644 --- 
a/src/HTTPServer/CMakeLists.txt +++ b/src/HTTPServer/CMakeLists.txt @@ -12,7 +12,9 @@ SET (SRCS HTTPServer.cpp MultipartParser.cpp NameValueParser.cpp - SslHTTPConnection.cpp) + SslHTTPConnection.cpp + UrlParser.cpp +) SET (HDRS EnvelopeParser.h @@ -22,7 +24,9 @@ SET (HDRS HTTPServer.h MultipartParser.h NameValueParser.h - SslHTTPConnection.h) + SslHTTPConnection.h + UrlParser.h +) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set_source_files_properties(HTTPServer.cpp PROPERTIES COMPILE_FLAGS "-Wno-error=global-constructors ") diff --git a/src/HTTPServer/UrlParser.cpp b/src/HTTPServer/UrlParser.cpp new file mode 100644 index 000000000..05db3e413 --- /dev/null +++ b/src/HTTPServer/UrlParser.cpp @@ -0,0 +1,200 @@ + +// UrlParser.cpp + +// Implements the cUrlParser class that parses string URL into individual parts + +#include "Globals.h" +#include "UrlParser.h" + + + + + +UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme) +{ + if (a_Scheme == "http") + { + return 80; + } + else if (a_Scheme == "https") + { + return 443; + } + else if (a_Scheme == "ftp") + { + return 21; + } + else if (a_Scheme == "mailto") + { + return 25; + } + return 0; +} + + + + + +std::pair cUrlParser::ParseAuthorityPart( + const AString & a_AuthorityPart, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port +) +{ + /* + a_AuthorityPart format: + [user:password@]host[:port] + host can be an IPv4, hostname, or an IPv6 enclosed in brackets + Assume only the password can contain an additional at-sign + */ + + // Split the authority on the last at-sign, if present: + auto idxLastAtSign = a_AuthorityPart.find_last_of('@'); + auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign); + auto srvrPart = (idxLastAtSign == AString::npos) ? 
a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1); + + // User credentials are completely optional: + auto idxCredColon = credPart.find(':'); + a_Username = credPart.substr(0, idxCredColon); + a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1); + + // Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first + if (srvrPart.empty()) + { + // No host information at all. Bail out with success + a_Host.clear(); + return std::make_pair(true, AString()); + } + if (srvrPart[0] == '[') + { + // [IPv6] host, search for the closing bracket + auto idxClosingBracket = srvrPart.find(']'); + if (idxClosingBracket == AString::npos) + { + return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket"); + } + a_Host = srvrPart.substr(1, idxClosingBracket - 1); // Store the bare address, without either of the enclosing brackets + auto portPart = srvrPart.substr(idxClosingBracket + 1); + if (portPart.empty()) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + if (portPart[0] != ':') + { + return std::make_pair(false, "Invalid port format after IPv6 address, mising colon"); + } + if (!StringToInteger(portPart.substr(1), a_Port)) // Skip only the leading colon; everything after it is the port number + { + return std::make_pair(false, "Failed to parse port number after IPv6 address"); + } + return std::make_pair(true, AString()); + } + + // Not an [IPv6] address, split on the last colon: + auto idxLastColon = srvrPart.find_last_of(':'); + a_Host = srvrPart.substr(0, idxLastColon); + if (idxLastColon == AString::npos) + { + // No port was specified, return success + return std::make_pair(true, AString()); + } + auto portPart = srvrPart.substr(idxLastColon + 1); + if (!StringToInteger(portPart, a_Port)) + { + return std::make_pair(false, "Failed to parse port number after hostname"); + } + return std::make_pair(true, AString()); +} + + + + + +std::pair cUrlParser::Parse( + const AString & a_Url, + AString & a_Scheme, + AString & a_Username, + AString & a_Password, + AString & 
a_Host, + UInt16 & a_Port, + AString & a_Path, + AString & a_Query, + AString & a_Fragment +) +{ + // Find the scheme - the text before the first colon: + auto idxColon = a_Url.find(':'); + if (idxColon == AString::npos) + { + return std::make_pair(false, "Cannot parse the Scheme part of the URL"); + } + a_Scheme = StrToLower(a_Url.substr(0, idxColon)); + a_Port = GetDefaultPort(a_Scheme); + if (a_Port == 0) + { + return std::make_pair(false, Printf("Unknown URL scheme: \"%s\"", a_Scheme.c_str())); + } + + // If the next two chars are a double-slash, skip them: + auto authStart = idxColon + 1; + if (a_Url.substr(authStart, 2) == "//") + { + authStart += 2; + } + + // The Authority part follows the Scheme, until the first slash: + auto idxFirstSlash = a_Url.find('/', authStart + 1); + if (idxFirstSlash == AString::npos) + { + // No slash, the whole end of the Url is the authority part + idxFirstSlash = a_Url.size(); + } + + // Parse the Authority part into individual components: + auto res = ParseAuthorityPart( + a_Url.substr(authStart, idxFirstSlash - authStart), + a_Username, a_Password, + a_Host, a_Port + ); + if (!res.first) + { + return res; + } + + // Parse the rest into a path, query and fragment: + a_Path.clear(); + a_Query.clear(); + a_Fragment.clear(); + if (idxFirstSlash == a_Url.size()) + { + // No additional data, bail out with success + return std::make_pair(true, AString()); + } + auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1); + if (idxPathEnd == AString::npos) + { + a_Path = a_Url.substr(idxFirstSlash); + return std::make_pair(true, AString()); + } + a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash); + auto idxHash = a_Url.find('#', idxPathEnd); + if (idxHash == AString::npos) + { + a_Query = a_Url.substr(idxPathEnd + 1); + return std::make_pair(true, AString()); + } + if (idxHash > idxPathEnd) + { + a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1); + } + a_Fragment = a_Url.substr(idxHash + 1); + return 
std::make_pair(true, AString()); +} + + + + + diff --git a/src/HTTPServer/UrlParser.h b/src/HTTPServer/UrlParser.h new file mode 100644 index 000000000..15a63e05d --- /dev/null +++ b/src/HTTPServer/UrlParser.h @@ -0,0 +1,58 @@ + +// UrlParser.h + +// Declares the cUrlParser class that parses string URL into individual parts + + + + + +#pragma once + + + + + +class cUrlParser +{ +public: + /** Returns true if the specified scheme (http, ftp, mailto, ...) is recognized by the URL parser. + Is case sensitive, known schemes are always lowercase. */ + static bool IsKnownScheme(const AString & a_Scheme) { return (GetDefaultPort(a_Scheme) > 0); } + + /** Returns the default port used by the specified scheme / protocol. + If the scheme is not known, 0 is returned. */ + static UInt16 GetDefaultPort(const AString & a_Scheme); + + /** Parses the given Authority part of an URL into individual components. + Returns true on success, + returns false and error message on failure. */ + static std::pair ParseAuthorityPart( + const AString & a_AuthorityPart, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port + ); + + /** Parses the given URL into individual components. + Returns true on success, + returns false and error message on failure. + Fails if the scheme (protocol) is not known. + If port is missing, the default port for the specific scheme is applied. */ + static std::pair Parse( + const AString & a_Url, + AString & a_Scheme, + AString & a_Username, + AString & a_Password, + AString & a_Host, + UInt16 & a_Port, + AString & a_Path, + AString & a_Query, + AString & a_Fragment + ); +}; + + + + -- cgit v1.2.3