summaryrefslogblamecommitdiffstats
path: root/src/HTTP/UrlParser.cpp
blob: 85b1cd216922a64f6e842689c3761494813469b2 (plain) (tree)








































































































































                                                                                                                       
                                                                                                              





























































                                                                                 

// UrlParser.cpp

// Implements the cUrlParser class that parses string URL into individual parts

#include "Globals.h"
#include "UrlParser.h"





UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme)
{
	if (a_Scheme == "http")
	{
		return 80;
	}
	else if (a_Scheme == "https")
	{
		return 443;
	}
	else if (a_Scheme == "ftp")
	{
		return 21;
	}
	else if (a_Scheme == "mailto")
	{
		return 25;
	}
	return 0;
}





std::pair<bool, AString> cUrlParser::ParseAuthorityPart(
	const AString & a_AuthorityPart,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port
)
{
	/*
	a_AuthorityPart format:
	[user:password@]host[:port]
	host can be an IPv4, hostname, or an IPv6 enclosed in brackets
	Assume only the password can contain an additional at-sign
	*/

	// Split the authority on the last at-sign, if present:
	auto idxLastAtSign = a_AuthorityPart.find_last_of('@');
	auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign);
	auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1);

	// User credentials are completely optional:
	auto idxCredColon = credPart.find(':');
	a_Username = credPart.substr(0, idxCredColon);
	a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1);

	// Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first
	if (srvrPart.empty())
	{
		// No host information at all. Bail out with success
		a_Host.clear();
		return std::make_pair(true, AString());
	}
	if (srvrPart[0] == '[')
	{
		// [IPv6] host, search for the closing bracket
		auto idxClosingBracket = srvrPart.find(']');
		if (idxClosingBracket == AString::npos)
		{
			return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket");
		}
		a_Host = srvrPart.substr(0, idxClosingBracket);
		auto portPart = srvrPart.substr(idxClosingBracket + 1);
		if (portPart.empty())
		{
			// No port was specified, return success
			return std::make_pair(true, AString());
		}
		if (portPart[0] != ':')
		{
			return std::make_pair(false, "Invalid port format after IPv6 address, mising colon");
		}
		if (!StringToInteger(portPart.substr(2), a_Port))
		{
			return std::make_pair(false, "Failed to parse port number after IPv6 address");
		}
		return std::make_pair(true, AString());
	}

	// Not an [IPv6] address, split on the last colon:
	auto idxLastColon = srvrPart.find_last_of(':');
	a_Host = srvrPart.substr(0, idxLastColon);
	if (idxLastColon == AString::npos)
	{
		// No port was specified, return success
		return std::make_pair(true, AString());
	}
	auto portPart = srvrPart.substr(idxLastColon + 1);
	if (!StringToInteger(portPart, a_Port))
	{
		return std::make_pair(false, "Failed to parse port number after hostname");
	}
	return std::make_pair(true, AString());
}





std::pair<bool, AString> cUrlParser::Parse(
	const AString & a_Url,
	AString & a_Scheme,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port,
	AString & a_Path,
	AString & a_Query,
	AString & a_Fragment
)
{
	// Find the scheme - the text before the first colon:
	auto idxColon = a_Url.find(':');
	if (idxColon == AString::npos)
	{
		return std::make_pair(false, "Cannot parse the Scheme part of the URL");
	}
	a_Scheme = StrToLower(a_Url.substr(0, idxColon));
	a_Port = GetDefaultPort(a_Scheme);
	if (a_Port == 0)
	{
		return std::make_pair(false, fmt::format(FMT_STRING("Unknown URL scheme: \"{}\""), a_Scheme));
	}

	// If the next two chars are a double-slash, skip them:
	auto authStart = idxColon + 1;
	if (a_Url.substr(authStart, 2) == "//")
	{
		authStart += 2;
	}

	// The Authority part follows the Scheme, until the first slash:
	auto idxFirstSlash = a_Url.find('/', authStart + 1);
	if (idxFirstSlash == AString::npos)
	{
		// No slash, the whole end of the Url is the authority part
		idxFirstSlash = a_Url.size();
	}

	// Parse the Authority part into individual components:
	auto res = ParseAuthorityPart(
		a_Url.substr(authStart, idxFirstSlash - authStart),
		a_Username, a_Password,
		a_Host, a_Port
	);
	if (!res.first)
	{
		return res;
	}

	// Parse the rest into a path, query and fragment:
	a_Path.clear();
	a_Query.clear();
	a_Fragment.clear();
	if (idxFirstSlash == a_Url.size())
	{
		// No additional data, bail out with success
		return std::make_pair(true, AString());
	}
	auto idxPathEnd = a_Url.find_first_of("?#", idxFirstSlash + 1);
	if (idxPathEnd == AString::npos)
	{
		a_Path = a_Url.substr(idxFirstSlash);
		return std::make_pair(true, AString());
	}
	a_Path = a_Url.substr(idxFirstSlash, idxPathEnd - idxFirstSlash);
	auto idxHash = a_Url.find('#', idxPathEnd);
	if (idxHash == AString::npos)
	{
		a_Query = a_Url.substr(idxPathEnd + 1);
		return std::make_pair(true, AString());
	}
	if (idxHash > idxPathEnd)
	{
		a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1);
	}
	a_Fragment = a_Url.substr(idxHash + 1);
	return std::make_pair(true, AString());
}