summaryrefslogtreecommitdiffstats
path: root/src/HTTP/UrlParser.cpp
blob: 85b1cd216922a64f6e842689c3761494813469b2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

// UrlParser.cpp

// Implements the cUrlParser class that parses a URL string into its individual parts

#include "Globals.h"
#include "UrlParser.h"





/** Returns the default port for the specified scheme.
The scheme is matched exactly (case-sensitively) against the known schemes.
Returns 0 if the scheme is not among them. */
UInt16 cUrlParser::GetDefaultPort(const AString & a_Scheme)
{
	// The schemes known to the parser, together with their default ports:
	static const struct
	{
		const char * m_Scheme;
		UInt16 m_Port;
	} knownSchemes[] =
	{
		{ "http",   80 },
		{ "https",  443 },
		{ "ftp",    21 },
		{ "mailto", 25 },
	};

	for (const auto & known: knownSchemes)
	{
		if (a_Scheme == known.m_Scheme)
		{
			return known.m_Port;
		}
	}

	// Not a known scheme:
	return 0;
}





/** Splits the authority part of a URL into the user credentials, host and port.
a_AuthorityPart has the format "[user[:password]@]host[:port]", where host is a hostname, an IPv4
address, or an IPv6 address enclosed in brackets.
a_Username and a_Password are always assigned (empty if the credentials are not present).
a_Host receives the host; for a bracketed IPv6 address the enclosing brackets are stripped.
a_Port is written only when the authority contains a port; otherwise the caller's value is kept.
Returns { true, "" } on success and { false, error message } on failure. */
std::pair<bool, AString> cUrlParser::ParseAuthorityPart(
	const AString & a_AuthorityPart,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port
)
{
	/*
	a_AuthorityPart format:
	[user:password@]host[:port]
	host can be an IPv4, hostname, or an IPv6 enclosed in brackets
	Assume only the password can contain an additional at-sign
	*/

	// Split the authority on the last at-sign, if present:
	auto idxLastAtSign = a_AuthorityPart.find_last_of('@');
	auto credPart = (idxLastAtSign == AString::npos) ? AString() : a_AuthorityPart.substr(0, idxLastAtSign);
	auto srvrPart = (idxLastAtSign == AString::npos) ? a_AuthorityPart : a_AuthorityPart.substr(idxLastAtSign + 1);

	// User credentials are completely optional; the username ends at the first colon:
	auto idxCredColon = credPart.find(':');
	a_Username = credPart.substr(0, idxCredColon);
	a_Password = (idxCredColon == AString::npos) ? AString() : credPart.substr(idxCredColon + 1);

	// Host can be a hostname, IPv4 or [IPv6]. If in brackets, search for the closing bracket first
	if (srvrPart.empty())
	{
		// No host information at all. Bail out with success
		a_Host.clear();
		return std::make_pair(true, AString());
	}
	if (srvrPart[0] == '[')
	{
		// [IPv6] host, search for the closing bracket
		auto idxClosingBracket = srvrPart.find(']');
		if (idxClosingBracket == AString::npos)
		{
			return std::make_pair(false, "Invalid IPv6-like address, missing closing bracket");
		}

		// Store the address without its brackets (the opening bracket is at index 0):
		a_Host = srvrPart.substr(1, idxClosingBracket - 1);
		auto portPart = srvrPart.substr(idxClosingBracket + 1);
		if (portPart.empty())
		{
			// No port was specified, return success
			return std::make_pair(true, AString());
		}
		if (portPart[0] != ':')
		{
			return std::make_pair(false, "Invalid port format after IPv6 address, mising colon");
		}

		// Skip only the colon; the port number starts right after it:
		if (!StringToInteger(portPart.substr(1), a_Port))
		{
			return std::make_pair(false, "Failed to parse port number after IPv6 address");
		}
		return std::make_pair(true, AString());
	}

	// Not an [IPv6] address, split on the last colon:
	auto idxLastColon = srvrPart.find_last_of(':');
	a_Host = srvrPart.substr(0, idxLastColon);  // Takes the whole srvrPart if there's no colon
	if (idxLastColon == AString::npos)
	{
		// No port was specified, return success
		return std::make_pair(true, AString());
	}
	auto portPart = srvrPart.substr(idxLastColon + 1);
	if (!StringToInteger(portPart, a_Port))
	{
		return std::make_pair(false, "Failed to parse port number after hostname");
	}
	return std::make_pair(true, AString());
}





/** Parses a_Url into its individual components.
The expected format is "scheme:[//][user[:password]@]host[:port][/path][?query][#fragment]".
a_Scheme receives the lowercased scheme; it must be one that GetDefaultPort() knows.
a_Port is preset to the scheme's default port before the authority is parsed by ParseAuthorityPart().
a_Path includes its leading slash; a_Query and a_Fragment are stored without the leading '?' / '#'.
Returns { true, "" } on success and { false, error message } on failure; on failure some of the
output parameters may already have been written. */
std::pair<bool, AString> cUrlParser::Parse(
	const AString & a_Url,
	AString & a_Scheme,
	AString & a_Username,
	AString & a_Password,
	AString & a_Host,
	UInt16 & a_Port,
	AString & a_Path,
	AString & a_Query,
	AString & a_Fragment
)
{
	// Find the scheme - the text before the first colon:
	auto idxColon = a_Url.find(':');
	if (idxColon == AString::npos)
	{
		return std::make_pair(false, "Cannot parse the Scheme part of the URL");
	}
	a_Scheme = StrToLower(a_Url.substr(0, idxColon));
	a_Port = GetDefaultPort(a_Scheme);
	if (a_Port == 0)
	{
		return std::make_pair(false, fmt::format(FMT_STRING("Unknown URL scheme: \"{}\""), a_Scheme));
	}

	// If the next two chars are a double-slash, skip them:
	auto authStart = idxColon + 1;
	if (a_Url.substr(authStart, 2) == "//")
	{
		authStart += 2;
	}

	// The Authority part follows the Scheme and ends at the first slash, question mark or hash
	// (RFC 3986, section 3.2). The search starts right at authStart, so that an empty authority
	// (such as in "http:///path") doesn't swallow the path:
	auto idxAuthEnd = a_Url.find_first_of("/?#", authStart);
	if (idxAuthEnd == AString::npos)
	{
		// No terminator, the whole end of the Url is the authority part
		idxAuthEnd = a_Url.size();
	}

	// Parse the Authority part into individual components:
	auto res = ParseAuthorityPart(
		a_Url.substr(authStart, idxAuthEnd - authStart),
		a_Username, a_Password,
		a_Host, a_Port
	);
	if (!res.first)
	{
		return res;
	}

	// Parse the rest into a path, query and fragment:
	a_Path.clear();
	a_Query.clear();
	a_Fragment.clear();
	if (idxAuthEnd == a_Url.size())
	{
		// No additional data, bail out with success
		return std::make_pair(true, AString());
	}

	// The path ends at the first '?' or '#'. If the authority itself was terminated by one of those,
	// idxPathEnd equals idxAuthEnd and the path stays empty:
	auto idxPathEnd = a_Url.find_first_of("?#", idxAuthEnd);
	if (idxPathEnd == AString::npos)
	{
		a_Path = a_Url.substr(idxAuthEnd);
		return std::make_pair(true, AString());
	}
	a_Path = a_Url.substr(idxAuthEnd, idxPathEnd - idxAuthEnd);

	// Everything after the first hash is the fragment, even if it contains a question mark:
	auto idxHash = a_Url.find('#', idxPathEnd);
	if (idxHash == AString::npos)
	{
		a_Query = a_Url.substr(idxPathEnd + 1);
		return std::make_pair(true, AString());
	}
	if (idxHash > idxPathEnd)
	{
		// The path was terminated by a question mark, the query lies between it and the hash:
		a_Query = a_Url.substr(idxPathEnd + 1, idxHash - idxPathEnd - 1);
	}
	a_Fragment = a_Url.substr(idxHash + 1);
	return std::make_pair(true, AString());
}