Use a more robust approach to separate the scheme, hostname, and filename in URLParse()

This commit is contained in:
Juhani Krekelä 2021-04-20 02:39:41 +03:00
parent 3680d14485
commit eb9111646e
1 changed files with 34 additions and 64 deletions

View File

@@ -404,8 +404,7 @@ MemPool mp;
char *url;
{
URLParts *up;
char *start;
char *colon, *slash, *fslash;
char *cursor;
char *pound; /* link pound (#) sign */
char *at; /* username/password @ */
char *ucolon; /* username colon */
@@ -414,83 +413,49 @@ char *url;
up = URLCreate(mp);
/* skip leading white-space (if any)*/
for (start = url; isspace8(*start); start++)
for (cursor = url; isspace8(*cursor); cursor++)
;
/*
* Look for indication of a scheme.
*/
colon = strchr(start, ':');
// Extract the scheme, if any
up->scheme = URLGetScheme(mp, cursor);
/*
* Search for characters that indicate the beginning of the
* path/params/query/fragment part.
*/
slash = strchr(start, '/');
if (slash == NULL) slash = strchr(start, ';');
if (slash == NULL) slash = strchr(start, '?');
if (slash == NULL) slash = strchr(start, '#');
/*
* Check to see if there is a scheme. There is a scheme only if
* all other separators appear after the colon.
*/
if (colon != NULL && (slash == NULL || colon < slash))
if (up->scheme)
{
up->scheme = MPGet(mp, colon - start + 1);
strncpy(up->scheme, start, colon - start);
up->scheme[colon - start] = '\0';
// Skip the scheme and the : that follows
// up->scheme contains the part before the :
// Therefore, its cursor + length == the position of the :
// We know that we have the : there, so skip that position
cursor += strlen(up->scheme) + 1;
}
/*
* If there is a slash then sort out the hostname and filename.
* If there is no slash then there is no hostname but there is a
* filename.
*/
if (slash != NULL)
// If we have scheme://, we have a hostname and filename
// Otherwise, only filename
if (up->scheme && cursor[0] == '/' && cursor[1] == '/')
{
/*
* Check for leading //. If it's there then there is a host string.
*/
if ((*(slash + 1) == '/') && ((colon == NULL && slash == start) ||
(colon != NULL && slash == colon + 1)))
// Move the cursor after the //
cursor += 2;
// We know we have at least the hostname
// Do we also have a slash, marking the existence of filename?
const char *slash = strchr(cursor, '/');
if (slash != NULL)
{
/*
* Check for filename at end of host string.
*/
slash += 2;
if ((fslash = strchr(slash, '/')) != NULL)
{
up->hostname = MPGet(mp, fslash - slash + 1);
strncpy(up->hostname, slash, fslash - slash);
up->hostname[fslash - slash] = '\0';
up->filename = MPStrDup(mp, fslash);
}
else
{ /* there is no filename */
up->hostname = MPStrDup(mp, slash);
}
// Yes, until the slash is hostname, after it the filename
up->hostname = MPGet(mp, slash - cursor + 1);
strncpy(up->hostname, cursor, slash - cursor);
up->hostname[slash - cursor] = '\0';
up->filename = MPStrDup(mp, slash);
}
else
{
/*
* the rest is a filename because there is no // or it appears
* after other characters
*/
if (colon != NULL && colon < slash)
{
up->filename = MPStrDup(mp, colon + 1);
}
else up->filename = MPStrDup(mp, start);
// No, the whole thing is a hostname
up->hostname = MPStrDup(mp, cursor);
}
}
else
{
/*
* No slashes at all so the rest must be a filename.
*/
if (colon == NULL) up->filename = MPStrDup(mp, start);
else up->filename = MPStrDup(mp, colon + 1);
// No //, so this is all filename
up->filename = MPStrDup(mp, cursor);
}
/*
@@ -597,14 +562,19 @@ char *url;
{
for (dp = URLDELIMS; *dp != '\0'; dp++)
{
// Did we come across a URL delimiter?
if (*cp == *dp)
{
// Yes, was it ':'?
if (*cp == ':')
{
// Yes it was, scheme is what was before it
r = (char *)MPCGet(mp, cp - url + 1);
strncpy(r, url, cp - url);
r[cp - url] = '\0';
return(r);
}
// No, it was something else. No scheme found.
return(NULL);
}
}