Use a more robust approach to separate the scheme, hostname, and filename in URLParse()

This commit is contained in:
Juhani Krekelä 2021-04-20 02:39:41 +03:00
parent 3680d14485
commit eb9111646e
1 changed file with 34 additions and 64 deletions

View File

@ -404,8 +404,7 @@ MemPool mp;
char *url; char *url;
{ {
URLParts *up; URLParts *up;
char *start; char *cursor;
char *colon, *slash, *fslash;
char *pound; /* link pound (#) sign */ char *pound; /* link pound (#) sign */
char *at; /* username/password @ */ char *at; /* username/password @ */
char *ucolon; /* username colon */ char *ucolon; /* username colon */
@ -414,83 +413,49 @@ char *url;
up = URLCreate(mp); up = URLCreate(mp);
/* skip leading white-space (if any)*/ /* skip leading white-space (if any)*/
for (start = url; isspace8(*start); start++) for (cursor = url; isspace8(*cursor); cursor++)
; ;
/* // Extract the scheme, if any
* Look for indication of a scheme. up->scheme = URLGetScheme(mp, cursor);
*/
colon = strchr(start, ':');
/* if (up->scheme)
* Search for characters that indicate the beginning of the
* path/params/query/fragment part.
*/
slash = strchr(start, '/');
if (slash == NULL) slash = strchr(start, ';');
if (slash == NULL) slash = strchr(start, '?');
if (slash == NULL) slash = strchr(start, '#');
/*
* Check to see if there is a scheme. There is a scheme only if
* all other separators appear after the colon.
*/
if (colon != NULL && (slash == NULL || colon < slash))
{ {
up->scheme = MPGet(mp, colon - start + 1); // Skip the scheme and the : that follows
strncpy(up->scheme, start, colon - start); // up->scheme contains the part before the :
up->scheme[colon - start] = '\0'; // Therefore, its cursor + length == the position of the :
// We know that we have the : there, so skip that position
cursor += strlen(up->scheme) + 1;
} }
/* // If we have scheme://, we have a hostname and filename
* If there is a slash then sort out the hostname and filename. // Otherwise, only filename
* If there is no slash then there is no hostname but there is a if (up->scheme && cursor[0] == '/' && cursor[1] == '/')
* filename.
*/
if (slash != NULL)
{ {
/* // Move the cursor after the //
* Check for leading //. If its there then there is a host string. cursor += 2;
*/
if ((*(slash + 1) == '/') && ((colon == NULL && slash == start) || // We know we have at least the hostname
(colon != NULL && slash == colon + 1))) // Do we also have a slash, marking the existence of filename?
const char *slash = strchr(cursor, '/');
if (slash != NULL)
{ {
/* // Yes, until the slash is hostname, after it the filename
* Check for filename at end of host string. up->hostname = MPGet(mp, slash - cursor + 1);
*/ strncpy(up->hostname, cursor, slash - cursor);
slash += 2; up->hostname[slash - cursor] = '\0';
if ((fslash = strchr(slash, '/')) != NULL) up->filename = MPStrDup(mp, slash);
{
up->hostname = MPGet(mp, fslash - slash + 1);
strncpy(up->hostname, slash, fslash - slash);
up->hostname[fslash - slash] = '\0';
up->filename = MPStrDup(mp, fslash);
}
else
{ /* there is no filename */
up->hostname = MPStrDup(mp, slash);
}
} }
else else
{ {
/* // No, the whole thing is a hostname
* the rest is a filename because there is no // or it appears up->hostname = MPStrDup(mp, cursor);
* after other characters
*/
if (colon != NULL && colon < slash)
{
up->filename = MPStrDup(mp, colon + 1);
}
else up->filename = MPStrDup(mp, start);
} }
} }
else else
{ {
/* // No //, so this is all filename
* No slashes at all so the rest must be a filename. up->filename = MPStrDup(mp, cursor);
*/
if (colon == NULL) up->filename = MPStrDup(mp, start);
else up->filename = MPStrDup(mp, colon + 1);
} }
/* /*
@ -597,14 +562,19 @@ char *url;
{ {
for (dp = URLDELIMS; *dp != '\0'; dp++) for (dp = URLDELIMS; *dp != '\0'; dp++)
{ {
// Did we come across a URL delimiter?
if (*cp == *dp) if (*cp == *dp)
{ {
// Yes, was it ':'?
if (*cp == ':') if (*cp == ':')
{ {
// Yes it was, scheme is what was before it
r = (char *)MPCGet(mp, cp - url + 1); r = (char *)MPCGet(mp, cp - url + 1);
strncpy(r, url, cp - url); strncpy(r, url, cp - url);
r[cp - url] = '\0';
return(r); return(r);
} }
// No, it was something else. No scheme found.
return(NULL); return(NULL);
} }
} }