Use a more robust approach to separate the scheme, hostname, and filename in URLParse()
This commit is contained in:
parent
3680d14485
commit
eb9111646e
98
common/url.c
98
common/url.c
|
@ -404,8 +404,7 @@ MemPool mp;
|
|||
char *url;
|
||||
{
|
||||
URLParts *up;
|
||||
char *start;
|
||||
char *colon, *slash, *fslash;
|
||||
char *cursor;
|
||||
char *pound; /* link pound (#) sign */
|
||||
char *at; /* username/password @ */
|
||||
char *ucolon; /* username colon */
|
||||
|
@ -414,83 +413,49 @@ char *url;
|
|||
up = URLCreate(mp);
|
||||
|
||||
/* skip leading white-space (if any)*/
|
||||
for (start = url; isspace8(*start); start++)
|
||||
for (cursor = url; isspace8(*cursor); cursor++)
|
||||
;
|
||||
|
||||
/*
|
||||
* Look for indication of a scheme.
|
||||
*/
|
||||
colon = strchr(start, ':');
|
||||
// Extract the scheme, if any
|
||||
up->scheme = URLGetScheme(mp, cursor);
|
||||
|
||||
/*
|
||||
* Search for characters that indicate the beginning of the
|
||||
* path/params/query/fragment part.
|
||||
*/
|
||||
slash = strchr(start, '/');
|
||||
if (slash == NULL) slash = strchr(start, ';');
|
||||
if (slash == NULL) slash = strchr(start, '?');
|
||||
if (slash == NULL) slash = strchr(start, '#');
|
||||
|
||||
/*
|
||||
* Check to see if there is a scheme. There is a scheme only if
|
||||
* all other separators appear after the colon.
|
||||
*/
|
||||
if (colon != NULL && (slash == NULL || colon < slash))
|
||||
if (up->scheme)
|
||||
{
|
||||
up->scheme = MPGet(mp, colon - start + 1);
|
||||
strncpy(up->scheme, start, colon - start);
|
||||
up->scheme[colon - start] = '\0';
|
||||
// Skip the scheme and the : that follows
|
||||
// up->scheme contains the part before the :
|
||||
// Therefore, cursor + its length == the position of the :
|
||||
// We know that we have the : there, so skip that position
|
||||
cursor += strlen(up->scheme) + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a slash then sort out the hostname and filename.
|
||||
* If there is no slash then there is no hostname but there is a
|
||||
* filename.
|
||||
*/
|
||||
if (slash != NULL)
|
||||
// If we have scheme://, we have a hostname and filename
|
||||
// Otherwise, only filename
|
||||
if (up->scheme && cursor[0] == '/' && cursor[1] == '/')
|
||||
{
|
||||
/*
|
||||
* Check for leading //. If it's there then there is a host string.
|
||||
*/
|
||||
if ((*(slash + 1) == '/') && ((colon == NULL && slash == start) ||
|
||||
(colon != NULL && slash == colon + 1)))
|
||||
// Move the cursor after the //
|
||||
cursor += 2;
|
||||
|
||||
// We know we have at least the hostname
|
||||
// Do we also have a slash, marking the existence of filename?
|
||||
const char *slash = strchr(cursor, '/');
|
||||
if (slash != NULL)
|
||||
{
|
||||
/*
|
||||
* Check for filename at end of host string.
|
||||
*/
|
||||
slash += 2;
|
||||
if ((fslash = strchr(slash, '/')) != NULL)
|
||||
{
|
||||
up->hostname = MPGet(mp, fslash - slash + 1);
|
||||
strncpy(up->hostname, slash, fslash - slash);
|
||||
up->hostname[fslash - slash] = '\0';
|
||||
up->filename = MPStrDup(mp, fslash);
|
||||
}
|
||||
else
|
||||
{ /* there is no filename */
|
||||
up->hostname = MPStrDup(mp, slash);
|
||||
}
|
||||
// Yes, until the slash is hostname, after it the filename
|
||||
up->hostname = MPGet(mp, slash - cursor + 1);
|
||||
strncpy(up->hostname, cursor, slash - cursor);
|
||||
up->hostname[slash - cursor] = '\0';
|
||||
up->filename = MPStrDup(mp, slash);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* the rest is a filename because there is no // or it appears
|
||||
* after other characters
|
||||
*/
|
||||
if (colon != NULL && colon < slash)
|
||||
{
|
||||
up->filename = MPStrDup(mp, colon + 1);
|
||||
}
|
||||
else up->filename = MPStrDup(mp, start);
|
||||
// No, the whole thing is a hostname
|
||||
up->hostname = MPStrDup(mp, cursor);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No slashes at all so the rest must be a filename.
|
||||
*/
|
||||
if (colon == NULL) up->filename = MPStrDup(mp, start);
|
||||
else up->filename = MPStrDup(mp, colon + 1);
|
||||
// No //, so this is all filename
|
||||
up->filename = MPStrDup(mp, cursor);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -597,14 +562,19 @@ char *url;
|
|||
{
|
||||
for (dp = URLDELIMS; *dp != '\0'; dp++)
|
||||
{
|
||||
// Did we come across a URL delimiter?
|
||||
if (*cp == *dp)
|
||||
{
|
||||
// Yes, was it ':'?
|
||||
if (*cp == ':')
|
||||
{
|
||||
// Yes it was, scheme is what was before it
|
||||
r = (char *)MPCGet(mp, cp - url + 1);
|
||||
strncpy(r, url, cp - url);
|
||||
r[cp - url] = '\0';
|
||||
return(r);
|
||||
}
|
||||
// No, it was something else. No scheme found.
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue