From eb9111646e78cf5b7ce5f6d516eca78b49684186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juhani=20Krekel=C3=A4?= Date: Tue, 20 Apr 2021 02:39:41 +0300 Subject: [PATCH] Use a more robust scheme to separate the scheme, hostname, and filename in URLParse() --- common/url.c | 98 ++++++++++++++++++---------------------------------- 1 file changed, 34 insertions(+), 64 deletions(-) diff --git a/common/url.c b/common/url.c index 2cf458a..37412ef 100644 --- a/common/url.c +++ b/common/url.c @@ -404,8 +404,7 @@ MemPool mp; char *url; { URLParts *up; - char *start; - char *colon, *slash, *fslash; + char *cursor; char *pound; /* link pound (#) sign */ char *at; /* username/password @ */ char *ucolon; /* username colon */ @@ -414,83 +413,49 @@ char *url; up = URLCreate(mp); /* skip leading white-space (if any)*/ - for (start = url; isspace8(*start); start++) + for (cursor = url; isspace8(*cursor); cursor++) ; - /* - * Look for indication of a scheme. - */ - colon = strchr(start, ':'); + // Extract the scheme, if any + up->scheme = URLGetScheme(mp, cursor); - /* - * Search for characters that indicate the beginning of the - * path/params/query/fragment part. - */ - slash = strchr(start, '/'); - if (slash == NULL) slash = strchr(start, ';'); - if (slash == NULL) slash = strchr(start, '?'); - if (slash == NULL) slash = strchr(start, '#'); - - /* - * Check to see if there is a scheme. There is a scheme only if - * all other separators appear after the colon. - */ - if (colon != NULL && (slash == NULL || colon < slash)) + if (up->scheme) { - up->scheme = MPGet(mp, colon - start + 1); - strncpy(up->scheme, start, colon - start); - up->scheme[colon - start] = '\0'; + // Skip the scheme and the : that follows + // up->scheme contains the part before the : + // Therefore, its cursor + length == the position of the : + // We know that we have the : there, so skip that position + cursor += strlen(up->scheme) + 1; } - /* - * If there is a slash then sort out the hostname and filename. - * If there is no slash then there is no hostname but there is a - * filename. - */ - if (slash != NULL) + // If we have scheme://, we have a hostname and filename + // Otherwise, only filename + if (up->scheme && cursor[0] == '/' && cursor[1] == '/') { - /* - * Check for leading //. If its there then there is a host string. - */ - if ((*(slash + 1) == '/') && ((colon == NULL && slash == start) || - (colon != NULL && slash == colon + 1))) + // Move the cursor after the // + cursor += 2; + + // We know we have at least the hostname + // Do we also have a slash, marking the existence of filename? + const char *slash = strchr(cursor, '/'); + if (slash != NULL) { - /* - * Check for filename at end of host string. - */ - slash += 2; - if ((fslash = strchr(slash, '/')) != NULL) - { - up->hostname = MPGet(mp, fslash - slash + 1); - strncpy(up->hostname, slash, fslash - slash); - up->hostname[fslash - slash] = '\0'; - up->filename = MPStrDup(mp, fslash); - } - else - { /* there is no filename */ - up->hostname = MPStrDup(mp, slash); - } + // Yes, until the slash is hostname, after it the filename + up->hostname = MPGet(mp, slash - cursor + 1); + strncpy(up->hostname, cursor, slash - cursor); + up->hostname[slash - cursor] = '\0'; + up->filename = MPStrDup(mp, slash); } else { - /* - * the rest is a filename because there is no // or it appears - * after other characters - */ - if (colon != NULL && colon < slash) - { - up->filename = MPStrDup(mp, colon + 1); - } - else up->filename = MPStrDup(mp, start); + // No, the whole thing is a hostname + up->hostname = MPStrDup(mp, cursor); } } else { - /* - * No slashes at all so the rest must be a filename. - */ - if (colon == NULL) up->filename = MPStrDup(mp, start); - else up->filename = MPStrDup(mp, colon + 1); + // No //, so this is all filename + up->filename = MPStrDup(mp, cursor); } /* @@ -597,14 +562,19 @@ char *url; { for (dp = URLDELIMS; *dp != '\0'; dp++) { + // Did we come across a URL delimiter? if (*cp == *dp) { + // Yes, was it ':'? if (*cp == ':') { + // Yes it was, scheme is what was before it r = (char *)MPCGet(mp, cp - url + 1); strncpy(r, url, cp - url); + r[cp - url] = '\0'; return(r); } + // No, it was something else. No scheme found. return(NULL); } }