/*
 * url.c
 *
 * Copyright (C) 1993-1997, John Kilburg
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include "port_before.h"

/*
 * NOTE(review): the names of the four system headers below were not
 * legible in the copy under review.  They are inferred from the library
 * calls this file makes (snprintf, isalnum, strlen, strtol) and from the
 * HAVE_STRING_H / HAVE_STDLIB_H guards -- confirm against the build.
 */
#include <stdio.h>
#include <ctype.h>

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#include "port_after.h"

#include "url.h"

/* Characters that end the scan for a scheme in URLGetScheme(). */
#define URLDELIMS ":/?#"

/* True for a NULL pointer or an empty string. */
#ifndef NullString
#define NullString(s) ((s) == NULL || *(s) == '\0')
#endif

static URLParts *URLCreate _ArgProto((MemPool));
static char *resolve_filename _ArgProto((MemPool, char *, char *));

/*
 * field_or_empty
 *
 * Maps a NULL string to "" so that optional URLParts fields can be
 * handed to the string comparison routines safely.
 */
static const char *
field_or_empty(s)
const char *s;
{
  return(s != NULL ? s : "");
}

/*
 * URLcmp
 *
 * Compares two parsed URLs.
 *
 * u1, u2 - the URLs to compare
 *
 * Returns 0 when scheme, hostname and filename match (compared without
 * regard to case) and the ports are equal; returns -1 otherwise.
 * Username, password and fragment are not looked at.
 *
 * A field that is NULL is compared as if it were the empty string;
 * URLParse() leaves fields NULL when the URL does not carry them, and
 * passing NULL to strcasecmp() is undefined.
 */
int
URLcmp(u1, u2)
URLParts *u1, *u2;
{
  if (strcasecmp(field_or_empty(u1->scheme),
		 field_or_empty(u2->scheme)) != 0)
  {
    return(-1);
  }

  if (strcasecmp(field_or_empty(u1->hostname),
		 field_or_empty(u2->hostname)) != 0)
  {
    return(-1);
  }

  if (u1->port != u2->port) return(-1);

  if (strcasecmp(field_or_empty(u1->filename),
		 field_or_empty(u2->filename)) != 0)
  {
    return(-1);
  }

  return(0);
}

/*
 * URLEscape
 *
 * Puts escape codes in URLs.  NOT complete.
*/ char * URLEscape(mp, url, s2p) MemPool mp; char *url; bool s2p; { char *cp; char *n, *s; static char *hex = "0123456789ABCDEF"; /* * use a bit of memory so i don't have to mess around here */ s = n = (char *)MPGet(mp, strlen(url) * 3 + 2); for (cp = url; *cp; cp++, n++) { if (*cp == ' ' && s2p) { *n = '+'; } else if (*cp == '+' && s2p) { *n = '%'; n++; *n = hex[*cp / 16]; n++; *n = hex[*cp % 16]; } #ifdef ORIGINAL_CODE /* else if (isalnum(*cp) || strchr("$-_.'(),+!*", *cp)) */ else if (strchr("<>\"#{}|\\^~[]`",*cp)) #else /* My CGI scripts and the Apache daemon say it's closer the first way - djhjr */ else if (isalnum(*cp) || strchr("$-_.'(),+!*", *cp)) #endif { *n = *cp; } else { *n = '%'; n++; *n = hex[*cp / 16]; n++; *n = hex[*cp % 16]; } } *n = '\0'; return(s); } /* * UnescapeURL * * Converts the escape codes (%xx) into actual characters. NOT complete. * Could do everthing in place I guess. */ char * URLUnescape(mp, url) MemPool mp; char *url; { char *cp, *n, *s; char hex[3]; s = n = (char *)MPGet(mp, strlen(url) + 2); for (cp = url; *cp; cp++, n++) { if (*cp == '%') { cp++; if (*cp == '%') { *n = *cp; } else { hex[0] = *cp; cp++; hex[1] = *cp; hex[2] = '\0'; *n = (char)strtol(hex, NULL, 16); } } else if (*cp == '+') *n = ' '; else { *n = *cp; } } *n = '\0'; return(s); } /* * URLMakeString */ char * URLMakeString(mp, up, addfrag) MemPool mp; URLParts *up; bool addfrag; { size_t len; char *u; char *delim; char *delim2; char *filename; char *hostname; char *scheme; char *delim3; char *fragment; if (NullString(up->scheme)) scheme = "file"; else scheme = up->scheme; if (NullString(up->hostname)) { delim = ""; hostname = ""; } else { delim = "//"; hostname = up->hostname; } if (NullString(up->filename)) filename = "/"; else filename = up->filename; delim2 = ""; if (up->fragment != NULL && addfrag) { fragment = up->fragment; delim3 = "#"; } else { fragment = ""; delim3 = ""; } len = strlen(scheme) + strlen(hostname) + strlen(filename) + strlen(delim) + 
strlen(fragment) + 11; u = (char *)MPGet(mp, len + 1); if (up->port == 0) { snprintf (u, len, "%s:%s%s%s%s%s%s", scheme, delim, hostname, delim2, filename, delim3, fragment); } else { snprintf (u, len, "%s:%s%s:%d%s%s%s%s", scheme, delim, hostname, up->port, delim2, filename, delim3, fragment); } return(u); } /* * URLCreate * * Allocate URLParts and initialize to NULLs */ static URLParts * URLCreate(mp) MemPool mp; { URLParts *up; up = (URLParts *)MPCGet(mp, sizeof(URLParts)); return(up); } /* * resolve_filename * * I'm not sure this is much better than the original. */ static char * resolve_filename(mp, c, p) MemPool mp; char *c, *p; { char *r; char *t; MemPool tmp; /* * If current is an absolute path then use it otherwise * build an absolute path using the parent as a reference. */ if (c == NULL || c[0] == '/') r = MPStrDup(mp, c); else if (c[0] == '~') { r = MPGet(mp, strlen(c) + 2); r[0] = '/'; strcpy(r + 1, c); } else { tmp = MPCreate(); if (p == NULL || p[0] != '/') p = "/"; else p = whack_filename(tmp, p); t = compress_path(tmp, c, p); if (t == NULL) r = MPStrDup(mp, "/"); else r = MPStrDup(mp, t); MPDestroy(tmp); } return(r); } /* * URLResolve * * c - current * p - parent * r - result */ URLParts * URLResolve(mp, c, p) MemPool mp; URLParts *c, *p; { URLParts *r; /* * If the protocols are different then just return the original with * some empty fields filled in. */ if (c->scheme != NULL && p->scheme != NULL && strcasecmp(c->scheme, p->scheme) != 0) { r = URLDup(mp, c); if (r->hostname == NULL) r->hostname = MPStrDup(mp, "localhost"); r->filename = resolve_filename(mp, c->filename, p->filename); return(r); } r = URLCreate(mp); /* * If current has a protocol then use it, otherwise * use the parent's protocol. If the parent doesn't have a protocol for * some reason then use "file". 
*/ if (c->scheme == NULL) { if (p->scheme != NULL) r->scheme = MPStrDup(mp, p->scheme); else r->scheme = MPStrDup(mp, "file"); } else r->scheme = MPStrDup(mp, c->scheme); /* * If current has a hostname then use it, otherwise * use the parent's hostname. If neither has a hostname then * fallback to "localhost". */ if (c->hostname == NULL) { if (p->hostname != NULL) { r->hostname = MPStrDup(mp, p->hostname); r->port = p->port; } else { r->hostname = MPStrDup(mp, "localhost"); /* fallback */ r->port = 0; } } else { r->hostname = MPStrDup(mp, c->hostname); r->port = c->port; } r->filename = resolve_filename(mp, c->filename, p->filename); /* * Copy misc. fields. */ r->username = MPStrDup(mp, c->username); r->password = MPStrDup(mp, c->password); r->fragment = MPStrDup(mp, c->fragment); return(r); } URLParts * URLDup(mp, up) MemPool mp; URLParts *up; { URLParts *dp; dp = URLCreate(mp); dp->scheme = MPStrDup(mp, up->scheme); dp->hostname = MPStrDup(mp, up->hostname); dp->port = up->port; dp->filename = up->filename != NULL ? MPStrDup(mp, up->filename):MPStrDup(mp, "/"); dp->fragment = MPStrDup(mp, up->fragment); dp->username = MPStrDup(mp, up->username); dp->password = MPStrDup(mp, up->password); return(dp); } /* * URLParse * * Turns a URL into a URLParts structure * * The good stuff was written by Rob May * and heavily mangled/modified by john to suite his own weird style. 
*/ URLParts * URLParse(mp, url) MemPool mp; char *url; { URLParts *up; char *cursor; char *pound; /* link pound (#) sign */ char *at; /* username/password @ */ char *ucolon; /* username colon */ char *pcolon; /* port number colon */ up = URLCreate(mp); /* skip leading white-space (if any)*/ for (cursor = url; isspace8(*cursor); cursor++) ; // Extract the scheme, if any up->scheme = URLGetScheme(mp, cursor); if (up->scheme) { // Skip the scheme and the : that follows // up->scheme contains the part before the : // Therefore, its cursor + length == the position of the : // We know that we have the : there, so skip that position cursor += strlen(up->scheme) + 1; } // If we have scheme://, we have a hostname and filename // Otherwise, only filename if (up->scheme && cursor[0] == '/' && cursor[1] == '/') { // Move the cursor after the // cursor += 2; // We know we have at least the hostname // Do we also have a slash, marking the existence of filename? const char *slash = strchr(cursor, '/'); if (slash != NULL) { // Yes, until the slash is hostname, after it the filename up->hostname = MPGet(mp, slash - cursor + 1); strncpy(up->hostname, cursor, slash - cursor); up->hostname[slash - cursor] = '\0'; up->filename = MPStrDup(mp, slash); } else { // No, the whole thing is a hostname up->hostname = MPStrDup(mp, cursor); } } else { // No //, so this is all filename up->filename = MPStrDup(mp, cursor); } /* * If there is a host string then divide it into * username:password@hostname:port as needed. */ if (up->hostname != NULL) { /* * Look for username:password. */ if ((at = strchr(up->hostname, '@')) != NULL) { char *mumble; up->username = MPGet(mp, at - up->hostname + 1); strncpy(up->username, up->hostname, at - up->hostname); up->username[at - up->hostname] = '\0'; mumble = MPStrDup(mp, at + 1); up->hostname = mumble; if ((ucolon = strchr(up->username, ':')) != NULL) { up->password = MPStrDup(mp, ucolon + 1); *ucolon = '\0'; } } /* * Grab the port. 
*/ if ((pcolon = strchr(up->hostname, ':')) != NULL) { up->port = atoi(pcolon + 1); *pcolon = '\0'; } } /* * Check the filename for a '#foo' string. */ if (up->filename != NULL) { if ((pound = strchr(up->filename, '#')) != NULL) { *pound = '\0'; up->fragment = MPStrDup(mp, pound + 1); if (strlen(up->filename) == 0) up->filename = NULL; } } return(up); } /* * URLIsAbsolute */ bool URLIsAbsolute(up) URLParts *up; { if (up->scheme == NULL) return(false); return(true); } /* * URLBaseFilename */ char * URLBaseFilename(mp, up) MemPool mp; URLParts *up; { char *cp; if (up->filename == NULL) return(NULL); for (cp = up->filename + strlen(up->filename) - 1; cp >= up->filename; cp--) { if (*cp == '/') break; } cp++; if (*cp == '\0') return(NULL); return(MPStrDup(mp, cp)); } /* * URLGetScheme */ char * URLGetScheme(mp, url) MemPool mp; char *url; { char *cp, *dp; char *r; for (cp = url; *cp != '\0'; cp++) { for (dp = URLDELIMS; *dp != '\0'; dp++) { // Did we come across a URL delimiter? if (*cp == *dp) { // Yes, was it ':'? if (*cp == ':') { // Yes it was, scheme is what was before it r = (char *)MPCGet(mp, cp - url + 1); strncpy(r, url, cp - url); r[cp - url] = '\0'; return(r); } // No, it was something else. No scheme found. return(NULL); } } } return(NULL); }