chimera2/common/url.c

585 lines
11 KiB
C

/*
* url.c
*
* Copyright (C) 1993-1997, John Kilburg <john@cs.unlv.edu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "port_before.h"
#include <stdio.h>
#include <ctype.h>
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include "port_after.h"
#include "url.h"
/* Characters that stop the scheme scan in URLGetScheme. */
#define URLDELIMS ":/?#"
/*
 * NullString(s) is true when s is NULL or points at an empty string.
 * Only defined here if no earlier header supplied it.  Note that the
 * argument is evaluated twice and is not parenthesized in the expansion.
 */
#ifndef NullString
#define NullString(s) (s == NULL || *s == '\0')
#endif
/* Forward declarations for the file-local helpers defined further down. */
static URLParts *URLCreate _ArgProto((MemPool));
static char *resolve_filename _ArgProto((MemPool, char *, char *));
/*
 * url_field_differs
 *
 * Case-insensitive inequality test for one URL string field.  A NULL
 * field is treated as an empty string so that URLParts with missing
 * components can be compared without dereferencing NULL.
 *
 * Returns non-zero if the two fields differ.
 */
static int
url_field_differs(const char *a, const char *b)
{
  if (a == NULL) a = "";
  if (b == NULL) b = "";
  return(strcasecmp(a, b) != 0);
}

/*
 * URLcmp
 *
 * Compare two parsed URLs on scheme, hostname, port and filename.
 * The string fields are compared without regard to case (this includes
 * the filename, as before); username, password and fragment are ignored.
 * Fields that are NULL (URLParse leaves absent components NULL) compare
 * equal to each other and to the empty string.
 *
 * Return 0 if equal, -1 otherwise.  The result is not an ordering.
 */
int
URLcmp(u1, u2)
URLParts *u1, *u2;
{
  if (url_field_differs(u1->scheme, u2->scheme)) return(-1);
  if (url_field_differs(u1->hostname, u2->hostname)) return(-1);
  if (u1->port != u2->port) return(-1);
  if (url_field_differs(u1->filename, u2->filename)) return(-1);

  return(0);
}
/*
 * URLEscape
 *
 * Return a copy of url, allocated from mp, in which every byte outside
 * the pass-through set is replaced by a %XX escape (upper-case hex).
 * NOT complete.
 *
 * mp  - pool the result is allocated from
 * url - NUL-terminated string to escape (must not be NULL)
 * s2p - "space to plus": when true a space is written as '+' and a
 *       literal '+' is written as %2B so the two stay distinguishable
 */
char *
URLEscape(mp, url, s2p)
MemPool mp;
char *url;
bool s2p;
{
  static const char hex[] = "0123456789ABCDEF";
  char *cp;
  char *n, *s;
  unsigned char ch;

  /*
   * Worst case every input byte expands to three output bytes, so
   * over-allocate rather than count first.
   */
  s = n = (char *)MPGet(mp, strlen(url) * 3 + 2);

  for (cp = url; *cp; cp++, n++)
  {
    /*
     * Handle the byte as unsigned: where plain char is signed, bytes
     * >= 0x80 are negative, which would index before hex[] and is an
     * invalid argument for isalnum().
     */
    ch = (unsigned char)*cp;

    if (ch == ' ' && s2p)
    {
      *n = '+';
    }
    else if (ch == '+' && s2p)
    {
      *n = '%';
      n++;
      *n = hex[ch / 16];
      n++;
      *n = hex[ch % 16];
    }
#ifdef ORIGINAL_CODE
    /* Older rule, kept selectable at build time. */
    else if (strchr("<>\"#{}|\\^~[]`", ch))
#else
    /*
     * Pass alphanumerics and a small punctuation set through untouched.
     * My CGI scripts and the Apache daemon say this is closer - djhjr
     */
    else if (isalnum(ch) || strchr("$-_.'(),+!*", ch))
#endif
    {
      *n = *cp;
    }
    else
    {
      *n = '%';
      n++;
      *n = hex[ch / 16];
      n++;
      *n = hex[ch % 16];
    }
  }
  *n = '\0';

  return(s);
}
/*
 * URLUnescape
 *
 * Return a copy of url, allocated from mp, with escape codes converted
 * back into characters.  NOT complete.
 *
 *   %XX  (two hex digits) -> the byte with that value
 *   %%                    -> a single '%'
 *   +                     -> a space
 *
 * A '%' that is not followed by '%' or by two hex digits (including a
 * '%' at the very end of the string) is copied through unchanged rather
 * than being decoded, so the scan never runs past the terminating NUL.
 *
 * Could do everything in place I guess.
 */
char *
URLUnescape(mp, url)
MemPool mp;
char *url;
{
  char *cp, *n, *s;
  char hex[3];

  /* The decoded text is never longer than the input. */
  s = n = (char *)MPGet(mp, strlen(url) + 2);

  for (cp = url; *cp; cp++, n++)
  {
    if (*cp == '%')
    {
      if (cp[1] == '%')
      {
        cp++;
        *n = '%';
      }
      else if (isxdigit((unsigned char)cp[1]) &&
               isxdigit((unsigned char)cp[2]))
      {
        /* cp[2] is only examined once cp[1] is known not to be NUL. */
        hex[0] = cp[1];
        hex[1] = cp[2];
        hex[2] = '\0';
        *n = (char)strtol(hex, NULL, 16);
        cp += 2;
      }
      else
      {
        /* Malformed or truncated escape: keep the '%' literally. */
        *n = '%';
      }
    }
    else if (*cp == '+')
    {
      *n = ' ';
    }
    else
    {
      *n = *cp;
    }
  }
  *n = '\0';

  return(s);
}
/*
 * URLMakeString
 *
 * Build the textual form of a parsed URL in memory taken from mp:
 *
 *   scheme ":" [ "//" hostname ] [ ":" port ] filename [ "#" fragment ]
 *
 * mp      - pool the result is allocated from
 * up      - parsed URL
 * addfrag - when true and up->fragment is not NULL, append "#fragment"
 *
 * Defaults: an empty/NULL scheme becomes "file", an empty/NULL filename
 * becomes "/", and the "//hostname" part is left out when there is no
 * hostname.  The port is written only when it is non-zero.  Username
 * and password are not written.
 */
char *
URLMakeString(mp, up, addfrag)
MemPool mp;
URLParts *up;
bool addfrag;
{
  size_t len;
  char *u;
  char *scheme;
  char *delim;
  char *hostname;
  char *filename;
  char *delim3;
  char *fragment;
  char port[32];	/* ":" plus any int in decimal, with room to spare */

  if (NullString(up->scheme)) scheme = "file";
  else scheme = up->scheme;

  if (NullString(up->hostname))
  {
    delim = "";
    hostname = "";
  }
  else
  {
    delim = "//";
    hostname = up->hostname;
  }

  if (NullString(up->filename)) filename = "/";
  else filename = up->filename;

  if (up->fragment != NULL && addfrag)
  {
    fragment = up->fragment;
    delim3 = "#";
  }
  else
  {
    fragment = "";
    delim3 = "";
  }

  /*
   * Format the optional port first so that its real length is known;
   * guessing a fixed allowance could truncate the URL for large values.
   */
  if (up->port == 0) port[0] = '\0';
  else snprintf(port, sizeof(port), ":%d", up->port);

  /* Every piece, the ':' after the scheme, and the terminating NUL. */
  len = strlen(scheme) + 1 + strlen(delim) + strlen(hostname) +
        strlen(port) + strlen(filename) + strlen(delim3) +
        strlen(fragment) + 1;

  u = (char *)MPGet(mp, len + 1);
  snprintf (u, len, "%s:%s%s%s%s%s%s", scheme, delim, hostname, port,
            filename, delim3, fragment);

  return(u);
}
/*
 * URLCreate
 *
 * Obtain one URLParts from the pool via MPCGet.  The rest of this file
 * relies on every field starting out NULL/0 (presumably MPCGet hands
 * back cleared memory -- confirm against the MemPool implementation).
 */
static URLParts *
URLCreate(mp)
MemPool mp;
{
  URLParts *fresh = (URLParts *)MPCGet(mp, sizeof(URLParts));

  return(fresh);
}
/*
 * resolve_filename
 *
 * Work out the filename for a URL whose own filename is c, seen in the
 * context of a parent document whose filename is p.  The result is
 * allocated from mp.
 *
 *  - c is NULL or starts with '/': c is duplicated as it stands.
 *  - c starts with '~': a '/' is put in front of it.
 *  - otherwise c is combined with the parent via whack_filename() and
 *    compress_path(); a parent that is NULL or not absolute counts as
 *    "/", and "/" is also the answer if compress_path() yields NULL.
 *
 * I'm not sure this is much better than the original.
 */
static char *
resolve_filename(mp, c, p)
MemPool mp;
char *c, *p;
{
  char *result;
  char *joined;
  MemPool scratch;

  /* Already absolute (or absent): nothing to resolve. */
  if (c == NULL || c[0] == '/')
  {
    result = MPStrDup(mp, c);
    return(result);
  }

  /* "~name/..." is anchored at the root. */
  if (c[0] == '~')
  {
    result = MPGet(mp, strlen(c) + 2);
    result[0] = '/';
    strcpy(result + 1, c);
    return(result);
  }

  /*
   * Relative path.  Intermediate strings live in a throw-away pool;
   * only the final answer is copied into the caller's pool.
   */
  scratch = MPCreate();

  if (p != NULL && p[0] == '/') p = whack_filename(scratch, p);
  else p = "/";

  joined = compress_path(scratch, c, p);
  if (joined != NULL) result = MPStrDup(mp, joined);
  else result = MPStrDup(mp, "/");

  MPDestroy(scratch);

  return(result);
}
/*
 * URLResolve
 *
 * Produce a new URLParts, allocated from mp, describing c as seen from
 * the parent document p.
 *
 * c - current (possibly relative) URL
 * p - parent URL
 *
 * Returns the resolved URL.
 */
URLParts *
URLResolve(mp, c, p)
MemPool mp;
URLParts *c, *p;
{
  URLParts *r;
  URLParts *hostsrc;
  char *scheme;

  /*
   * Both sides name a scheme and the schemes differ: keep the current
   * URL as it is, only filling in a hostname if it has none and
   * resolving its filename.
   */
  if (c->scheme != NULL && p->scheme != NULL &&
      strcasecmp(c->scheme, p->scheme) != 0)
  {
    r = URLDup(mp, c);
    if (r->hostname == NULL) r->hostname = MPStrDup(mp, "localhost");
    r->filename = resolve_filename(mp, c->filename, p->filename);
    return(r);
  }

  r = URLCreate(mp);

  /*
   * Scheme: current, else parent, else "file".
   */
  scheme = c->scheme;
  if (scheme == NULL) scheme = p->scheme;
  if (scheme == NULL) scheme = "file";
  r->scheme = MPStrDup(mp, scheme);

  /*
   * Hostname and port are taken together from whichever URL supplies
   * the hostname: current first, then parent.  With no hostname on
   * either side fall back to "localhost" and port 0.
   */
  if (c->hostname != NULL) hostsrc = c;
  else if (p->hostname != NULL) hostsrc = p;
  else hostsrc = NULL;

  if (hostsrc != NULL)
  {
    r->hostname = MPStrDup(mp, hostsrc->hostname);
    r->port = hostsrc->port;
  }
  else
  {
    r->hostname = MPStrDup(mp, "localhost"); /* fallback */
    r->port = 0;
  }

  r->filename = resolve_filename(mp, c->filename, p->filename);

  /*
   * The remaining fields always come from the current URL.
   */
  r->username = MPStrDup(mp, c->username);
  r->password = MPStrDup(mp, c->password);
  r->fragment = MPStrDup(mp, c->fragment);

  return(r);
}
/*
 * URLDup
 *
 * Make a copy of up in which the structure and every string are
 * allocated from mp.  A NULL filename is copied as "/"; the other
 * string fields are handed to MPStrDup as they are.
 */
URLParts *
URLDup(mp, up)
MemPool mp;
URLParts *up;
{
  URLParts *copy;

  copy = URLCreate(mp);

  copy->scheme = MPStrDup(mp, up->scheme);
  copy->hostname = MPStrDup(mp, up->hostname);
  copy->port = up->port;

  if (up->filename == NULL) copy->filename = MPStrDup(mp, "/");
  else copy->filename = MPStrDup(mp, up->filename);

  copy->fragment = MPStrDup(mp, up->fragment);
  copy->username = MPStrDup(mp, up->username);
  copy->password = MPStrDup(mp, up->password);

  return(copy);
}
/*
 * URLParse
 *
 * Turns a URL into a URLParts structure allocated from mp.
 *
 *   scheme://username:password@hostname:port/filename#fragment
 *
 * Leading white-space is skipped.  Components that are not present are
 * left as URLCreate initialised them.  Without "scheme://" the whole
 * remainder is treated as the filename.
 *
 * The host part ends at the first '/', '?' or '#' (or at the end of the
 * string), so "http://host#frag" and "http://host?query" no longer end
 * up with the fragment or query inside the hostname.  When the host is
 * followed directly by a query, the filename is stored as "/?query" so
 * that it is still an absolute path.
 *
 * The good stuff was written by Rob May <robert.may@rd.eng.bbc.co.uk>
 * and heavily mangled/modified by john to suit his own weird style.
 */
URLParts *
URLParse(mp, url)
MemPool mp;
char *url;
{
  URLParts *up;
  char *cursor;
  char *pound;			/* link pound (#) sign */
  char *at;			/* username/password @ */
  char *ucolon;			/* username colon */
  char *pcolon;			/* port number colon */
  size_t hostlen;		/* length of the host part */

  up = URLCreate(mp);

  /* skip leading white-space (if any) */
  for (cursor = url; isspace8(*cursor); cursor++)
    ;

  /*
   * Extract the scheme, if any.  up->scheme holds the text before the
   * ':', so skipping its length plus one lands just after the ':'.
   */
  up->scheme = URLGetScheme(mp, cursor);
  if (up->scheme)
  {
    cursor += strlen(up->scheme) + 1;
  }

  /*
   * "scheme://" introduces a host part, optionally followed by the
   * filename.  Anything else is filename only.
   */
  if (up->scheme && cursor[0] == '/' && cursor[1] == '/')
  {
    cursor += 2;

    hostlen = strcspn(cursor, "/?#");
    up->hostname = MPGet(mp, hostlen + 1);
    memcpy(up->hostname, cursor, hostlen);
    up->hostname[hostlen] = '\0';
    cursor += hostlen;

    if (*cursor == '?')
    {
      /* Empty path followed by a query: supply the root path. */
      up->filename = MPGet(mp, strlen(cursor) + 2);
      up->filename[0] = '/';
      strcpy(up->filename + 1, cursor);
    }
    else if (*cursor != '\0')
    {
      /* Starts with '/' (a path) or '#' (split off further down). */
      up->filename = MPStrDup(mp, cursor);
    }
  }
  else
  {
    up->filename = MPStrDup(mp, cursor);
  }

  /*
   * If there is a host string then divide it into
   * username:password@hostname:port as needed.
   */
  if (up->hostname != NULL)
  {
    /*
     * Look for username:password.
     */
    if ((at = strchr(up->hostname, '@')) != NULL)
    {
      char *mumble;

      up->username = MPGet(mp, at - up->hostname + 1);
      strncpy(up->username, up->hostname, at - up->hostname);
      up->username[at - up->hostname] = '\0';

      mumble = MPStrDup(mp, at + 1);
      up->hostname = mumble;

      if ((ucolon = strchr(up->username, ':')) != NULL)
      {
        up->password = MPStrDup(mp, ucolon + 1);
        *ucolon = '\0';
      }
    }

    /*
     * Grab the port.  The text after ':' is converted with atoi and
     * is not validated.
     */
    if ((pcolon = strchr(up->hostname, ':')) != NULL)
    {
      up->port = atoi(pcolon + 1);
      *pcolon = '\0';
    }
  }

  /*
   * Check the filename for a '#foo' string.  A filename that is empty
   * once the fragment is removed is recorded as NULL.
   */
  if (up->filename != NULL)
  {
    if ((pound = strchr(up->filename, '#')) != NULL)
    {
      *pound = '\0';
      up->fragment = MPStrDup(mp, pound + 1);
      if (strlen(up->filename) == 0) up->filename = NULL;
    }
  }

  return(up);
}
/*
 * URLIsAbsolute
 *
 * A parsed URL counts as absolute exactly when it carries a scheme.
 * Returns true if up->scheme is set, false if it is NULL.
 */
bool
URLIsAbsolute(up)
URLParts *up;
{
  return((up->scheme != NULL) ? true : false);
}
/*
 * URLBaseFilename
 *
 * Return a copy (allocated from mp) of the part of up->filename that
 * follows the last '/', or of the whole filename if it has no '/'.
 *
 * Returns NULL if there is no filename or if that last component is
 * empty (the filename is empty or ends in '/').
 */
char *
URLBaseFilename(mp, up)
MemPool mp;
URLParts *up;
{
  char *slash;
  char *base;

  if (up->filename == NULL) return(NULL);

  /*
   * strrchr does the backward search without ever stepping a pointer
   * to before the start of the string.
   */
  slash = strrchr(up->filename, '/');
  if (slash != NULL) base = slash + 1;
  else base = up->filename;

  if (*base == '\0') return(NULL);

  return(MPStrDup(mp, base));
}
/*
 * URLGetScheme
 *
 * Return a copy (allocated from mp) of the scheme at the start of url,
 * i.e. the text before a ':' provided that ':' is the first of the
 * URLDELIMS characters to occur.  The ':' itself is not included.
 *
 * Returns NULL if another delimiter comes first or if url contains no
 * delimiter at all.
 */
char *
URLGetScheme(mp, url)
MemPool mp;
char *url;
{
  size_t n;
  char *scheme;

  /* Length of the leading run that contains no delimiter. */
  n = strcspn(url, URLDELIMS);

  /* Only a ':' at that position marks a scheme. */
  if (url[n] != ':') return(NULL);

  scheme = (char *)MPCGet(mp, n + 1);
  memcpy(scheme, url, n);
  scheme[n] = '\0';

  return(scheme);
}