chimera2/common/url.c

/*
 * url.c
 *
 * Copyright (C) 1993-1997, John Kilburg <john@cs.unlv.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "port_before.h"

#include <stdio.h>
#include <ctype.h>

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#include "port_after.h"

#include "url.h"

/*
 * Characters that stop the scan for a scheme in URLGetScheme: a scheme
 * is only recognised when ':' is the first of these to appear.
 */
#define URLDELIMS ":/?#"

/*
 * NullString
 *
 * True when s is a NULL pointer or points at an empty string.  The
 * argument is parenthesized so that expressions such as NullString(p + 1)
 * test the intended pointer.  Note that s is evaluated twice.
 */
#ifndef NullString
#define NullString(s)	((s) == NULL || *(s) == '\0')
#endif

/*
 * Forward declarations for the file-local helpers defined further down.
 * The argument lists go through _ArgProto, which is defined outside this
 * file (presumably so prototypes can be dropped for pre-ANSI compilers --
 * confirm against the port headers).
 */
static URLParts *URLCreate _ArgProto((MemPool));
static char *resolve_filename _ArgProto((MemPool, char *, char *));

/*
 * url_field_equal
 *
 * Case-insensitive equality test for one string field of a URLParts.
 * A NULL field only matches another NULL field, so fields that the
 * parser left unset never reach strcasecmp.
 *
 * Returns non-zero when the two fields are considered equal.
 */
static int
url_field_equal(a, b)
char *a, *b;
{
  if (a == NULL || b == NULL) return(a == b);
  return(strcasecmp(a, b) == 0);
}

/*
 * URLcmp
 *
 * Compare two parsed URLs by scheme, hostname, port and filename.
 * The string fields are compared without regard to case; username,
 * password and fragment do not take part in the comparison.
 *
 * Return 0 if equal, -1 otherwise.
 */
int
URLcmp(u1, u2)
URLParts *u1, *u2;
{
  if (url_field_equal(u1->scheme, u2->scheme) &&
      url_field_equal(u1->hostname, u2->hostname) &&
      u1->port == u2->port &&
      url_field_equal(u1->filename, u2->filename))
  {
    return(0);
  }
  return(-1);
}

/*
 * URLEscape
 *
 * Puts escape codes in URLs.  NOT complete.
 *
 * mp  - pool the result is allocated from
 * url - NUL-terminated text to escape
 * s2p - when true, a space is written as '+' and a literal '+' is
 *       written as its %XX escape
 *
 * Returns a new NUL-terminated string.  Every byte that is not copied
 * through verbatim is replaced by '%' followed by two upper-case hex
 * digits.
 */
char *
URLEscape(mp, url, s2p)
MemPool mp;
char *url;
bool s2p;
{
  static const char hex[] = "0123456789ABCDEF";
  char *cp;
  char *n, *s;
  unsigned char ch;

  /*
   * use a bit of memory so i don't have to mess around here:
   * the worst case is three output bytes per input byte.
   */
  s = n = (char *)MPGet(mp, strlen(url) * 3 + 2);

  for (cp = url; *cp; cp++, n++)
  {
    /*
     * Work on the unsigned value of the byte.  With a signed char,
     * bytes >= 0x80 would be negative, which indexes outside hex[]
     * and is not a valid argument for isalnum().
     */
    ch = (unsigned char)*cp;

    if (ch == ' ' && s2p)
    {
      *n = '+';
    }
    else if (ch == '+' && s2p)
    {
      *n = '%';
      n++;
      *n = hex[ch / 16];
      n++;
      *n = hex[ch % 16];
    }
#ifdef ORIGINAL_CODE
    /*
     * NOTE(review): as written, this variant copies the listed
     * characters through verbatim and escapes everything else;
     * confirm that is intended before building with ORIGINAL_CODE.
     */
    else if (strchr("<>\"#{}|\\^~[]`", ch))
#else
/* My CGI scripts and the Apache daemon say it's closer the first way - djhjr */
    else if (isalnum(ch) || strchr("$-_.'(),+!*", ch))
#endif
    {
      *n = *cp;
    }
    else
    {
      *n = '%';
      n++;
      *n = hex[ch / 16];
      n++;
      *n = hex[ch % 16];
    }
  }

  *n = '\0';

  return(s);
}

/*
 * URLUnescape
 *
 * Converts the escape codes (%xx) into actual characters.  NOT complete.
 * Could do everything in place I guess.
 *
 * mp  - pool the result is allocated from
 * url - NUL-terminated text to decode
 *
 * "%%" decodes to a single '%', "%xx" (two hex digits) decodes to the
 * byte with that value and '+' decodes to a space.  A '%' that is not
 * followed by '%' or by two hex digits is copied through unchanged, so
 * a truncated escape at the end of the input can never move the read
 * position past the terminating NUL.
 *
 * Returns a new NUL-terminated string that is never longer than url.
 */
char *
URLUnescape(mp, url)
MemPool mp;
char *url;
{
  char *cp, *n, *s;
  char hex[3];

  s = n = (char *)MPGet(mp, strlen(url) + 2);
  for (cp = url; *cp; cp++, n++)
  {
    if (*cp == '%')
    {
      if (cp[1] == '%')
      {
        cp++;
        *n = '%';
      }
      else if (isxdigit((unsigned char)cp[1]) &&
               isxdigit((unsigned char)cp[2]))
      {
        /* cp[2] is only examined once cp[1] is known not to be the NUL */
        hex[0] = cp[1];
        hex[1] = cp[2];
        hex[2] = '\0';
        *n = (char)strtol(hex, NULL, 16);
        cp += 2;
      }
      else
      {
        /* malformed or truncated escape: keep the '%' as it is */
        *n = '%';
      }
    }
    else if (*cp == '+') *n = ' ';
    else
    {
      *n = *cp;
    }
  }

  *n = '\0';

  return(s);
}

/*
 * URLMakeString
 *
 * Build the textual form of a parsed URL:
 *
 *   scheme:[//hostname][:port]filename[#fragment]
 *
 * mp      - pool the result is allocated from
 * up      - parsed URL
 * addfrag - when true and up->fragment is not NULL, "#fragment" is appended
 *
 * An empty or missing scheme is written as "file" and an empty or
 * missing filename as "/".  The "//" is only written when there is a
 * hostname; ":port" is written whenever up->port is non-zero.  The
 * username and password are not part of the output.
 *
 * Returns a new NUL-terminated string.
 */
char *
URLMakeString(mp, up, addfrag)
MemPool mp;
URLParts *up;
bool addfrag;
{
  size_t len;
  char *u;
  char *scheme;
  char *delim;
  char *hostname;
  char *filename;
  char *delim3;
  char *fragment;

  scheme = NullString(up->scheme) ? "file" : up->scheme;

  if (NullString(up->hostname))
  {
    delim = "";
    hostname = "";
  }
  else
  {
    delim = "//";
    hostname = up->hostname;
  }

  filename = NullString(up->filename) ? "/" : up->filename;

  if (up->fragment != NULL && addfrag)
  {
    fragment = up->fragment;
    delim3 = "#";
  }
  else
  {
    fragment = "";
    delim3 = "";
  }

  /*
   * Room needed besides the variable strings: the ':' after the scheme,
   * the ':' before the port, the port as printed by %d (three characters
   * per byte of int plus a sign is a safe upper bound), the '#' and the
   * terminating NUL.  Sizing for any int means an oversized port value
   * cannot cause the result to be silently truncated.
   */
  len = strlen(scheme) + strlen(delim) + strlen(hostname) +
        strlen(filename) + strlen(fragment) +
        1 + 1 + (3 * sizeof(int) + 1) + 1 + 1;
  u = (char *)MPGet(mp, len);
  if (up->port == 0)
  {
    snprintf (u, len, "%s:%s%s%s%s%s", scheme, delim, hostname,
              filename, delim3, fragment);
  }
  else
  {
    snprintf (u, len, "%s:%s%s:%d%s%s%s", scheme, delim, hostname, up->port,
              filename, delim3, fragment);
  }

  return(u);
}

/*
 * URLCreate
 *
 * Hand back a fresh URLParts taken from the pool with MPCGet.  The rest
 * of this file treats the fields of a new URLParts as NULL/0 until they
 * are assigned, so MPCGet is presumably the zero-filling allocator --
 * confirm against the MemPool implementation.
 */
static URLParts *
URLCreate(mp)
MemPool mp;
{
  return((URLParts *)MPCGet(mp, sizeof(URLParts)));
}

/*
 * resolve_filename
 *
 * Work out the filename of a URL given the filename of its parent.
 *
 * mp - pool the result is allocated from
 * c  - filename of the current URL (may be NULL)
 * p  - filename of the parent URL (may be NULL)
 *
 * A NULL or absolute (leading '/') current filename is duplicated as it
 * is.  A filename starting with '~' just gets a '/' put in front of it.
 * Anything else is combined with the parent by compress_path, using a
 * scratch pool that is destroyed before returning; "/" is the fallback
 * when compress_path yields NULL.
 *
 * NOTE(review): whack_filename and compress_path are not declared in
 * the part of the file visible here -- presumably a header provides
 * them; confirm.
 */
static char *
resolve_filename(mp, c, p)
MemPool mp;
char *c, *p;
{
  MemPool scratch;
  char *joined;
  char *result;

  /* Nothing to resolve: missing or already absolute. */
  if (c == NULL || c[0] == '/') return(MPStrDup(mp, c));

  /* "~user/..." style names are anchored at the root. */
  if (c[0] == '~')
  {
    result = MPGet(mp, strlen(c) + 2);
    result[0] = '/';
    strcpy(result + 1, c);
    return(result);
  }

  /* Relative name: the parent must be absolute to serve as a base. */
  scratch = MPCreate();
  if (p != NULL && p[0] == '/') p = whack_filename(scratch, p);
  else p = "/";

  joined = compress_path(scratch, c, p);
  if (joined != NULL) result = MPStrDup(mp, joined);
  else result = MPStrDup(mp, "/");

  MPDestroy(scratch);

  return(result);
}

/*
 * URLResolve
 *
 * Combine a (possibly partial) URL with the URL of the document it
 * appeared in.
 *
 * mp - pool the result is allocated from
 * c  - current URL
 * p  - parent URL
 *
 * Returns a newly allocated URLParts; neither input is modified.
 */
URLParts *
URLResolve(mp, c, p)
MemPool mp;
URLParts *c, *p;
{
  URLParts *res;
  char *scheme_src;
  char *host_src;

  /*
   * Both sides name a protocol and they disagree: keep the current URL,
   * only supplying a hostname when it has none and resolving its filename.
   */
  if (c->scheme != NULL && p->scheme != NULL &&
      strcasecmp(c->scheme, p->scheme) != 0)
  {
    res = URLDup(mp, c);
    if (res->hostname == NULL) res->hostname = MPStrDup(mp, "localhost");
    res->filename = resolve_filename(mp, c->filename, p->filename);
    return(res);
  }

  res = URLCreate(mp);

  /*
   * Protocol: current first, then the parent, then "file".
   */
  if (c->scheme != NULL) scheme_src = c->scheme;
  else if (p->scheme != NULL) scheme_src = p->scheme;
  else scheme_src = "file";
  res->scheme = MPStrDup(mp, scheme_src);

  /*
   * Host and port travel together: current first, then the parent,
   * then "localhost" with no port.
   */
  if (c->hostname != NULL)
  {
    host_src = c->hostname;
    res->port = c->port;
  }
  else if (p->hostname != NULL)
  {
    host_src = p->hostname;
    res->port = p->port;
  }
  else
  {
    host_src = "localhost"; /* fallback */
    res->port = 0;
  }
  res->hostname = MPStrDup(mp, host_src);

  res->filename = resolve_filename(mp, c->filename, p->filename);

  /*
   * The remaining fields always come from the current URL.
   */
  res->username = MPStrDup(mp, c->username);
  res->password = MPStrDup(mp, c->password);
  res->fragment = MPStrDup(mp, c->fragment);

  return(res);
}

/*
 * URLDup
 *
 * Make a copy of a URLParts, with every string duplicated into mp.
 * A missing filename is replaced by "/" in the copy.
 */
URLParts *
URLDup(mp, up)
MemPool mp;
URLParts *up;
{
  URLParts *copy = URLCreate(mp);

  copy->scheme = MPStrDup(mp, up->scheme);
  copy->username = MPStrDup(mp, up->username);
  copy->password = MPStrDup(mp, up->password);
  copy->hostname = MPStrDup(mp, up->hostname);
  copy->port = up->port;

  if (up->filename != NULL) copy->filename = MPStrDup(mp, up->filename);
  else copy->filename = MPStrDup(mp, "/");

  copy->fragment = MPStrDup(mp, up->fragment);

  return(copy);
}

/*
 * URLParse
 *
 * Turns a URL into a URLParts structure
 *
 * The good stuff was written by Rob May <robert.may@rd.eng.bbc.co.uk>
 * and heavily mangled/modified by john to suite his own weird style.
 *
 * mp  - pool the result and all of its strings are allocated from
 * url - NUL-terminated URL text; leading white-space is skipped
 *
 * Layout understood:
 *
 *   [scheme:][//[username[:password]@]hostname[:port]][filename][#fragment]
 *
 * Fields that do not occur in the input are left as URLCreate set them.
 * The host part ends at the first '/', '?' or '#', so a query or a
 * fragment that follows the host directly is not taken for part of the
 * hostname.  A query that follows the host directly is stored with a
 * '/' in front of it so the filename stays an absolute path.
 */
URLParts *
URLParse(mp, url)
MemPool mp;
char *url;
{
  URLParts *up;
  char *cursor;
  char *pound; /* link pound (#) sign */
  char *at; /* username/password @ */
  char *ucolon; /* username colon */
  char *pcolon; /* port number colon */
  size_t hostlen; /* length of the user:pass@host:port part */

  up = URLCreate(mp);

  /* skip leading white-space (if any)*/
  for (cursor = url; isspace8(*cursor); cursor++)
      ;

  /*
   * Extract the scheme, if any.  up->scheme holds the text in front of
   * the ':', so stepping over its length plus one lands just past the ':'.
   */
  up->scheme = URLGetScheme(mp, cursor);
  if (up->scheme)
  {
    cursor += strlen(up->scheme) + 1;
  }

  /*
   * scheme:// introduces a host part; without it everything that is
   * left is the filename.
   */
  if (up->scheme && cursor[0] == '/' && cursor[1] == '/')
  {
    cursor += 2;

    hostlen = strcspn(cursor, "/?#");
    up->hostname = MPGet(mp, hostlen + 1);
    strncpy(up->hostname, cursor, hostlen);
    up->hostname[hostlen] = '\0';
    cursor += hostlen;

    if (*cursor == '?')
    {
      /* "scheme://host?query": supply the implied root path */
      up->filename = MPGet(mp, strlen(cursor) + 2);
      up->filename[0] = '/';
      strcpy(up->filename + 1, cursor);
    }
    else if (*cursor != '\0')
    {
      /* a path, or a bare "#fragment" that is split off below */
      up->filename = MPStrDup(mp, cursor);
    }
  }
  else
  {
    up->filename = MPStrDup(mp, cursor);
  }

  /*
   * If there is a host string then divide it into
   * username:password@hostname:port as needed.
   */
  if (up->hostname != NULL)
  {
    /*
     * Look for username:password.
     */
    if ((at = strchr(up->hostname, '@')) != NULL)
    {
      up->username = MPGet(mp, at - up->hostname + 1);
      strncpy(up->username, up->hostname, at - up->hostname);
      up->username[at - up->hostname] = '\0';

      up->hostname = MPStrDup(mp, at + 1);

      if ((ucolon = strchr(up->username, ':')) != NULL)
      {
        up->password = MPStrDup(mp, ucolon + 1);
        *ucolon = '\0';
      }
    }

    /*
     * Grab the port.
     */
    if ((pcolon = strchr(up->hostname, ':')) != NULL)
    {
      up->port = atoi(pcolon + 1);
      *pcolon = '\0';
    }
  }

  /*
   * Check the filename for a '#foo' string.  A filename that consisted
   * of nothing but the fragment is reported as no filename at all.
   */
  if (up->filename != NULL)
  {
    if ((pound = strchr(up->filename, '#')) != NULL)
    {
      *pound = '\0';
      up->fragment = MPStrDup(mp, pound + 1);

      if (strlen(up->filename) == 0) up->filename = NULL;
    }
  }

  return(up);
}


/*
 * URLIsAbsolute
 *
 * A parsed URL counts as absolute exactly when it carries a scheme.
 */
bool
URLIsAbsolute(up)
URLParts *up;
{
  return(up->scheme != NULL ? true : false);
}

/*
 * URLBaseFilename
 *
 * Return a copy of the last component of the URL's filename, i.e. the
 * text after the final '/', or the whole filename when it contains no
 * '/'.
 *
 * mp - pool the result is allocated from
 * up - parsed URL
 *
 * Returns NULL when there is no filename or when the last component is
 * empty (the filename is empty or ends in '/').
 */
char *
URLBaseFilename(mp, up)
MemPool mp;
URLParts *up;
{
  char *slash;
  char *base;

  if (up->filename == NULL) return(NULL);

  /*
   * strrchr finds the last '/' without ever stepping a pointer in front
   * of the start of the string.
   */
  slash = strrchr(up->filename, '/');
  base = (slash != NULL) ? slash + 1 : up->filename;
  if (*base == '\0') return(NULL);

  return(MPStrDup(mp, base));
}

/*
 * URLGetScheme
 *
 * Extract the scheme of a URL: the text in front of a ':' that occurs
 * before any other character of URLDELIMS.
 *
 * mp  - pool the result is allocated from
 * url - NUL-terminated URL text
 *
 * Returns a new string holding the scheme without the ':'.  Returns
 * NULL when there is no scheme: no delimiter at all, a delimiter other
 * than ':' comes first, or the ':' is the very first character (an
 * empty scheme is not a scheme).
 */
char *
URLGetScheme(mp, url)
MemPool mp;
char *url;
{
  char *delim;
  char *r;
  size_t len;

  /* first URL delimiter, whichever one it is */
  delim = strpbrk(url, URLDELIMS);
  if (delim == NULL || *delim != ':') return(NULL);

  len = (size_t)(delim - url);
  if (len == 0) return(NULL);

  r = (char *)MPCGet(mp, len + 1);
  memcpy(r, url, len);
  r[len] = '\0';

  return(r);
}