mirror of
https://github.com/mirror/wget.git
synced 2025-02-01 15:20:08 +08:00
[svn] Clean up handling of schemes.
Published in <sxswv0n7h7s.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
303f406997
commit
f178e6c613
@ -1,3 +1,7 @@
|
|||||||
|
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* url.c: Clean up handling of URL schemes.
|
||||||
|
|
||||||
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* url.c: Get rid of `protostrings'.
|
* url.c: Get rid of `protostrings'.
|
||||||
|
@ -278,12 +278,12 @@ same_host (const char *u1, const char *u2)
|
|||||||
char *real1, *real2;
|
char *real1, *real2;
|
||||||
|
|
||||||
/* Skip protocol, if present. */
|
/* Skip protocol, if present. */
|
||||||
u1 += skip_proto (u1);
|
u1 += url_skip_scheme (u1);
|
||||||
u2 += skip_proto (u2);
|
u2 += url_skip_scheme (u2);
|
||||||
|
|
||||||
/* Skip username and password, if present. */
|
/* Skip username and password, if present. */
|
||||||
u1 += skip_uname (u1);
|
u1 += url_skip_uname (u1);
|
||||||
u2 += skip_uname (u2);
|
u2 += url_skip_uname (u2);
|
||||||
|
|
||||||
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
|
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
|
||||||
p1 = strdupdelim (s, u1);
|
p1 = strdupdelim (s, u1);
|
||||||
|
@ -301,7 +301,7 @@ static void
|
|||||||
handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
||||||
struct taginfo *tag, int attrid)
|
struct taginfo *tag, int attrid)
|
||||||
{
|
{
|
||||||
int no_proto = !has_proto (link_uri);
|
int no_scheme = !url_has_scheme (link_uri);
|
||||||
urlpos *newel;
|
urlpos *newel;
|
||||||
|
|
||||||
const char *base = closure->base ? closure->base : closure->parent_base;
|
const char *base = closure->base ? closure->base : closure->parent_base;
|
||||||
@ -324,10 +324,10 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
|||||||
|
|
||||||
if (!base)
|
if (!base)
|
||||||
{
|
{
|
||||||
if (no_proto)
|
if (no_scheme)
|
||||||
{
|
{
|
||||||
/* We have no base, and the link does not have a protocol or
|
/* We have no base, and the link does not have a host
|
||||||
a host attached to it. Nothing we can do. */
|
attached to it. Nothing we can do. */
|
||||||
/* #### Should we print a warning here? Wget 1.5.x used to. */
|
/* #### Should we print a warning here? Wget 1.5.x used to. */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -349,11 +349,11 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
|||||||
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
|
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
|
||||||
newel->size = tag->attrs[attrid].value_raw_size;
|
newel->size = tag->attrs[attrid].value_raw_size;
|
||||||
|
|
||||||
/* A URL is relative if the host and protocol are not named, and the
|
/* A URL is relative if the host is not named, and the name does not
|
||||||
name does not start with `/'. */
|
start with `/'. */
|
||||||
if (no_proto && *link_uri != '/')
|
if (no_scheme && *link_uri != '/')
|
||||||
newel->link_relative_p = 1;
|
newel->link_relative_p = 1;
|
||||||
else if (!no_proto)
|
else if (!no_scheme)
|
||||||
newel->link_complete_p = 1;
|
newel->link_complete_p = 1;
|
||||||
|
|
||||||
if (closure->tail)
|
if (closure->tail)
|
||||||
|
17
src/http.c
17
src/http.c
@ -614,7 +614,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
#ifndef HAVE_SSL
|
#ifndef HAVE_SSL
|
||||||
!persistent_available_p (u->host, u->port)
|
!persistent_available_p (u->host, u->port)
|
||||||
#else
|
#else
|
||||||
!persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0))
|
!persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
@ -653,7 +653,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
if (u->proto == URLHTTPS)
|
if (u->scheme == SCHEME_HTTPS)
|
||||||
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
||||||
{
|
{
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
@ -786,7 +786,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
port_maybe = NULL;
|
port_maybe = NULL;
|
||||||
if (1
|
if (1
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
&& remport != (u->proto == URLHTTPS
|
&& remport != (u->scheme == SCHEME_HTTPS
|
||||||
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
|
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
|
||||||
#else
|
#else
|
||||||
&& remport != DEFAULT_HTTP_PORT
|
&& remport != DEFAULT_HTTP_PORT
|
||||||
@ -804,7 +804,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
|
|
||||||
if (opt.cookies)
|
if (opt.cookies)
|
||||||
cookies = build_cookies_request (ou->host, ou->port, ou->path,
|
cookies = build_cookies_request (ou->host, ou->port, ou->path,
|
||||||
ou->proto == URLHTTPS);
|
#ifdef HAVE_SSL
|
||||||
|
ou->scheme == SCHEME_HTTPS
|
||||||
|
#else
|
||||||
|
0
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
|
||||||
/* Allocate the memory for the request. */
|
/* Allocate the memory for the request. */
|
||||||
request = (char *)alloca (strlen (command) + strlen (path)
|
request = (char *)alloca (strlen (command) + strlen (path)
|
||||||
@ -848,7 +853,7 @@ Accept: %s\r\n\
|
|||||||
|
|
||||||
/* Send the request to server. */
|
/* Send the request to server. */
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
if (u->proto == URLHTTPS)
|
if (u->scheme == SCHEME_HTTPS)
|
||||||
num_written = ssl_iwrite (ssl, request, strlen (request));
|
num_written = ssl_iwrite (ssl, request, strlen (request));
|
||||||
else
|
else
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
@ -871,7 +876,7 @@ Accept: %s\r\n\
|
|||||||
/* Before reading anything, initialize the rbuf. */
|
/* Before reading anything, initialize the rbuf. */
|
||||||
rbuf_initialize (&rbuf, sock);
|
rbuf_initialize (&rbuf, sock);
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
if (u->proto == URLHTTPS)
|
if (u->scheme == SCHEME_HTTPS)
|
||||||
rbuf.ssl = ssl;
|
rbuf.ssl = ssl;
|
||||||
else
|
else
|
||||||
rbuf.ssl = NULL;
|
rbuf.ssl = NULL;
|
||||||
|
22
src/recur.c
22
src/recur.c
@ -187,7 +187,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
that the retrieval is done through proxy. In that case, FTP
|
that the retrieval is done through proxy. In that case, FTP
|
||||||
links will be followed by default and recursion will not be
|
links will be followed by default and recursion will not be
|
||||||
turned off when following them. */
|
turned off when following them. */
|
||||||
this_url_ftp = (urlproto (this_url) == URLFTP);
|
this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
|
||||||
|
|
||||||
/* Get the URL-s from an HTML file: */
|
/* Get the URL-s from an HTML file: */
|
||||||
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
|
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
|
||||||
@ -217,12 +217,6 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (u->proto == URLFILE)
|
|
||||||
{
|
|
||||||
DEBUGP (("Nothing to do with file:// around here.\n"));
|
|
||||||
freeurl (u, 1);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
assert (u->url != NULL);
|
assert (u->url != NULL);
|
||||||
constr = xstrdup (u->url);
|
constr = xstrdup (u->url);
|
||||||
|
|
||||||
@ -254,7 +248,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
|
|
||||||
/* If it is FTP, and FTP is not followed, chuck it out. */
|
/* If it is FTP, and FTP is not followed, chuck it out. */
|
||||||
if (!inl)
|
if (!inl)
|
||||||
if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
|
if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
|
||||||
{
|
{
|
||||||
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
|
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
|
||||||
string_set_add (undesirable_urls, constr);
|
string_set_add (undesirable_urls, constr);
|
||||||
@ -262,7 +256,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
}
|
}
|
||||||
/* If it is absolute link and they are not followed, chuck it
|
/* If it is absolute link and they are not followed, chuck it
|
||||||
out. */
|
out. */
|
||||||
if (!inl && u->proto != URLFTP)
|
if (!inl && u->scheme != SCHEME_FTP)
|
||||||
if (opt.relative_only && !cur_url->link_relative_p)
|
if (opt.relative_only && !cur_url->link_relative_p)
|
||||||
{
|
{
|
||||||
DEBUGP (("It doesn't really look like a relative link.\n"));
|
DEBUGP (("It doesn't really look like a relative link.\n"));
|
||||||
@ -281,7 +275,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
if (!inl && opt.no_parent
|
if (!inl && opt.no_parent
|
||||||
/* If the new URL is FTP and the old was not, ignore
|
/* If the new URL is FTP and the old was not, ignore
|
||||||
opt.no_parent. */
|
opt.no_parent. */
|
||||||
&& !(!this_url_ftp && u->proto == URLFTP))
|
&& !(!this_url_ftp && u->scheme == SCHEME_FTP))
|
||||||
{
|
{
|
||||||
/* Check for base_dir first. */
|
/* Check for base_dir first. */
|
||||||
if (!(base_dir && frontcmp (base_dir, u->dir)))
|
if (!(base_dir && frontcmp (base_dir, u->dir)))
|
||||||
@ -368,7 +362,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
/* This line is bogus. */
|
/* This line is bogus. */
|
||||||
/*string_set_add (undesirable_urls, constr);*/
|
/*string_set_add (undesirable_urls, constr);*/
|
||||||
|
|
||||||
if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
|
if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
|
||||||
if (!opt.spanhost && this_url && !same_host (this_url, constr))
|
if (!opt.spanhost && this_url && !same_host (this_url, constr))
|
||||||
{
|
{
|
||||||
DEBUGP (("This is not the same hostname as the parent's.\n"));
|
DEBUGP (("This is not the same hostname as the parent's.\n"));
|
||||||
@ -377,7 +371,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* What about robots.txt? */
|
/* What about robots.txt? */
|
||||||
if (!inl && opt.use_robots && u->proto == URLHTTP)
|
/* robots.txt is fetched over HTTP, so only HTTP links are subject to
   it; this keeps the pre-rename test (u->proto == URLHTTP). */
if (!inl && opt.use_robots && u->scheme == SCHEME_HTTP)
|
||||||
{
|
{
|
||||||
struct robot_specs *specs = res_get_specs (u->host, u->port);
|
struct robot_specs *specs = res_get_specs (u->host, u->port);
|
||||||
if (!specs)
|
if (!specs)
|
||||||
@ -418,7 +412,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
string_set_add (undesirable_urls, constr);
|
string_set_add (undesirable_urls, constr);
|
||||||
/* Automatically followed FTPs will *not* be downloaded
|
/* Automatically followed FTPs will *not* be downloaded
|
||||||
recursively. */
|
recursively. */
|
||||||
if (u->proto == URLFTP)
|
if (u->scheme == SCHEME_FTP)
|
||||||
{
|
{
|
||||||
/* Don't you adore side-effects? */
|
/* Don't you adore side-effects? */
|
||||||
opt.recursive = 0;
|
opt.recursive = 0;
|
||||||
@ -428,7 +422,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
/* Retrieve it. */
|
/* Retrieve it. */
|
||||||
retrieve_url (constr, &filename, &newloc,
|
retrieve_url (constr, &filename, &newloc,
|
||||||
canon_this_url ? canon_this_url : this_url, &dt);
|
canon_this_url ? canon_this_url : this_url, &dt);
|
||||||
if (u->proto == URLFTP)
|
if (u->scheme == SCHEME_FTP)
|
||||||
{
|
{
|
||||||
/* Restore... */
|
/* Restore... */
|
||||||
opt.recursive = 1;
|
opt.recursive = 1;
|
||||||
|
21
src/retr.c
21
src/retr.c
@ -300,7 +300,7 @@ rate (long bytes, long msecs, int pad)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
|
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme) \
|
||||||
&& no_proxy_match((u)->host, \
|
&& no_proxy_match((u)->host, \
|
||||||
(const char **)opt.no_proxy))
|
(const char **)opt.no_proxy))
|
||||||
|
|
||||||
@ -366,8 +366,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
memset (u, 0, sizeof (*u));
|
memset (u, 0, sizeof (*u));
|
||||||
u->proxy = pu;
|
u->proxy = pu;
|
||||||
/* Get the appropriate proxy server, appropriate for the
|
/* Get the appropriate proxy server, appropriate for the
|
||||||
current protocol. */
|
current scheme. */
|
||||||
proxy = getproxy (pu->proto);
|
proxy = getproxy (pu->scheme);
|
||||||
if (!proxy)
|
if (!proxy)
|
||||||
{
|
{
|
||||||
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
||||||
@ -379,9 +379,9 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
}
|
}
|
||||||
/* Parse the proxy URL. */
|
/* Parse the proxy URL. */
|
||||||
result = parseurl (proxy, u, 0);
|
result = parseurl (proxy, u, 0);
|
||||||
if (result != URLOK || u->proto != URLHTTP)
|
if (result != URLOK || u->scheme != SCHEME_HTTP)
|
||||||
{
|
{
|
||||||
if (u->proto == URLHTTP)
|
if (u->scheme == SCHEME_HTTP)
|
||||||
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
|
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
|
||||||
else
|
else
|
||||||
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
||||||
@ -391,19 +391,18 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
xfree (url);
|
xfree (url);
|
||||||
return PROXERR;
|
return PROXERR;
|
||||||
}
|
}
|
||||||
u->proto = URLHTTP;
|
u->scheme = SCHEME_HTTP;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert (u->proto != URLFILE); /* #### Implement me! */
|
|
||||||
mynewloc = NULL;
|
mynewloc = NULL;
|
||||||
|
|
||||||
if (u->proto == URLHTTP
|
if (u->scheme == SCHEME_HTTP
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
|| u->proto == URLHTTPS
|
|| u->scheme == SCHEME_HTTPS
|
||||||
#endif
|
#endif
|
||||||
)
|
)
|
||||||
result = http_loop (u, &mynewloc, dt);
|
result = http_loop (u, &mynewloc, dt);
|
||||||
else if (u->proto == URLFTP)
|
else if (u->scheme == SCHEME_FTP)
|
||||||
{
|
{
|
||||||
/* If this is a redirection, we must not allow recursive FTP
|
/* If this is a redirection, we must not allow recursive FTP
|
||||||
retrieval, so we save recursion to oldrec, and restore it
|
retrieval, so we save recursion to oldrec, and restore it
|
||||||
@ -420,7 +419,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
|
|
||||||
#### All of this is, of course, crap. These types should be
|
#### All of this is, of course, crap. These types should be
|
||||||
determined through mailcap. */
|
determined through mailcap. */
|
||||||
if (redirections && u->local && (u->proto == URLFTP ))
|
if (redirections && u->local && (u->scheme == SCHEME_FTP))
|
||||||
{
|
{
|
||||||
char *suf = suffix (u->local);
|
char *suf = suffix (u->local);
|
||||||
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
||||||
|
202
src/url.c
202
src/url.c
@ -49,21 +49,21 @@ extern int errno;
|
|||||||
|
|
||||||
static int urlpath_length PARAMS ((const char *));
|
static int urlpath_length PARAMS ((const char *));
|
||||||
|
|
||||||
struct proto
|
struct scheme_data
|
||||||
{
|
{
|
||||||
char *name;
|
enum url_scheme scheme;
|
||||||
uerr_t ind;
|
char *leading_string;
|
||||||
unsigned short port;
|
int default_port;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Supported protocols: */
|
/* Supported schemes: */
|
||||||
static struct proto sup_protos[] =
|
static struct scheme_data supported_schemes[] =
|
||||||
{
|
{
|
||||||
{ "http://", URLHTTP, DEFAULT_HTTP_PORT },
|
{ SCHEME_HTTP, "http://", DEFAULT_HTTP_PORT },
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
{ "https://",URLHTTPS, DEFAULT_HTTPS_PORT},
|
{ SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
|
||||||
#endif
|
#endif
|
||||||
{ "ftp://", URLFTP, DEFAULT_FTP_PORT }
|
{ SCHEME_FTP, "ftp://", DEFAULT_FTP_PORT }
|
||||||
};
|
};
|
||||||
|
|
||||||
static void parse_dir PARAMS ((const char *, char **, char **));
|
static void parse_dir PARAMS ((const char *, char **, char **));
|
||||||
@ -229,39 +229,28 @@ encode_string (const char *s)
|
|||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
/* Returns the protocol type if URL's protocol is supported, or
|
/* Returns the scheme type if the scheme is supported, or
|
||||||
URLUNKNOWN if not. */
|
SCHEME_INVALID if not. */
|
||||||
uerr_t
|
enum url_scheme
|
||||||
urlproto (const char *url)
|
url_scheme (const char *url)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
|
for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||||
if (!strncasecmp (url, sup_protos[i].name, strlen (sup_protos[i].name)))
|
if (!strncasecmp (url, supported_schemes[i].leading_string,
|
||||||
return sup_protos[i].ind;
|
strlen (supported_schemes[i].leading_string)))
|
||||||
for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);
|
return supported_schemes[i].scheme;
|
||||||
if (url[i] == ':')
|
return SCHEME_INVALID;
|
||||||
{
|
|
||||||
for (++i; url[i] && url[i] != '/'; i++)
|
|
||||||
if (!ISDIGIT (url[i]))
|
|
||||||
return URLBADPORT;
|
|
||||||
if (url[i - 1] == ':')
|
|
||||||
return URLFTP;
|
|
||||||
else
|
|
||||||
return URLHTTP;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return URLHTTP;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip the protocol part of the URL, e.g. `http://'. If no protocol
|
/* Return the number of characters needed to skip the scheme part of
|
||||||
part is found, returns 0. */
|
the URL, e.g. `http://'. If no scheme is found, returns 0. */
|
||||||
int
|
int
|
||||||
skip_proto (const char *url)
|
url_skip_scheme (const char *url)
|
||||||
{
|
{
|
||||||
const char *p = url;
|
const char *p = url;
|
||||||
|
|
||||||
/* Skip protocol name. We allow `-' and `+' because of `whois++',
|
/* Skip the scheme name. We allow `-' and `+' because of `whois++',
|
||||||
etc. */
|
etc. */
|
||||||
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
||||||
++p;
|
++p;
|
||||||
@ -277,10 +266,10 @@ skip_proto (const char *url)
|
|||||||
return p - url;
|
return p - url;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 1 if the URL begins with a protocol (supported or
|
/* Returns 1 if the URL begins with a scheme (supported or
|
||||||
unsupported), 0 otherwise. */
|
unsupported), 0 otherwise. */
|
||||||
int
|
int
|
||||||
has_proto (const char *url)
|
url_has_scheme (const char *url)
|
||||||
{
|
{
|
||||||
const char *p = url;
|
const char *p = url;
|
||||||
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
||||||
@ -290,11 +279,11 @@ has_proto (const char *url)
|
|||||||
|
|
||||||
/* Skip the username and password, if present here. The function
|
/* Skip the username and password, if present here. The function
|
||||||
should be called *not* with the complete URL, but with the part
|
should be called *not* with the complete URL, but with the part
|
||||||
right after the protocol.
|
right after the scheme.
|
||||||
|
|
||||||
If no username and password are found, return 0. */
|
If no username and password are found, return 0. */
|
||||||
int
|
int
|
||||||
skip_uname (const char *url)
|
url_skip_uname (const char *url)
|
||||||
{
|
{
|
||||||
const char *p;
|
const char *p;
|
||||||
const char *q = NULL;
|
const char *q = NULL;
|
||||||
@ -317,7 +306,7 @@ newurl (void)
|
|||||||
|
|
||||||
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
|
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
|
||||||
memset (u, 0, sizeof (*u));
|
memset (u, 0, sizeof (*u));
|
||||||
u->proto = URLUNKNOWN;
|
u->scheme = SCHEME_INVALID;
|
||||||
return u;
|
return u;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,10 +333,14 @@ freeurl (struct urlinfo *u, int complete)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum url_parse_error {
|
||||||
|
PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
|
||||||
|
};
|
||||||
|
|
||||||
/* Extract the given URL of the form
|
/* Extract the given URL of the form
|
||||||
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
|
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
|
||||||
1. hostname (terminated with `/' or `:')
|
1. hostname (terminated with `/' or `:')
|
||||||
2. port number (terminated with `/'), or chosen for the protocol
|
2. port number (terminated with `/'), or chosen for the scheme
|
||||||
3. dirname (everything after hostname)
|
3. dirname (everything after hostname)
|
||||||
Most errors are handled. No allocation is done, you must supply
|
Most errors are handled. No allocation is done, you must supply
|
||||||
pointers to allocated memory.
|
pointers to allocated memory.
|
||||||
@ -367,36 +360,36 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
|||||||
{
|
{
|
||||||
int i, l, abs_ftp;
|
int i, l, abs_ftp;
|
||||||
int recognizable; /* Recognizable URL is the one where
|
int recognizable; /* Recognizable URL is the one where
|
||||||
the protocol name was explicitly
|
the scheme was explicitly named,
|
||||||
named, i.e. it wasn't deduced from
|
i.e. it wasn't deduced from the URL
|
||||||
the URL format. */
|
format. */
|
||||||
uerr_t type;
|
uerr_t type;
|
||||||
|
|
||||||
DEBUGP (("parseurl (\"%s\") -> ", url));
|
DEBUGP (("parseurl (\"%s\") -> ", url));
|
||||||
recognizable = has_proto (url);
|
recognizable = url_has_scheme (url);
|
||||||
if (strict && !recognizable)
|
if (strict && !recognizable)
|
||||||
return URLUNKNOWN;
|
return URLUNKNOWN;
|
||||||
for (i = 0, l = 0; i < ARRAY_SIZE (sup_protos); i++)
|
for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||||
{
|
{
|
||||||
l = strlen (sup_protos[i].name);
|
l = strlen (supported_schemes[i].leading_string);
|
||||||
if (!strncasecmp (sup_protos[i].name, url, l))
|
if (!strncasecmp (supported_schemes[i].leading_string, url, l))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* If protocol is recognizable, but unsupported, bail out, else
|
/* If scheme is recognizable, but unsupported, bail out, else
|
||||||
suppose unknown. */
|
suppose unknown. */
|
||||||
if (recognizable && i == ARRAY_SIZE (sup_protos))
|
if (recognizable && i == ARRAY_SIZE (supported_schemes))
|
||||||
return URLUNKNOWN;
|
return URLUNKNOWN;
|
||||||
else if (i == ARRAY_SIZE (sup_protos))
|
else if (i == ARRAY_SIZE (supported_schemes))
|
||||||
type = URLUNKNOWN;
|
type = URLUNKNOWN;
|
||||||
else
|
else
|
||||||
u->proto = type = sup_protos[i].ind;
|
u->scheme = type = supported_schemes[i].scheme;
|
||||||
|
|
||||||
if (type == URLUNKNOWN)
|
if (type == URLUNKNOWN)
|
||||||
l = 0;
|
l = 0;
|
||||||
/* Allow a username and password to be specified (i.e. just skip
|
/* Allow a username and password to be specified (i.e. just skip
|
||||||
them for now). */
|
them for now). */
|
||||||
if (recognizable)
|
if (recognizable)
|
||||||
l += skip_uname (url + l);
|
l += url_skip_uname (url + l);
|
||||||
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
|
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
|
||||||
if (i == l)
|
if (i == l)
|
||||||
return URLBADHOST;
|
return URLBADHOST;
|
||||||
@ -413,7 +406,10 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
|||||||
if (ISDIGIT (url[++i])) /* A port number */
|
if (ISDIGIT (url[++i])) /* A port number */
|
||||||
{
|
{
|
||||||
if (type == URLUNKNOWN)
|
if (type == URLUNKNOWN)
|
||||||
u->proto = type = URLHTTP;
|
{
|
||||||
|
type = URLHTTP;
|
||||||
|
u->scheme = SCHEME_HTTP;
|
||||||
|
}
|
||||||
for (; url[i] && url[i] != '/'; i++)
|
for (; url[i] && url[i] != '/'; i++)
|
||||||
if (ISDIGIT (url[i]))
|
if (ISDIGIT (url[i]))
|
||||||
u->port = 10 * u->port + (url[i] - '0');
|
u->port = 10 * u->port + (url[i] - '0');
|
||||||
@ -424,21 +420,27 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
|||||||
DEBUGP (("port %hu -> ", u->port));
|
DEBUGP (("port %hu -> ", u->port));
|
||||||
}
|
}
|
||||||
else if (type == URLUNKNOWN) /* or a directory */
|
else if (type == URLUNKNOWN) /* or a directory */
|
||||||
u->proto = type = URLFTP;
|
{
|
||||||
|
type = URLFTP;
|
||||||
|
u->scheme = SCHEME_FTP;
|
||||||
|
}
|
||||||
else /* or just a malformed port number */
|
else /* or just a malformed port number */
|
||||||
return URLBADPORT;
|
return URLBADPORT;
|
||||||
}
|
}
|
||||||
else if (type == URLUNKNOWN)
|
else if (type == URLUNKNOWN)
|
||||||
u->proto = type = URLHTTP;
|
{
|
||||||
|
type = URLHTTP;
|
||||||
|
u->scheme = SCHEME_HTTP;
|
||||||
|
}
|
||||||
if (!u->port)
|
if (!u->port)
|
||||||
{
|
{
|
||||||
int ind;
|
int ind;
|
||||||
for (ind = 0; ind < ARRAY_SIZE (sup_protos); ind++)
|
for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
|
||||||
if (sup_protos[ind].ind == type)
|
if (supported_schemes[ind].scheme == u->scheme)
|
||||||
break;
|
break;
|
||||||
if (ind == ARRAY_SIZE (sup_protos))
|
if (ind == ARRAY_SIZE (supported_schemes))
|
||||||
return URLUNKNOWN;
|
return URLUNKNOWN;
|
||||||
u->port = sup_protos[ind].port;
|
u->port = supported_schemes[ind].default_port;
|
||||||
}
|
}
|
||||||
/* Some delimiter troubles... */
|
/* Some delimiter troubles... */
|
||||||
if (url[i] == '/' && url[i - 1] != ':')
|
if (url[i] == '/' && url[i - 1] != ':')
|
||||||
@ -480,7 +482,7 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
|||||||
if (l > 1 && u->dir[l - 1] == '/')
|
if (l > 1 && u->dir[l - 1] == '/')
|
||||||
u->dir[l - 1] = '\0';
|
u->dir[l - 1] = '\0';
|
||||||
/* Re-create the path: */
|
/* Re-create the path: */
|
||||||
abs_ftp = (u->proto == URLFTP && *u->dir == '/');
|
abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
|
||||||
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
|
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
|
||||||
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
|
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
|
||||||
strcpy (u->path, abs_ftp ? "%2F" : "/");
|
strcpy (u->path, abs_ftp ? "%2F" : "/");
|
||||||
@ -574,11 +576,10 @@ parse_uname (const char *url, char **user, char **passwd)
|
|||||||
*user = NULL;
|
*user = NULL;
|
||||||
*passwd = NULL;
|
*passwd = NULL;
|
||||||
|
|
||||||
/* Look for the end of the protocol string. */
|
/* Look for the end of the scheme identifier. */
|
||||||
l = skip_proto (url);
|
l = url_skip_scheme (url);
|
||||||
if (!l)
|
if (!l)
|
||||||
return URLUNKNOWN;
|
return URLUNKNOWN;
|
||||||
/* Add protocol offset. */
|
|
||||||
url += l;
|
url += l;
|
||||||
/* Is there an `@' character? */
|
/* Is there an `@' character? */
|
||||||
for (p = url; *p && *p != '/'; p++)
|
for (p = url; *p && *p != '/'; p++)
|
||||||
@ -623,26 +624,27 @@ process_ftp_type (char *path)
|
|||||||
return '\0';
|
return '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the URL as fine-formed string, with a proper protocol, optional port
|
/* Recreate the URL string from the data in urlinfo. This can be used
|
||||||
number, directory and optional user/password. If `hide' is non-zero (as it
|
to create a "canonical" representation of the URL. If `hide' is
|
||||||
is when we're calling this on a URL we plan to print, but not when calling it
|
non-zero (as it is when we're calling this on a URL we plan to
|
||||||
to canonicalize a URL for use within the program), password will be hidden.
|
print, but not when calling it to canonicalize a URL for use within
|
||||||
The forbidden characters in the URL will be cleansed. */
|
the program), password will be hidden. The forbidden characters in
|
||||||
|
the URL will be cleansed. */
|
||||||
char *
|
char *
|
||||||
str_url (const struct urlinfo *u, int hide)
|
str_url (const struct urlinfo *u, int hide)
|
||||||
{
|
{
|
||||||
char *res, *host, *user, *passwd, *proto_name, *dir, *file;
|
char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
|
||||||
int i, l, ln, lu, lh, lp, lf, ld;
|
int i, l, ln, lu, lh, lp, lf, ld;
|
||||||
unsigned short proto_default_port;
|
unsigned short default_port;
|
||||||
|
|
||||||
/* Look for the protocol name. */
|
/* Look for the scheme. */
|
||||||
for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
|
for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||||
if (sup_protos[i].ind == u->proto)
|
if (supported_schemes[i].scheme == u->scheme)
|
||||||
break;
|
break;
|
||||||
if (i == ARRAY_SIZE (sup_protos))
|
if (i == ARRAY_SIZE (supported_schemes))
|
||||||
return NULL;
|
return NULL;
|
||||||
proto_name = sup_protos[i].name;
|
scheme_name = supported_schemes[i].leading_string;
|
||||||
proto_default_port = sup_protos[i].port;
|
default_port = supported_schemes[i].default_port;
|
||||||
host = encode_string (u->host);
|
host = encode_string (u->host);
|
||||||
dir = encode_string (u->dir);
|
dir = encode_string (u->dir);
|
||||||
file = encode_string (u->file);
|
file = encode_string (u->file);
|
||||||
@ -660,7 +662,7 @@ str_url (const struct urlinfo *u, int hide)
|
|||||||
else
|
else
|
||||||
passwd = encode_string (u->passwd);
|
passwd = encode_string (u->passwd);
|
||||||
}
|
}
|
||||||
if (u->proto == URLFTP && *dir == '/')
|
if (u->scheme == SCHEME_FTP && *dir == '/')
|
||||||
{
|
{
|
||||||
char *tmp = (char *)xmalloc (strlen (dir) + 3);
|
char *tmp = (char *)xmalloc (strlen (dir) + 3);
|
||||||
/*sprintf (tmp, "%%2F%s", dir + 1);*/
|
/*sprintf (tmp, "%%2F%s", dir + 1);*/
|
||||||
@ -672,19 +674,19 @@ str_url (const struct urlinfo *u, int hide)
|
|||||||
dir = tmp;
|
dir = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
ln = strlen (proto_name);
|
ln = strlen (scheme_name);
|
||||||
lu = user ? strlen (user) : 0;
|
lu = user ? strlen (user) : 0;
|
||||||
lp = passwd ? strlen (passwd) : 0;
|
lp = passwd ? strlen (passwd) : 0;
|
||||||
lh = strlen (host);
|
lh = strlen (host);
|
||||||
ld = strlen (dir);
|
ld = strlen (dir);
|
||||||
lf = strlen (file);
|
lf = strlen (file);
|
||||||
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
|
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
|
||||||
/* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name,
|
/* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
|
||||||
(user ? user : ""), (passwd ? ":" : ""),
|
(user ? user : ""), (passwd ? ":" : ""),
|
||||||
(passwd ? passwd : ""), (user ? "@" : ""),
|
(passwd ? passwd : ""), (user ? "@" : ""),
|
||||||
host, u->port, dir, *dir ? "/" : "", file); */
|
host, u->port, dir, *dir ? "/" : "", file); */
|
||||||
l = 0;
|
l = 0;
|
||||||
memcpy (res, proto_name, ln);
|
memcpy (res, scheme_name, ln);
|
||||||
l += ln;
|
l += ln;
|
||||||
if (user)
|
if (user)
|
||||||
{
|
{
|
||||||
@ -700,7 +702,7 @@ str_url (const struct urlinfo *u, int hide)
|
|||||||
}
|
}
|
||||||
memcpy (res + l, host, lh);
|
memcpy (res + l, host, lh);
|
||||||
l += lh;
|
l += lh;
|
||||||
if (u->port != proto_default_port)
|
if (u->port != default_port)
|
||||||
{
|
{
|
||||||
res[l++] = ':';
|
res[l++] = ':';
|
||||||
long_to_string (res + l, (long)u->port);
|
long_to_string (res + l, (long)u->port);
|
||||||
@ -1123,7 +1125,7 @@ find_last_char (const char *b, const char *e, char c)
|
|||||||
Either of the URIs may be absolute or relative, complete with the
|
Either of the URIs may be absolute or relative, complete with the
|
||||||
host name, or path only. This tries to behave "reasonably" in all
|
host name, or path only. This tries to behave "reasonably" in all
|
||||||
foreseeable cases. It employs little specific knowledge about
|
foreseeable cases. It employs little specific knowledge about
|
||||||
protocols or URL-specific stuff -- it just works on strings.
|
schemes or URL-specific stuff -- it just works on strings.
|
||||||
|
|
||||||
The parameter LINKLENGTH is useful if LINK is not zero-terminated.
|
The parameter LINKLENGTH is useful if LINK is not zero-terminated.
|
||||||
See uri_merge for a gentler interface to this functionality.
|
See uri_merge for a gentler interface to this functionality.
|
||||||
@ -1131,11 +1133,11 @@ find_last_char (const char *b, const char *e, char c)
|
|||||||
#### This function should handle `./' and `../' so that the evil
|
#### This function should handle `./' and `../' so that the evil
|
||||||
path_simplify can go. */
|
path_simplify can go. */
|
||||||
static char *
|
static char *
|
||||||
uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
|
||||||
{
|
{
|
||||||
char *constr;
|
char *constr;
|
||||||
|
|
||||||
if (no_proto)
|
if (no_scheme)
|
||||||
{
|
{
|
||||||
const char *end = base + urlpath_length (base);
|
const char *end = base + urlpath_length (base);
|
||||||
|
|
||||||
@ -1252,7 +1254,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
|||||||
constr[span + linklength] = '\0';
|
constr[span + linklength] = '\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /* !no_proto */
|
else /* !no_scheme */
|
||||||
{
|
{
|
||||||
constr = strdupdelim (link, link + linklength);
|
constr = strdupdelim (link, link + linklength);
|
||||||
}
|
}
|
||||||
@ -1265,7 +1267,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
|||||||
char *
|
char *
|
||||||
uri_merge (const char *base, const char *link)
|
uri_merge (const char *base, const char *link)
|
||||||
{
|
{
|
||||||
return uri_merge_1 (base, link, strlen (link), !has_proto (link));
|
return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Optimize URL by host, destructively replacing u->host with realhost
|
/* Optimize URL by host, destructively replacing u->host with realhost
|
||||||
@ -1283,22 +1285,28 @@ opt_url (struct urlinfo *u)
|
|||||||
u->url = str_url (u, 0);
|
u->url = str_url (u, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns proxy host address, in accordance with PROTO. */
|
/* Returns proxy host address, in accordance with SCHEME. */
|
||||||
char *
|
char *
|
||||||
getproxy (uerr_t proto)
|
getproxy (enum url_scheme scheme)
|
||||||
{
|
{
|
||||||
char *proxy;
|
char *proxy = NULL;
|
||||||
|
|
||||||
if (proto == URLHTTP)
|
switch (scheme)
|
||||||
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
|
{
|
||||||
else if (proto == URLFTP)
|
case SCHEME_HTTP:
|
||||||
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
|
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
|
||||||
|
break;
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
else if (proto == URLHTTPS)
|
case SCHEME_HTTPS:
|
||||||
proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
|
proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
|
||||||
#endif /* HAVE_SSL */
|
break;
|
||||||
else
|
#endif
|
||||||
proxy = NULL;
|
case SCHEME_FTP:
|
||||||
|
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
|
||||||
|
break;
|
||||||
|
case SCHEME_INVALID:
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (!proxy || !*proxy)
|
if (!proxy || !*proxy)
|
||||||
return NULL;
|
return NULL;
|
||||||
return proxy;
|
return proxy;
|
||||||
|
19
src/url.h
19
src/url.h
@ -25,12 +25,21 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
#define DEFAULT_FTP_PORT 21
|
#define DEFAULT_FTP_PORT 21
|
||||||
#define DEFAULT_HTTPS_PORT 443
|
#define DEFAULT_HTTPS_PORT 443
|
||||||
|
|
||||||
|
enum url_scheme {
|
||||||
|
SCHEME_HTTP,
|
||||||
|
#ifdef HAVE_SSL
|
||||||
|
SCHEME_HTTPS,
|
||||||
|
#endif
|
||||||
|
SCHEME_FTP,
|
||||||
|
SCHEME_INVALID
|
||||||
|
};
|
||||||
|
|
||||||
/* Structure containing info on a URL. */
|
/* Structure containing info on a URL. */
|
||||||
struct urlinfo
|
struct urlinfo
|
||||||
{
|
{
|
||||||
char *url; /* Unchanged URL */
|
char *url; /* Unchanged URL */
|
||||||
uerr_t proto; /* URL protocol */
|
enum url_scheme scheme; /* URL scheme */
|
||||||
|
|
||||||
char *host; /* Extracted hostname */
|
char *host; /* Extracted hostname */
|
||||||
unsigned short port;
|
unsigned short port;
|
||||||
char ftp_type;
|
char ftp_type;
|
||||||
@ -97,10 +106,10 @@ char *encode_string PARAMS ((const char *));
|
|||||||
|
|
||||||
struct urlinfo *newurl PARAMS ((void));
|
struct urlinfo *newurl PARAMS ((void));
|
||||||
void freeurl PARAMS ((struct urlinfo *, int));
|
void freeurl PARAMS ((struct urlinfo *, int));
|
||||||
uerr_t urlproto PARAMS ((const char *));
|
enum url_scheme url_detect_scheme PARAMS ((const char *));
|
||||||
int skip_proto PARAMS ((const char *));
|
int url_skip_scheme PARAMS ((const char *));
|
||||||
int has_proto PARAMS ((const char *));
|
int url_has_scheme PARAMS ((const char *));
|
||||||
int skip_uname PARAMS ((const char *));
|
int url_skip_uname PARAMS ((const char *));
|
||||||
|
|
||||||
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
|
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
|
||||||
char *str_url PARAMS ((const struct urlinfo *, int));
|
char *str_url PARAMS ((const struct urlinfo *, int));
|
||||||
|
Loading…
Reference in New Issue
Block a user