mirror of
https://github.com/mirror/wget.git
synced 2024-12-29 14:30:48 +08:00
[svn] Parse Content-Disposition better. Implement memrchr where missing.
This commit is contained in:
parent
aa07e689f2
commit
c90e4f9c1a
@ -1,3 +1,7 @@
|
||||
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* configure.in: Check for memrchr.
|
||||
|
||||
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* configure.in: Check for uintptr_t.
|
||||
|
@ -206,7 +206,7 @@ AC_FUNC_ALLOCA
|
||||
AC_FUNC_MMAP
|
||||
AC_FUNC_FSEEKO
|
||||
AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48)
|
||||
AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp)
|
||||
AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp memrchr)
|
||||
|
||||
dnl We expect to have these functions on Unix-like systems configure
|
||||
dnl runs on. The defines are provided to get them in config.h.in so
|
||||
|
@ -1,3 +1,14 @@
|
||||
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* url.c (find_last_char): Define in terms of memrchr.
|
||||
|
||||
* cmpt.c (memrchr): Define it on systems that don't have it.
|
||||
|
||||
* http.c (extract_param): New function for parsing header values
|
||||
with parameters.
|
||||
(parse_content_disposition): Use it. Don't allow slashes and
|
||||
backslashes in the file name.
|
||||
|
||||
2006-02-27 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* url.c (path_simplify): Don't preserve ".." at beginning of path.
|
||||
|
20
src/cmpt.c
20
src/cmpt.c
@ -111,10 +111,28 @@ strncasecmp (const char *s1, const char *s2, size_t n)
|
||||
return c1 - c2;
|
||||
}
|
||||
#endif /* not HAVE_STRNCASECMP */
|
||||
|
||||
#ifndef HAVE_MEMRCHR
/* memrchr is a GNU extension.  It is like the memchr function, except
   that it searches backwards from the end of the N bytes pointed to
   by S instead of forwards from the front.

   Returns a pointer to the last byte in [S, S + N) that equals C
   converted to unsigned char, or NULL if there is no such byte
   (which is always the case when N is zero).  */

void *
memrchr (const void *s, int c, size_t n)
{
  /* Compare as unsigned char, the way memchr does.  With plain char
     (signed on many platforms) a byte >= 0x80 is read as a negative
     value and would never equal an int C in the 128..255 range.  */
  const unsigned char *b = s;
  const unsigned char *e = b + n;
  const unsigned char ch = (unsigned char) c;

  while (e > b)
    if (*--e == ch)
      return (void *) e;
  return NULL;
}
#endif /* not HAVE_MEMRCHR */
|
||||
|
||||
/* strptime is required by POSIX, but it is missing from Windows,
|
||||
which means we must keep a fallback implementation. It is
|
||||
reportedly missing or broken on many older systems as well. */
|
||||
reportedly missing or broken on many older Unix systems as well, so
|
||||
it's good to have around. */
|
||||
|
||||
#ifndef HAVE_STRPTIME
|
||||
/* From GNU libc 2.1.3. */
|
||||
|
110
src/http.c
110
src/http.c
@ -894,37 +894,101 @@ extract_param_value_delim (const char *begin, const char *end,
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Parse the `Content-Disposition' header and extract the information it
|
||||
contains. Returns true if successful, false otherwise. */
|
||||
typedef struct {
|
||||
/* A token consists of characters in the [b, e) range. */
|
||||
const char *b, *e;
|
||||
} param_token;
|
||||
|
||||
/* Extract a parameter from the HTTP header at *SOURCE and advance
|
||||
*SOURCE to the next parameter. Return false when there are no more
|
||||
parameters to extract. The name of the parameter is returned in
|
||||
NAME, and the value in VALUE. If the parameter has no value, the
|
||||
token's value is zeroed out.
|
||||
|
||||
For example, if *SOURCE points to the string "attachment;
|
||||
filename=\"foo bar\"", the first call to this function will return
|
||||
the token named "attachment" and no value, and the second call will
|
||||
return the token named "filename" and value "foo bar". The third
|
||||
call will return false, indicating no more valid tokens. */
|
||||
|
||||
static bool
|
||||
parse_content_disposition (const char *hdrval, char **filename)
|
||||
extract_param (const char **source, param_token *name, param_token *value)
|
||||
{
|
||||
const char *b = hdrval; /* b - begin */
|
||||
const char *e = hdrval; /* e - end */
|
||||
const char *p = *source;
|
||||
|
||||
assert (hdrval);
|
||||
assert (filename);
|
||||
while (ISSPACE (*p)) ++p;
|
||||
if (!*p)
|
||||
return false; /* nothing more to extract */
|
||||
|
||||
for (; *e; ++e)
|
||||
/* Extract name. */
|
||||
name->b = p;
|
||||
while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p;
|
||||
name->e = p;
|
||||
while (ISSPACE (*p)) ++p;
|
||||
if (*p == ';' || !*p) /* no value */
|
||||
{
|
||||
if (*e == ';'
|
||||
&& e > b)
|
||||
{
|
||||
/* process chars b->e-1 */
|
||||
if (true == extract_param_value_delim (b, e - 1, "filename", filename))
|
||||
return true;
|
||||
|
||||
b = e + 1;
|
||||
}
|
||||
xzero (*value);
|
||||
if (*p == ';') ++p;
|
||||
*source = p;
|
||||
return true;
|
||||
}
|
||||
if (*p != '=')
|
||||
return false; /* error */
|
||||
|
||||
if (b != e)
|
||||
/* *p is '=', extract value */
|
||||
++p;
|
||||
while (ISSPACE (*p)) ++p;
|
||||
if (*p == '"') /* quoted */
|
||||
{
|
||||
/* process chars b->e */
|
||||
if (true == extract_param_value_delim (b, e, "filename", filename))
|
||||
value->b = ++p;
|
||||
while (*p && *p != '"') ++p;
|
||||
if (!*p)
|
||||
return false;
|
||||
value->e = p++;
|
||||
/* Currently at closing quote; find the end of param. */
|
||||
while (ISSPACE (*p)) ++p;
|
||||
while (*p && *p != ';') ++p;
|
||||
if (*p == ';')
|
||||
++p;
|
||||
else if (*p)
|
||||
/* garbage after closed quote, e.g. foo="bar"baz */
|
||||
return false;
|
||||
}
|
||||
else /* unquoted */
|
||||
{
|
||||
value->b = p;
|
||||
while (*p && *p != ';') ++p;
|
||||
value->e = p;
|
||||
while (value->e != value->b && ISSPACE (value->e[-1]))
|
||||
--value->e;
|
||||
if (*p == ';') ++p;
|
||||
}
|
||||
*source = p;
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef MAX
|
||||
#define MAX(p, q) ((p) > (q) ? (p) : (q))
|
||||
|
||||
static bool
|
||||
parse_content_disposition (const char *hdr, char **filename)
|
||||
{
|
||||
param_token name, value;
|
||||
while (extract_param (&hdr, &name, &value))
|
||||
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
|
||||
{
|
||||
/* Make the file name begin at the last slash or backslash. */
|
||||
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
|
||||
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
|
||||
if (last_slash && last_bs)
|
||||
value.b = 1 + MAX (last_slash, last_bs);
|
||||
else if (last_slash || last_bs)
|
||||
value.b = 1 + (last_slash ? last_slash : last_bs);
|
||||
if (value.b == value.e)
|
||||
continue;
|
||||
*filename = strdupdelim (value.b, value.e);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1687,7 +1751,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
||||
{
|
||||
/* Honor Content-Disposition whenever possible. */
|
||||
if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
|
||||
|| false == parse_content_disposition (hdrval, &hs->local_file))
|
||||
|| !parse_content_disposition (hdrval, &hs->local_file))
|
||||
{
|
||||
/* Choose filename according to URL name. */
|
||||
hs->local_file = url_file_name (u);
|
||||
|
@ -135,6 +135,9 @@ char *strptime ();
|
||||
# include <time.h>
|
||||
time_t timegm (struct tm *);
|
||||
#endif
|
||||
#ifndef HAVE_MEMRCHR
|
||||
void *memrchr (const void *, int, size_t);
|
||||
#endif
|
||||
|
||||
/* These are defined in snprintf.c. It would be nice to have an
|
||||
snprintf.h, though. */
|
||||
|
13
src/url.c
13
src/url.c
@ -1582,17 +1582,8 @@ path_end (const char *url)
|
||||
}
|
||||
|
||||
/* Find the last occurrence of character C in the range [b, e), or
|
||||
NULL, if none are present. We might want to use memrchr (a GNU
|
||||
extension) under GNU libc. */
|
||||
|
||||
static const char *
|
||||
find_last_char (const char *b, const char *e, char c)
|
||||
{
|
||||
for (; e > b; e--)
|
||||
if (*e == c)
|
||||
return e;
|
||||
return NULL;
|
||||
}
|
||||
NULL, if none are present. */
|
||||
#define find_last_char(b, e, c) memrchr ((b), (c), (e) - (b))
|
||||
|
||||
/* Merge BASE with LINK and return the resulting URI.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user