mirror of
https://github.com/mirror/wget.git
synced 2025-03-04 23:00:22 +08:00
Add gzip Content-Encoding decompression
* src/http.c (struct http_stat): Add remote_encoding field. (read_response_body): Enable gzip decompression. (initialize_request): Send gzip Accept-Encoding header. (gethttp): Decompress files with gzip Content-Encoding. * src/retr.c: Include zlib.h. (zalloc): New function. (zfree): New function. (fd_read_body): Decompress gzip data. * src/retr.h (fd_read_body enum): Add rb_compressed_gzip flag.
This commit is contained in:
parent
b543dfe783
commit
c451eec155
39
src/http.c
39
src/http.c
@ -1581,6 +1581,7 @@ struct http_stat
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
encoding_t local_encoding; /* the encoding of the local file */
|
encoding_t local_encoding; /* the encoding of the local file */
|
||||||
|
encoding_t remote_encoding; /* the encoding of the remote file */
|
||||||
|
|
||||||
bool temporary; /* downloading a temporary file */
|
bool temporary; /* downloading a temporary file */
|
||||||
};
|
};
|
||||||
@ -1693,6 +1694,9 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
|||||||
if (chunked_transfer_encoding)
|
if (chunked_transfer_encoding)
|
||||||
flags |= rb_chunked_transfer_encoding;
|
flags |= rb_chunked_transfer_encoding;
|
||||||
|
|
||||||
|
if (hs->remote_encoding == ENC_GZIP)
|
||||||
|
flags |= rb_compressed_gzip;
|
||||||
|
|
||||||
hs->len = hs->restval;
|
hs->len = hs->restval;
|
||||||
hs->rd_size = 0;
|
hs->rd_size = 0;
|
||||||
/* Download the response body and write it to fp.
|
/* Download the response body and write it to fp.
|
||||||
@ -1886,7 +1890,12 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u
|
|||||||
rel_value);
|
rel_value);
|
||||||
SET_USER_AGENT (req);
|
SET_USER_AGENT (req);
|
||||||
request_set_header (req, "Accept", "*/*", rel_none);
|
request_set_header (req, "Accept", "*/*", rel_none);
|
||||||
request_set_header (req, "Accept-Encoding", "identity", rel_none);
|
#ifdef HAVE_LIBZ
|
||||||
|
if (opt.compression != compression_none)
|
||||||
|
request_set_header (req, "Accept-Encoding", "gzip", rel_none);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
request_set_header (req, "Accept-Encoding", "identity", rel_none);
|
||||||
|
|
||||||
/* Find the username with priority */
|
/* Find the username with priority */
|
||||||
if (u->user)
|
if (u->user)
|
||||||
@ -3203,6 +3212,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
|||||||
hs->error = NULL;
|
hs->error = NULL;
|
||||||
hs->message = NULL;
|
hs->message = NULL;
|
||||||
hs->local_encoding = ENC_NONE;
|
hs->local_encoding = ENC_NONE;
|
||||||
|
hs->remote_encoding = ENC_NONE;
|
||||||
|
|
||||||
conn = u;
|
conn = u;
|
||||||
|
|
||||||
@ -3694,6 +3704,30 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
|||||||
DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
|
DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
|
||||||
hs->local_encoding = ENC_NONE;
|
hs->local_encoding = ENC_NONE;
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_LIBZ
|
||||||
|
else if (hs->local_encoding == ENC_GZIP
|
||||||
|
&& opt.compression != compression_none)
|
||||||
|
{
|
||||||
|
/* Make sure the Content-Type is not gzip before decompressing */
|
||||||
|
const char * p = strchr (type, '/');
|
||||||
|
if (p == NULL)
|
||||||
|
{
|
||||||
|
hs->remote_encoding = ENC_GZIP;
|
||||||
|
hs->local_encoding = ENC_NONE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p++;
|
||||||
|
if (c_tolower(p[0]) == 'x' && p[1] == '-')
|
||||||
|
p += 2;
|
||||||
|
if (0 != c_strcasecmp (p, "gzip"))
|
||||||
|
{
|
||||||
|
hs->remote_encoding = ENC_GZIP;
|
||||||
|
hs->local_encoding = ENC_NONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 20x responses are counted among successful by default. */
|
/* 20x responses are counted among successful by default. */
|
||||||
@ -3930,6 +3964,9 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
|||||||
}
|
}
|
||||||
if (contlen == -1)
|
if (contlen == -1)
|
||||||
hs->contlen = -1;
|
hs->contlen = -1;
|
||||||
|
/* If the response is gzipped, the uncompressed size is unknown. */
|
||||||
|
else if (hs->remote_encoding == ENC_GZIP)
|
||||||
|
hs->contlen = -1;
|
||||||
else
|
else
|
||||||
hs->contlen = contlen + contrange;
|
hs->contlen = contlen + contrange;
|
||||||
|
|
||||||
|
143
src/retr.c
143
src/retr.c
@ -41,6 +41,10 @@ as that of the covered work. */
|
|||||||
# include <unixio.h> /* For delete(). */
|
# include <unixio.h> /* For delete(). */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_LIBZ
|
||||||
|
# include <zlib.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "exits.h"
|
#include "exits.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "retr.h"
|
#include "retr.h"
|
||||||
@ -84,6 +88,22 @@ limit_bandwidth_reset (void)
|
|||||||
xzero (limit_data);
|
xzero (limit_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_LIBZ
|
||||||
|
static voidpf
|
||||||
|
zalloc (voidpf opaque, unsigned int items, unsigned int size)
|
||||||
|
{
|
||||||
|
(void) opaque;
|
||||||
|
return (voidpf) xcalloc (items, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
zfree (voidpf opaque, voidpf address)
|
||||||
|
{
|
||||||
|
(void) opaque;
|
||||||
|
xfree (address);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Limit the bandwidth by pausing the download for an amount of time.
|
/* Limit the bandwidth by pausing the download for an amount of time.
|
||||||
BYTES is the number of bytes received from the network, and TIMER
|
BYTES is the number of bytes received from the network, and TIMER
|
||||||
is the timer that started at the beginning of download. */
|
is the timer that started at the beginning of download. */
|
||||||
@ -257,6 +277,44 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|||||||
wgint sum_written = 0;
|
wgint sum_written = 0;
|
||||||
wgint remaining_chunk_size = 0;
|
wgint remaining_chunk_size = 0;
|
||||||
|
|
||||||
|
#ifdef HAVE_LIBZ
|
||||||
|
/* try to minimize the number of calls to inflate() and write_data() per
|
||||||
|
call to fd_read() */
|
||||||
|
unsigned int gzbufsize = dlbufsize * 4;
|
||||||
|
char *gzbuf = NULL;
|
||||||
|
z_stream gzstream;
|
||||||
|
|
||||||
|
if (flags & rb_compressed_gzip)
|
||||||
|
{
|
||||||
|
gzbuf = xmalloc (gzbufsize);
|
||||||
|
if (gzbuf != NULL)
|
||||||
|
{
|
||||||
|
gzstream.zalloc = zalloc;
|
||||||
|
gzstream.zfree = zfree;
|
||||||
|
gzstream.opaque = Z_NULL;
|
||||||
|
gzstream.next_in = Z_NULL;
|
||||||
|
gzstream.avail_in = 0;
|
||||||
|
|
||||||
|
#define GZIP_DETECT 32 /* gzip format detection */
|
||||||
|
#define GZIP_WINDOW 15 /* logarithmic window size (default: 15) */
|
||||||
|
ret = inflateInit2 (&gzstream, GZIP_DETECT | GZIP_WINDOW);
|
||||||
|
if (ret != Z_OK)
|
||||||
|
{
|
||||||
|
xfree (gzbuf);
|
||||||
|
errno = (ret == Z_MEM_ERROR) ? ENOMEM : EINVAL;
|
||||||
|
ret = -1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
errno = ENOMEM;
|
||||||
|
ret = -1;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (flags & rb_skip_startpos)
|
if (flags & rb_skip_startpos)
|
||||||
skip = startpos;
|
skip = startpos;
|
||||||
|
|
||||||
@ -383,12 +441,64 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|||||||
int write_res;
|
int write_res;
|
||||||
|
|
||||||
sum_read += ret;
|
sum_read += ret;
|
||||||
write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
|
||||||
if (write_res < 0)
|
#ifdef HAVE_LIBZ
|
||||||
|
if (gzbuf != NULL)
|
||||||
{
|
{
|
||||||
ret = (write_res == -3) ? -3 : -2;
|
int err;
|
||||||
goto out;
|
int towrite;
|
||||||
|
gzstream.avail_in = ret;
|
||||||
|
gzstream.next_in = (unsigned char *) dlbuf;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
gzstream.avail_out = gzbufsize;
|
||||||
|
gzstream.next_out = (unsigned char *) gzbuf;
|
||||||
|
|
||||||
|
err = inflate (&gzstream, Z_NO_FLUSH);
|
||||||
|
|
||||||
|
switch (err)
|
||||||
|
{
|
||||||
|
case Z_MEM_ERROR:
|
||||||
|
errno = ENOMEM;
|
||||||
|
ret = -1;
|
||||||
|
goto out;
|
||||||
|
case Z_NEED_DICT:
|
||||||
|
case Z_DATA_ERROR:
|
||||||
|
errno = EINVAL;
|
||||||
|
ret = -1;
|
||||||
|
goto out;
|
||||||
|
case Z_STREAM_END:
|
||||||
|
if (exact && sum_read != toread)
|
||||||
|
{
|
||||||
|
DEBUGP(("zlib stream ended unexpectedly after "
|
||||||
|
"%ld/%ld bytes\n", sum_read, toread));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
towrite = gzbufsize - gzstream.avail_out;
|
||||||
|
write_res = write_data (out, out2, gzbuf, towrite, &skip,
|
||||||
|
&sum_written);
|
||||||
|
if (write_res < 0)
|
||||||
|
{
|
||||||
|
ret = (write_res == -3) ? -3 : -2;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (gzstream.avail_out == 0);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
write_res = write_data (out, out2, dlbuf, ret, &skip,
|
||||||
|
&sum_written);
|
||||||
|
if (write_res < 0)
|
||||||
|
{
|
||||||
|
ret = (write_res == -3) ? -3 : -2;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (chunked)
|
if (chunked)
|
||||||
{
|
{
|
||||||
remaining_chunk_size -= ret;
|
remaining_chunk_size -= ret;
|
||||||
@ -433,6 +543,31 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
|||||||
if (timer)
|
if (timer)
|
||||||
ptimer_destroy (timer);
|
ptimer_destroy (timer);
|
||||||
|
|
||||||
|
#ifdef HAVE_LIBZ
|
||||||
|
if (gzbuf != NULL)
|
||||||
|
{
|
||||||
|
int err = inflateEnd (&gzstream);
|
||||||
|
if (ret >= 0)
|
||||||
|
{
|
||||||
|
/* with compression enabled, ret must be 0 if successful */
|
||||||
|
if (err == Z_OK)
|
||||||
|
ret = 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
errno = EINVAL;
|
||||||
|
ret = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
xfree (gzbuf);
|
||||||
|
|
||||||
|
if (gzstream.total_in != sum_read)
|
||||||
|
{
|
||||||
|
DEBUGP(("zlib read size differs from raw read size (%lu/%lu)\n",
|
||||||
|
gzstream.total_in, sum_read));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (qtyread)
|
if (qtyread)
|
||||||
*qtyread += sum_read;
|
*qtyread += sum_read;
|
||||||
if (qtywritten)
|
if (qtywritten)
|
||||||
|
@ -49,7 +49,9 @@ enum {
|
|||||||
rb_skip_startpos = 2,
|
rb_skip_startpos = 2,
|
||||||
|
|
||||||
/* Used by HTTP/HTTPS*/
|
/* Used by HTTP/HTTPS*/
|
||||||
rb_chunked_transfer_encoding = 4
|
rb_chunked_transfer_encoding = 4,
|
||||||
|
|
||||||
|
rb_compressed_gzip = 8
|
||||||
};
|
};
|
||||||
|
|
||||||
int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
|
int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
|
||||||
|
Loading…
Reference in New Issue
Block a user