mirror of
https://github.com/mirror/wget.git
synced 2025-03-04 14:50:35 +08:00
Add gzip Content-Encoding decompression
* src/http.c (struct http_stat): Add remote_encoding field. (read_response_body): Enable gzip decompression. (initialize_request): Send gzip Accept-Encoding header. (gethttp): Decompress files with gzip Content-Encoding. * src/retr.c: Include zlib.h. (zalloc): New function. (zfree): New function. (fd_read_body): Decompress gzip data. * src/retr.h (fd_read_body enum): Add rb_compressed_gzip flag.
This commit is contained in:
parent
b543dfe783
commit
c451eec155
39
src/http.c
39
src/http.c
@ -1581,6 +1581,7 @@ struct http_stat
|
||||
#endif
|
||||
|
||||
encoding_t local_encoding; /* the encoding of the local file */
|
||||
encoding_t remote_encoding; /* the encoding of the remote file */
|
||||
|
||||
bool temporary; /* downloading a temporary file */
|
||||
};
|
||||
@ -1693,6 +1694,9 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
||||
if (chunked_transfer_encoding)
|
||||
flags |= rb_chunked_transfer_encoding;
|
||||
|
||||
if (hs->remote_encoding == ENC_GZIP)
|
||||
flags |= rb_compressed_gzip;
|
||||
|
||||
hs->len = hs->restval;
|
||||
hs->rd_size = 0;
|
||||
/* Download the response body and write it to fp.
|
||||
@ -1886,7 +1890,12 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u
|
||||
rel_value);
|
||||
SET_USER_AGENT (req);
|
||||
request_set_header (req, "Accept", "*/*", rel_none);
|
||||
request_set_header (req, "Accept-Encoding", "identity", rel_none);
|
||||
#ifdef HAVE_LIBZ
|
||||
if (opt.compression != compression_none)
|
||||
request_set_header (req, "Accept-Encoding", "gzip", rel_none);
|
||||
else
|
||||
#endif
|
||||
request_set_header (req, "Accept-Encoding", "identity", rel_none);
|
||||
|
||||
/* Find the username with priority */
|
||||
if (u->user)
|
||||
@ -3203,6 +3212,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
||||
hs->error = NULL;
|
||||
hs->message = NULL;
|
||||
hs->local_encoding = ENC_NONE;
|
||||
hs->remote_encoding = ENC_NONE;
|
||||
|
||||
conn = u;
|
||||
|
||||
@ -3694,6 +3704,30 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
||||
DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
|
||||
hs->local_encoding = ENC_NONE;
|
||||
}
|
||||
#ifdef HAVE_LIBZ
|
||||
else if (hs->local_encoding == ENC_GZIP
|
||||
&& opt.compression != compression_none)
|
||||
{
|
||||
/* Make sure the Content-Type is not gzip before decompressing */
|
||||
const char * p = strchr (type, '/');
|
||||
if (p == NULL)
|
||||
{
|
||||
hs->remote_encoding = ENC_GZIP;
|
||||
hs->local_encoding = ENC_NONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
p++;
|
||||
if (c_tolower(p[0]) == 'x' && p[1] == '-')
|
||||
p += 2;
|
||||
if (0 != c_strcasecmp (p, "gzip"))
|
||||
{
|
||||
hs->remote_encoding = ENC_GZIP;
|
||||
hs->local_encoding = ENC_NONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* 20x responses are counted among successful by default. */
|
||||
@ -3930,6 +3964,9 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
|
||||
}
|
||||
if (contlen == -1)
|
||||
hs->contlen = -1;
|
||||
/* If the response is gzipped, the uncompressed size is unknown. */
|
||||
else if (hs->remote_encoding == ENC_GZIP)
|
||||
hs->contlen = -1;
|
||||
else
|
||||
hs->contlen = contlen + contrange;
|
||||
|
||||
|
143
src/retr.c
143
src/retr.c
@ -41,6 +41,10 @@ as that of the covered work. */
|
||||
# include <unixio.h> /* For delete(). */
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
# include <zlib.h>
|
||||
#endif
|
||||
|
||||
#include "exits.h"
|
||||
#include "utils.h"
|
||||
#include "retr.h"
|
||||
@ -84,6 +88,22 @@ limit_bandwidth_reset (void)
|
||||
xzero (limit_data);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
static voidpf
|
||||
zalloc (voidpf opaque, unsigned int items, unsigned int size)
|
||||
{
|
||||
(void) opaque;
|
||||
return (voidpf) xcalloc (items, size);
|
||||
}
|
||||
|
||||
static void
|
||||
zfree (voidpf opaque, voidpf address)
|
||||
{
|
||||
(void) opaque;
|
||||
xfree (address);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Limit the bandwidth by pausing the download for an amount of time.
|
||||
BYTES is the number of bytes received from the network, and TIMER
|
||||
is the timer that started at the beginning of download. */
|
||||
@ -257,6 +277,44 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
||||
wgint sum_written = 0;
|
||||
wgint remaining_chunk_size = 0;
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
/* try to minimize the number of calls to inflate() and write_data() per
|
||||
call to fd_read() */
|
||||
unsigned int gzbufsize = dlbufsize * 4;
|
||||
char *gzbuf = NULL;
|
||||
z_stream gzstream;
|
||||
|
||||
if (flags & rb_compressed_gzip)
|
||||
{
|
||||
gzbuf = xmalloc (gzbufsize);
|
||||
if (gzbuf != NULL)
|
||||
{
|
||||
gzstream.zalloc = zalloc;
|
||||
gzstream.zfree = zfree;
|
||||
gzstream.opaque = Z_NULL;
|
||||
gzstream.next_in = Z_NULL;
|
||||
gzstream.avail_in = 0;
|
||||
|
||||
#define GZIP_DETECT 32 /* gzip format detection */
|
||||
#define GZIP_WINDOW 15 /* logarithmic window size (default: 15) */
|
||||
ret = inflateInit2 (&gzstream, GZIP_DETECT | GZIP_WINDOW);
|
||||
if (ret != Z_OK)
|
||||
{
|
||||
xfree (gzbuf);
|
||||
errno = (ret == Z_MEM_ERROR) ? ENOMEM : EINVAL;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
errno = ENOMEM;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (flags & rb_skip_startpos)
|
||||
skip = startpos;
|
||||
|
||||
@ -383,12 +441,64 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
||||
int write_res;
|
||||
|
||||
sum_read += ret;
|
||||
write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
||||
if (write_res < 0)
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
if (gzbuf != NULL)
|
||||
{
|
||||
ret = (write_res == -3) ? -3 : -2;
|
||||
goto out;
|
||||
int err;
|
||||
int towrite;
|
||||
gzstream.avail_in = ret;
|
||||
gzstream.next_in = (unsigned char *) dlbuf;
|
||||
|
||||
do
|
||||
{
|
||||
gzstream.avail_out = gzbufsize;
|
||||
gzstream.next_out = (unsigned char *) gzbuf;
|
||||
|
||||
err = inflate (&gzstream, Z_NO_FLUSH);
|
||||
|
||||
switch (err)
|
||||
{
|
||||
case Z_MEM_ERROR:
|
||||
errno = ENOMEM;
|
||||
ret = -1;
|
||||
goto out;
|
||||
case Z_NEED_DICT:
|
||||
case Z_DATA_ERROR:
|
||||
errno = EINVAL;
|
||||
ret = -1;
|
||||
goto out;
|
||||
case Z_STREAM_END:
|
||||
if (exact && sum_read != toread)
|
||||
{
|
||||
DEBUGP(("zlib stream ended unexpectedly after "
|
||||
"%ld/%ld bytes\n", sum_read, toread));
|
||||
}
|
||||
}
|
||||
|
||||
towrite = gzbufsize - gzstream.avail_out;
|
||||
write_res = write_data (out, out2, gzbuf, towrite, &skip,
|
||||
&sum_written);
|
||||
if (write_res < 0)
|
||||
{
|
||||
ret = (write_res == -3) ? -3 : -2;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
while (gzstream.avail_out == 0);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
write_res = write_data (out, out2, dlbuf, ret, &skip,
|
||||
&sum_written);
|
||||
if (write_res < 0)
|
||||
{
|
||||
ret = (write_res == -3) ? -3 : -2;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (chunked)
|
||||
{
|
||||
remaining_chunk_size -= ret;
|
||||
@ -433,6 +543,31 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
|
||||
if (timer)
|
||||
ptimer_destroy (timer);
|
||||
|
||||
#ifdef HAVE_LIBZ
|
||||
if (gzbuf != NULL)
|
||||
{
|
||||
int err = inflateEnd (&gzstream);
|
||||
if (ret >= 0)
|
||||
{
|
||||
/* with compression enabled, ret must be 0 if successful */
|
||||
if (err == Z_OK)
|
||||
ret = 0;
|
||||
else
|
||||
{
|
||||
errno = EINVAL;
|
||||
ret = -1;
|
||||
}
|
||||
}
|
||||
xfree (gzbuf);
|
||||
|
||||
if (gzstream.total_in != sum_read)
|
||||
{
|
||||
DEBUGP(("zlib read size differs from raw read size (%lu/%lu)\n",
|
||||
gzstream.total_in, sum_read));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (qtyread)
|
||||
*qtyread += sum_read;
|
||||
if (qtywritten)
|
||||
|
@ -49,7 +49,9 @@ enum {
|
||||
rb_skip_startpos = 2,
|
||||
|
||||
/* Used by HTTP/HTTPS*/
|
||||
rb_chunked_transfer_encoding = 4
|
||||
rb_chunked_transfer_encoding = 4,
|
||||
|
||||
rb_compressed_gzip = 8
|
||||
};
|
||||
|
||||
int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
|
||||
|
Loading…
Reference in New Issue
Block a user