Add gzip Content-Encoding decompression

* src/http.c (struct http_stat): Add remote_encoding field.
(read_response_body): Enable gzip decompression.
(initialize_request): Send gzip Accept-Encoding header.
(gethttp): Decompress files with gzip Content-Encoding.
* src/retr.c: Include zlib.h.
(zalloc): New function.
(zfree): New function.
(fd_read_body): Decompress gzip data.
* src/retr.h (fd_read_body enum): Add rb_compressed_gzip flag.
This commit is contained in:
Tim Schlueter 2017-07-24 23:24:05 -07:00 committed by Tim Rühsen
parent b543dfe783
commit c451eec155
3 changed files with 180 additions and 6 deletions

View File

@ -1581,6 +1581,7 @@ struct http_stat
#endif #endif
encoding_t local_encoding; /* the encoding of the local file */ encoding_t local_encoding; /* the encoding of the local file */
encoding_t remote_encoding; /* the encoding of the remote file */
bool temporary; /* downloading a temporary file */ bool temporary; /* downloading a temporary file */
}; };
@ -1693,6 +1694,9 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
if (chunked_transfer_encoding) if (chunked_transfer_encoding)
flags |= rb_chunked_transfer_encoding; flags |= rb_chunked_transfer_encoding;
if (hs->remote_encoding == ENC_GZIP)
flags |= rb_compressed_gzip;
hs->len = hs->restval; hs->len = hs->restval;
hs->rd_size = 0; hs->rd_size = 0;
/* Download the response body and write it to fp. /* Download the response body and write it to fp.
@ -1886,7 +1890,12 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u
rel_value); rel_value);
SET_USER_AGENT (req); SET_USER_AGENT (req);
request_set_header (req, "Accept", "*/*", rel_none); request_set_header (req, "Accept", "*/*", rel_none);
request_set_header (req, "Accept-Encoding", "identity", rel_none); #ifdef HAVE_LIBZ
if (opt.compression != compression_none)
request_set_header (req, "Accept-Encoding", "gzip", rel_none);
else
#endif
request_set_header (req, "Accept-Encoding", "identity", rel_none);
/* Find the username with priority */ /* Find the username with priority */
if (u->user) if (u->user)
@ -3203,6 +3212,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
hs->error = NULL; hs->error = NULL;
hs->message = NULL; hs->message = NULL;
hs->local_encoding = ENC_NONE; hs->local_encoding = ENC_NONE;
hs->remote_encoding = ENC_NONE;
conn = u; conn = u;
@ -3694,6 +3704,30 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval)); DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
hs->local_encoding = ENC_NONE; hs->local_encoding = ENC_NONE;
} }
#ifdef HAVE_LIBZ
else if (hs->local_encoding == ENC_GZIP
&& opt.compression != compression_none)
{
/* Make sure the Content-Type is not gzip before decompressing */
const char * p = strchr (type, '/');
if (p == NULL)
{
hs->remote_encoding = ENC_GZIP;
hs->local_encoding = ENC_NONE;
}
else
{
p++;
if (c_tolower(p[0]) == 'x' && p[1] == '-')
p += 2;
if (0 != c_strcasecmp (p, "gzip"))
{
hs->remote_encoding = ENC_GZIP;
hs->local_encoding = ENC_NONE;
}
}
}
#endif
} }
/* 20x responses are counted among successful by default. */ /* 20x responses are counted among successful by default. */
@ -3930,6 +3964,9 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs,
} }
if (contlen == -1) if (contlen == -1)
hs->contlen = -1; hs->contlen = -1;
/* If the response is gzipped, the uncompressed size is unknown. */
else if (hs->remote_encoding == ENC_GZIP)
hs->contlen = -1;
else else
hs->contlen = contlen + contrange; hs->contlen = contlen + contrange;

View File

@ -41,6 +41,10 @@ as that of the covered work. */
# include <unixio.h> /* For delete(). */ # include <unixio.h> /* For delete(). */
#endif #endif
#ifdef HAVE_LIBZ
# include <zlib.h>
#endif
#include "exits.h" #include "exits.h"
#include "utils.h" #include "utils.h"
#include "retr.h" #include "retr.h"
@ -84,6 +88,22 @@ limit_bandwidth_reset (void)
xzero (limit_data); xzero (limit_data);
} }
#ifdef HAVE_LIBZ
/* Memory allocation hook installed in a z_stream's `zalloc' member.
   Requests storage for ITEMS elements of SIZE bytes each from xcalloc
   and hands the result back to zlib.  OPAQUE is not used.  */
static voidpf
zalloc (voidpf opaque, unsigned int items, unsigned int size)
{
  voidpf block = (voidpf) xcalloc (items, size);

  (void) opaque;   /* Silence the unused-parameter warning.  */
  return block;
}
/* Memory release hook installed in a z_stream's `zfree' member.
   Passes ADDRESS to xfree.  OPAQUE is not used.  */
static void
zfree (voidpf opaque, voidpf address)
{
  xfree (address);
  (void) opaque;   /* Silence the unused-parameter warning.  */
}
#endif
/* Limit the bandwidth by pausing the download for an amount of time. /* Limit the bandwidth by pausing the download for an amount of time.
BYTES is the number of bytes received from the network, and TIMER BYTES is the number of bytes received from the network, and TIMER
is the timer that started at the beginning of download. */ is the timer that started at the beginning of download. */
@ -257,6 +277,44 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
wgint sum_written = 0; wgint sum_written = 0;
wgint remaining_chunk_size = 0; wgint remaining_chunk_size = 0;
#ifdef HAVE_LIBZ
/* try to minimize the number of calls to inflate() and write_data() per
call to fd_read() */
unsigned int gzbufsize = dlbufsize * 4;
char *gzbuf = NULL;
z_stream gzstream;
if (flags & rb_compressed_gzip)
{
gzbuf = xmalloc (gzbufsize);
if (gzbuf != NULL)
{
gzstream.zalloc = zalloc;
gzstream.zfree = zfree;
gzstream.opaque = Z_NULL;
gzstream.next_in = Z_NULL;
gzstream.avail_in = 0;
#define GZIP_DETECT 32 /* gzip format detection */
#define GZIP_WINDOW 15 /* logarithmic window size (default: 15) */
ret = inflateInit2 (&gzstream, GZIP_DETECT | GZIP_WINDOW);
if (ret != Z_OK)
{
xfree (gzbuf);
errno = (ret == Z_MEM_ERROR) ? ENOMEM : EINVAL;
ret = -1;
goto out;
}
}
else
{
errno = ENOMEM;
ret = -1;
goto out;
}
}
#endif
if (flags & rb_skip_startpos) if (flags & rb_skip_startpos)
skip = startpos; skip = startpos;
@ -383,12 +441,64 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
int write_res; int write_res;
sum_read += ret; sum_read += ret;
write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
if (write_res < 0) #ifdef HAVE_LIBZ
if (gzbuf != NULL)
{ {
ret = (write_res == -3) ? -3 : -2; int err;
goto out; int towrite;
gzstream.avail_in = ret;
gzstream.next_in = (unsigned char *) dlbuf;
do
{
gzstream.avail_out = gzbufsize;
gzstream.next_out = (unsigned char *) gzbuf;
err = inflate (&gzstream, Z_NO_FLUSH);
switch (err)
{
case Z_MEM_ERROR:
errno = ENOMEM;
ret = -1;
goto out;
case Z_NEED_DICT:
case Z_DATA_ERROR:
errno = EINVAL;
ret = -1;
goto out;
case Z_STREAM_END:
if (exact && sum_read != toread)
{
DEBUGP(("zlib stream ended unexpectedly after "
"%ld/%ld bytes\n", sum_read, toread));
}
}
towrite = gzbufsize - gzstream.avail_out;
write_res = write_data (out, out2, gzbuf, towrite, &skip,
&sum_written);
if (write_res < 0)
{
ret = (write_res == -3) ? -3 : -2;
goto out;
}
}
while (gzstream.avail_out == 0);
} }
else
#endif
{
write_res = write_data (out, out2, dlbuf, ret, &skip,
&sum_written);
if (write_res < 0)
{
ret = (write_res == -3) ? -3 : -2;
goto out;
}
}
if (chunked) if (chunked)
{ {
remaining_chunk_size -= ret; remaining_chunk_size -= ret;
@ -433,6 +543,31 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
if (timer) if (timer)
ptimer_destroy (timer); ptimer_destroy (timer);
#ifdef HAVE_LIBZ
if (gzbuf != NULL)
{
int err = inflateEnd (&gzstream);
if (ret >= 0)
{
/* with compression enabled, ret must be 0 if successful */
if (err == Z_OK)
ret = 0;
else
{
errno = EINVAL;
ret = -1;
}
}
xfree (gzbuf);
if (gzstream.total_in != sum_read)
{
DEBUGP(("zlib read size differs from raw read size (%lu/%lu)\n",
gzstream.total_in, sum_read));
}
}
#endif
if (qtyread) if (qtyread)
*qtyread += sum_read; *qtyread += sum_read;
if (qtywritten) if (qtywritten)

View File

@ -49,7 +49,9 @@ enum {
rb_skip_startpos = 2, rb_skip_startpos = 2,
/* Used by HTTP/HTTPS*/ /* Used by HTTP/HTTPS*/
rb_chunked_transfer_encoding = 4 rb_chunked_transfer_encoding = 4,
rb_compressed_gzip = 8
}; };
int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *); int fd_read_body (const char *, int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);