From ad261f41ceeb59242a096b31854038c3eff65c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de> Date: Tue, 29 May 2018 10:49:24 +0200 Subject: [PATCH] Save original data to WARC file * src/retr.c (write_data): Cleanup, (fd_read_body): Write to WARC before uncompressing Fixes: #53968 --- src/retr.c | 68 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/src/retr.c b/src/retr.c index 17ed228b..ae86730c 100644 --- a/src/retr.c +++ b/src/retr.c @@ -159,8 +159,8 @@ limit_bandwidth (wgint bytes, struct ptimer *timer) /* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that amount of data and decrease SKIP. Increment *TOTAL by the amount of data written. If OUT2 is not NULL, also write BUF to OUT2. - In case of error writing to OUT, -1 is returned. In case of error - writing to OUT2, -2 is returned. Return 1 if the whole BUF was + In case of error writing to OUT, -2 is returned. In case of error + writing to OUT2, -3 is returned. Return 1 if the whole BUF was skipped. */ static int @@ -169,25 +169,31 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize, { if (out == NULL && out2 == NULL) return 1; - if (*skip > bufsize) + + if (skip) { - *skip -= bufsize; - return 1; - } - if (*skip) - { - buf += *skip; - bufsize -= *skip; - *skip = 0; - if (bufsize == 0) - return 1; + if (*skip > bufsize) + { + *skip -= bufsize; + return 1; + } + if (*skip) + { + buf += *skip; + bufsize -= *skip; + *skip = 0; + if (bufsize == 0) + return 1; + } } - if (out != NULL) + if (out) fwrite (buf, 1, bufsize, out); - if (out2 != NULL) + if (out2) fwrite (buf, 1, bufsize, out2); - *written += bufsize; + + if (written) + *written += bufsize; /* Immediately flush the downloaded data. This should not hinder performance: fast downloads will arrive in large 16K chunks @@ -203,17 +209,18 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize, actual justification. (Also, why 16K? Anyone test other values?) */ #ifndef __VMS - if (out != NULL) + if (out) fflush (out); - if (out2 != NULL) + if (out2) fflush (out2); #endif /* ndef __VMS */ - if (out != NULL && ferror (out)) - return -1; - else if (out2 != NULL && ferror (out2)) + + if (out && ferror (out)) return -2; - else - return 0; + else if (out2 && ferror (out2)) + return -3; + + return 0; } /* Read the contents of file descriptor FD until it the connection @@ -452,6 +459,15 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread, { int err; int towrite; + + /* Write original data to WARC file */ + write_res = write_data (NULL, out2, dlbuf, ret, NULL, NULL); + if (write_res < 0) + { + ret = write_res; + goto out; + } + gzstream.avail_in = ret; gzstream.next_in = (unsigned char *) dlbuf; @@ -482,11 +498,11 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread, } towrite = gzbufsize - gzstream.avail_out; - write_res = write_data (out, out2, gzbuf, towrite, &skip, + write_res = write_data (out, NULL, gzbuf, towrite, &skip, &sum_written); if (write_res < 0) { - ret = (write_res == -3) ? -3 : -2; + ret = write_res; goto out; } } @@ -499,7 +515,7 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread, &sum_written); if (write_res < 0) { - ret = (write_res == -3) ? -3 : -2; + ret = write_res; goto out; } }