Save original data to WARC file

* src/retr.c (write_data): Cleanup,
  (fd_read_body): Write to WARC before uncompressing

Fixes: #53968
This commit is contained in:
Tim Rühsen 2018-05-29 10:49:24 +02:00
parent c88500fca8
commit ad261f41ce

View File

@ -159,8 +159,8 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
amount of data and decrease SKIP. Increment *TOTAL by the amount
of data written. If OUT2 is not NULL, also write BUF to OUT2.
In case of error writing to OUT, -1 is returned. In case of error
writing to OUT2, -2 is returned. Return 1 if the whole BUF was
In case of error writing to OUT, -2 is returned. In case of error
writing to OUT2, -3 is returned. Return 1 if the whole BUF was
skipped. */
static int
@ -169,25 +169,31 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
{
if (out == NULL && out2 == NULL)
return 1;
if (*skip > bufsize)
if (skip)
{
*skip -= bufsize;
return 1;
}
if (*skip)
{
buf += *skip;
bufsize -= *skip;
*skip = 0;
if (bufsize == 0)
return 1;
if (*skip > bufsize)
{
*skip -= bufsize;
return 1;
}
if (*skip)
{
buf += *skip;
bufsize -= *skip;
*skip = 0;
if (bufsize == 0)
return 1;
}
}
if (out != NULL)
if (out)
fwrite (buf, 1, bufsize, out);
if (out2 != NULL)
if (out2)
fwrite (buf, 1, bufsize, out2);
*written += bufsize;
if (written)
*written += bufsize;
/* Immediately flush the downloaded data. This should not hinder
performance: fast downloads will arrive in large 16K chunks
@ -203,17 +209,18 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
actual justification. (Also, why 16K? Anyone test other values?)
*/
#ifndef __VMS
if (out != NULL)
if (out)
fflush (out);
if (out2 != NULL)
if (out2)
fflush (out2);
#endif /* ndef __VMS */
if (out != NULL && ferror (out))
return -1;
else if (out2 != NULL && ferror (out2))
if (out && ferror (out))
return -2;
else
return 0;
else if (out2 && ferror (out2))
return -3;
return 0;
}
/* Read the contents of file descriptor FD until the connection
@ -452,6 +459,15 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
{
int err;
int towrite;
/* Write original data to WARC file */
write_res = write_data (NULL, out2, dlbuf, ret, NULL, NULL);
if (write_res < 0)
{
ret = write_res;
goto out;
}
gzstream.avail_in = ret;
gzstream.next_in = (unsigned char *) dlbuf;
@ -482,11 +498,11 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
}
towrite = gzbufsize - gzstream.avail_out;
write_res = write_data (out, out2, gzbuf, towrite, &skip,
write_res = write_data (out, NULL, gzbuf, towrite, &skip,
&sum_written);
if (write_res < 0)
{
ret = (write_res == -3) ? -3 : -2;
ret = write_res;
goto out;
}
}
@ -499,7 +515,7 @@ fd_read_body (const char *downloaded_filename, int fd, FILE *out, wgint toread,
&sum_written);
if (write_res < 0)
{
ret = (write_res == -3) ? -3 : -2;
ret = write_res;
goto out;
}
}