mirror of
https://github.com/mirror/wget.git
synced 2025-01-14 22:30:44 +08:00
[svn] Implemented the item I formerly had in the TODO: When -K and -N are used
together, we compare local file X.orig (if extant) against server file X. Previously -k and -N were worthless in combination because the local converted files always differed from the server versions.
This commit is contained in:
parent
c33c857eb2
commit
4331c39c9a
@ -1,3 +1,9 @@
|
||||
2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
|
||||
|
||||
* NEWS (-K): Now possible to use -N with -k thanks to this option.
|
||||
|
||||
* TODO: Removed the -K / -N interaction item.
|
||||
|
||||
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
|
||||
|
||||
* NEWS (-K / --backup-converted): Mentioned this new option.
|
||||
|
3
NEWS
3
NEWS
@ -8,7 +8,8 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
||||
* Changes in Wget 1.5.3+dev
|
||||
|
||||
** New -K / --backup-converted / backup_converted = on option causes files
|
||||
modified due to -k to be saved with a .orig prefix before being changed.
|
||||
modified due to -k to be saved with a .orig prefix before being changed. When
|
||||
using -N as well, it is these .orig files that are compared against the server.
|
||||
|
||||
* Wget 1.5.3 is a bugfix release with no user-visible changes.
|
||||
|
||||
|
3
TODO
3
TODO
@ -76,6 +76,3 @@ particular order. Not all of them are user-visible changes.
|
||||
* Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.)
|
||||
|
||||
* Support SSL encryption through SSLeay or OpenSSL.
|
||||
|
||||
* When -K is used with -N, check local file X.orig (if extant) against server
|
||||
file X.
|
||||
|
@ -1,3 +1,35 @@
|
||||
2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
|
||||
|
||||
* ftp.c (ftp_loop_internal): Call new downloaded_file() function,
|
||||
even though we don't do conversion on HTML files retrieved via
|
||||
FTP, so _current_ usage of downloaded_file() makes this call unneeded.
|
||||
(ftp_retrieve_list): Added a comment saying where we need to
|
||||
stat() a .orig file if FTP'd HTML file conversion is ever implemented.
|
||||
(ftp_retrieve_list): "Local file '%s' is more recent," is sometimes
|
||||
a lie -- reworded as "Server file no newer than local file '%s' --".
|
||||
|
||||
* http.c (http_loop): Fixed a typo and clarified a comment.
|
||||
(http_loop): When -K and -N are specified together, compare size
|
||||
and timestamp of server file X against local file X.orig (if
|
||||
extant) rather than converted local file X.
|
||||
(http_loop): "Local file '%s' is more recent," is sometimes a lie
|
||||
-- reworded as "Server file no newer than local file '%s' --".
|
||||
(http_loop): Call new downloaded_file() function to prevent
|
||||
wrongful overwriting of .orig file when -N is specified.
|
||||
|
||||
* url.c (convert_links): When -K specified, only rename X to
|
||||
X.orig if downloaded_file() returns TRUE. Otherwise when we skip
|
||||
file X due to -N, we clobber an X.orig from a previous invocation.
|
||||
(convert_links): Call the failsafe xstrdup(), not the real strdup().
|
||||
(convert_links): Added a note asking anyone who understands how
|
||||
multiple URLs can correspond to a single file to comment it.
|
||||
(downloaded_file): Added this new function.
|
||||
|
||||
* url.h (downloaded_file): Added prototype for this new function
|
||||
as well as its downloaded_file_t enum type.
|
||||
|
||||
* wget.h (boolean): Added this new typedef and TRUE and FALSE #defines.
|
||||
|
||||
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
|
||||
|
||||
* version.c: Upped version to developer-only "1.5.3+dev".
|
||||
|
14
src/ftp.c
14
src/ftp.c
@ -947,6 +947,11 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
/* Not as great. */
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* If we get out of the switch above without continue'ing, we've
|
||||
successfully downloaded a file. Remember this fact. */
|
||||
downloaded_file(ADD_FILE, locf);
|
||||
|
||||
if (con->st & ON_YOUR_OWN)
|
||||
{
|
||||
CLOSE (RBUF_FD (&con->rbuf));
|
||||
@ -1086,6 +1091,11 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
if (opt.timestamping && f->type == FT_PLAINFILE)
|
||||
{
|
||||
struct stat st;
|
||||
/* If conversion of HTML files retrieved via FTP is ever implemented,
|
||||
we'll need to stat() <file>.orig here when -K has been specified.
|
||||
I'm not implementing it now since files on an FTP server are much
|
||||
more likely than files on an HTTP server to legitimately have a
|
||||
.orig suffix. */
|
||||
if (!stat (u->local, &st))
|
||||
{
|
||||
/* Else, get it from the file. */
|
||||
@ -1094,13 +1104,13 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
if (local_size == f->size && tml >= f->tstamp)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Local file `%s' is more recent, not retrieving.\n\n"), u->local);
|
||||
Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
|
||||
dlthis = 0;
|
||||
}
|
||||
else if (local_size != f->size)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
The sizes do not match (local %ld), retrieving.\n"), local_size);
|
||||
The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
}
|
||||
}
|
||||
} /* opt.timestamping && f->type == FT_PLAINFILE */
|
||||
|
66
src/http.c
66
src/http.c
@ -840,6 +840,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
||||
static int first_retrieval = 1;
|
||||
|
||||
int count;
|
||||
int local_dot_orig_file_exists = FALSE;
|
||||
int use_ts, got_head = 0; /* time-stamping info */
|
||||
char *tms, *suf, *locf, *tmrate;
|
||||
uerr_t err;
|
||||
@ -851,7 +852,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
||||
*newloc = NULL;
|
||||
|
||||
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
|
||||
has_wildcards_p because it would also warn on `?', and we that
|
||||
has_wildcards_p because it would also warn on `?', and we know that
|
||||
shows up in CGI paths a *lot*. */
|
||||
if (strchr (u->url, '*'))
|
||||
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
||||
@ -888,7 +889,43 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
use_ts = 0;
|
||||
if (opt.timestamping)
|
||||
{
|
||||
if (stat (u->local, &st) == 0)
|
||||
boolean local_file_exists = FALSE;
|
||||
|
||||
if (opt.backup_converted)
|
||||
/* If -K is specified, we'll act on the assumption that it was specified
|
||||
last time these files were downloaded as well, and instead of just
|
||||
comparing local file X against server file X, we'll compare local
|
||||
file X.orig (if extant, else X) against server file X. If -K
|
||||
_wasn't_ specified last time, or the server contains files called
|
||||
*.orig, -N will be back to not operating correctly with -k. */
|
||||
{
|
||||
size_t filename_len = strlen(u->local);
|
||||
char* filename_plus_orig_suffix = malloc(filename_len +
|
||||
sizeof(".orig"));
|
||||
|
||||
/* Would a single s[n]printf() call be faster? */
|
||||
strcpy(filename_plus_orig_suffix, u->local);
|
||||
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
|
||||
|
||||
/* Try to stat() the .orig file. */
|
||||
if (stat(filename_plus_orig_suffix, &st) == 0)
|
||||
{
|
||||
local_file_exists = TRUE;
|
||||
local_dot_orig_file_exists = TRUE;
|
||||
}
|
||||
|
||||
free(filename_plus_orig_suffix);
|
||||
}
|
||||
|
||||
if (!local_dot_orig_file_exists)
|
||||
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
||||
if (stat (u->local, &st) == 0)
|
||||
local_file_exists = TRUE;
|
||||
|
||||
if (local_file_exists)
|
||||
/* There was a local file, so we'll check later to see if the version
|
||||
the server has is the same version we already have, allowing us to
|
||||
skip a download. */
|
||||
{
|
||||
use_ts = 1;
|
||||
tml = st.st_mtime;
|
||||
@ -1051,14 +1088,26 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|
||||
if (tml >= tmr &&
|
||||
(hstat.contlen == -1 || local_size == hstat.contlen))
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Local file `%s' is more recent, not retrieving.\n\n"), u->local);
|
||||
if (local_dot_orig_file_exists)
|
||||
/* We can't collapse this down into just one logprintf()
|
||||
call with a variable set to u->local or the .orig
|
||||
filename because we have to malloc() space for the
|
||||
latter, and because there are multiple returns above (a
|
||||
coding style no-no by many measures, for reasons such as
|
||||
this) we'd have to remember to free() the string at each
|
||||
one to avoid a memory leak. */
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
|
||||
u->local);
|
||||
else
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
|
||||
FREEHSTAT (hstat);
|
||||
return RETROK;
|
||||
}
|
||||
else if (tml >= tmr)
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
The sizes do not match (local %ld), retrieving.\n"), local_size);
|
||||
The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
else
|
||||
logputs (LOG_VERBOSE,
|
||||
_("Remote file is newer, retrieving.\n"));
|
||||
@ -1103,12 +1152,13 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
|
||||
}
|
||||
++opt.numurls;
|
||||
opt.downloaded += hstat.len;
|
||||
downloaded_file(ADD_FILE, locf);
|
||||
return RETROK;
|
||||
}
|
||||
else if (hstat.res == 0) /* No read error */
|
||||
{
|
||||
if (hstat.contlen == -1) /* We don't know how much we were
|
||||
supposed to get, so... */
|
||||
if (hstat.contlen == -1) /* We don't know how much we were supposed
|
||||
to get, so assume we succeeded. */
|
||||
{
|
||||
if (*dt & RETROKF)
|
||||
{
|
||||
@ -1121,6 +1171,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
|
||||
}
|
||||
++opt.numurls;
|
||||
opt.downloaded += hstat.len;
|
||||
downloaded_file(ADD_FILE, locf);
|
||||
return RETROK;
|
||||
}
|
||||
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
||||
@ -1142,6 +1193,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
|
||||
tms, u->url, hstat.len, hstat.contlen, locf, count);
|
||||
++opt.numurls;
|
||||
opt.downloaded += hstat.len;
|
||||
downloaded_file(ADD_FILE, locf);
|
||||
return RETROK;
|
||||
}
|
||||
else /* the same, but not accepted */
|
||||
|
@ -350,7 +350,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
if (result != URLOK || u->proto != URLHTTP)
|
||||
{
|
||||
if (u->proto == URLHTTP)
|
||||
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg (result));
|
||||
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
|
||||
else
|
||||
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
||||
freeurl (u, 1);
|
||||
@ -455,7 +455,8 @@ retrieve_from_file (const char *file, int html, int *count)
|
||||
}
|
||||
status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
|
||||
if (opt.recursive && status == RETROK && (dt & TEXTHTML))
|
||||
status = recursive_retrieve (filename, new_file ? new_file : cur_url->url);
|
||||
status = recursive_retrieve (filename, new_file ? new_file
|
||||
: cur_url->url);
|
||||
|
||||
if (filename && opt.delete_after && file_exists_p (filename))
|
||||
{
|
||||
|
81
src/url.c
81
src/url.c
@ -1366,10 +1366,9 @@ no_proxy_match (const char *host, const char **no_proxy)
|
||||
void
|
||||
convert_links (const char *file, urlpos *l)
|
||||
{
|
||||
FILE *fp;
|
||||
char *buf, *p, *p2;
|
||||
long size;
|
||||
static slist* converted_files = NULL;
|
||||
FILE *fp;
|
||||
char *buf, *p, *p2;
|
||||
long size;
|
||||
|
||||
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
|
||||
/* Read from the file.... */
|
||||
@ -1383,28 +1382,34 @@ convert_links (const char *file, urlpos *l)
|
||||
/* ...to a buffer. */
|
||||
load_file (fp, &buf, &size);
|
||||
fclose (fp);
|
||||
if (opt.backup_converted)
|
||||
if (opt.backup_converted && downloaded_file(CHECK_FOR_FILE, file))
|
||||
/* Rather than just writing over the original .html file with the converted
|
||||
version, save the former to *.orig. */
|
||||
version, save the former to *.orig. Note we only do this for files we've
|
||||
_successfully_ downloaded, so we don't clobber .orig files sitting around
|
||||
from previous invocations. */
|
||||
{
|
||||
/* Construct the backup filename as the original name plus ".orig". */
|
||||
size_t filename_len = strlen(file);
|
||||
char* filename_plus_orig_suffix = malloc(filename_len + sizeof(".orig"));
|
||||
int already_wrote_backup_file = 0;
|
||||
slist* converted_file_ptr;
|
||||
size_t filename_len = strlen(file);
|
||||
char* filename_plus_orig_suffix = malloc(filename_len +
|
||||
sizeof(".orig"));
|
||||
boolean already_wrote_backup_file = FALSE;
|
||||
slist* converted_file_ptr;
|
||||
static slist* converted_files = NULL;
|
||||
|
||||
/* Would a single s[n]printf() call be faster? */
|
||||
strcpy(filename_plus_orig_suffix, file);
|
||||
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
|
||||
|
||||
/* We can get called twice on the same URL thanks to the
|
||||
convert_all_links() call in main(). If we write the .orig file each
|
||||
time in such a case, it'll end up containing the first-pass conversion,
|
||||
not the original file. */
|
||||
not the original file. So, see if we've already been called on this
|
||||
file. */
|
||||
converted_file_ptr = converted_files;
|
||||
while (converted_file_ptr != NULL)
|
||||
if (strcmp(converted_file_ptr->string, file) == 0)
|
||||
{
|
||||
already_wrote_backup_file = 1;
|
||||
already_wrote_backup_file = TRUE;
|
||||
break;
|
||||
}
|
||||
else
|
||||
@ -1421,10 +1426,13 @@ convert_links (const char *file, urlpos *l)
|
||||
Note that we never free this memory since we need it till the
|
||||
convert_all_links() call, which is one of the last things the
|
||||
program does before terminating. BTW, I'm not sure if it would be
|
||||
safe to just set converted_file_ptr->string to file below, rather
|
||||
than making a copy of the string... */
|
||||
safe to just set 'converted_file_ptr->string' to 'file' below,
|
||||
rather than making a copy of the string... Another note is that I
|
||||
thought I could just add a field to the urlpos structure saying
|
||||
that we'd written a .orig file for this URL, but that didn't work,
|
||||
so I had to make this separate list. */
|
||||
converted_file_ptr = malloc(sizeof(slist));
|
||||
converted_file_ptr->string = strdup(file);
|
||||
converted_file_ptr->string = xstrdup(file); /* die on out-of-mem. */
|
||||
converted_file_ptr->next = converted_files;
|
||||
converted_files = converted_file_ptr;
|
||||
}
|
||||
@ -1440,6 +1448,8 @@ convert_links (const char *file, urlpos *l)
|
||||
free (buf);
|
||||
return;
|
||||
}
|
||||
/* [If someone understands why multiple URLs can correspond to one local file,
|
||||
can they please add a comment here...?] */
|
||||
for (p = buf; l; l = l->next)
|
||||
{
|
||||
if (l->pos >= size)
|
||||
@ -1547,3 +1557,44 @@ add_url (urlpos *l, const char *url, const char *file)
|
||||
t->next = l;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
/* Remembers which files have been downloaded. Should be called with
|
||||
add_or_check == ADD_FILE for each file we actually download successfully
|
||||
(i.e. not for ones we have failures on or that we skip due to -N). If you
|
||||
just want to check if a file has been previously added without adding it,
|
||||
call with add_or_check == CHECK_FOR_FILE. Please be sure to call this
|
||||
function with local filenames, not remote URLs -- by some means that isn't
|
||||
commented well enough for me understand, multiple remote URLs can apparently
|
||||
correspond to a single local file. */
|
||||
boolean
|
||||
downloaded_file (downloaded_file_t add_or_check, const char* file)
|
||||
{
|
||||
boolean found_file = FALSE;
|
||||
static slist* downloaded_files = NULL;
|
||||
slist* rover = downloaded_files;
|
||||
|
||||
while (rover != NULL)
|
||||
if (strcmp(rover->string, file) == 0)
|
||||
{
|
||||
found_file = TRUE;
|
||||
break;
|
||||
}
|
||||
else
|
||||
rover = rover->next;
|
||||
|
||||
if (found_file)
|
||||
return TRUE; /* file had already been downloaded */
|
||||
else
|
||||
{
|
||||
if (add_or_check == ADD_FILE)
|
||||
{
|
||||
rover = malloc(sizeof(slist));
|
||||
rover->string = xstrdup(file); /* die on out-of-mem. */
|
||||
rover->next = downloaded_files;
|
||||
downloaded_files = rover;
|
||||
}
|
||||
|
||||
return FALSE; /* file had not already been downloaded */
|
||||
}
|
||||
}
|
||||
|
@ -62,6 +62,12 @@ typedef struct _urlpos
|
||||
struct _urlpos *next; /* Next struct in list */
|
||||
} urlpos;
|
||||
|
||||
/* Controls how downloaded_file() behaves. */
|
||||
typedef enum
|
||||
{
|
||||
ADD_FILE,
|
||||
CHECK_FOR_FILE
|
||||
} downloaded_file_t;
|
||||
|
||||
/* Function declarations */
|
||||
|
||||
@ -95,4 +101,6 @@ int no_proxy_match PARAMS ((const char *, const char **));
|
||||
void convert_links PARAMS ((const char *, urlpos *));
|
||||
urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
|
||||
|
||||
boolean downloaded_file PARAMS ((downloaded_file_t, const char *));
|
||||
|
||||
#endif /* URL_H */
|
||||
|
@ -198,4 +198,12 @@ typedef enum
|
||||
ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED, QUOTEXC, WRITEFAILED
|
||||
} uerr_t;
|
||||
|
||||
typedef unsigned char boolean;
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
#endif /* WGET_H */
|
||||
|
Loading…
Reference in New Issue
Block a user