Improved exit status handling.

This commit is contained in:
Micah Cowan 2009-08-27 23:08:58 -07:00
parent 88253c0d00
commit b014f8fae9
23 changed files with 232 additions and 33 deletions

View File

@ -1,3 +1,7 @@
2009-08-27 Micah Cowan <micah@cowan.name>
* NEWS: Mention the changes to exit codes.
2009-08-27 Micah Cowan <micah@cowan.name>
* NEWS: Add mention of the NUL characters SSL security fix.

3
NEWS
View File

@ -32,6 +32,9 @@ are translated from their source encoding to UTF-8 before percent-encoding.
IRI support was added by Saint Xavier <wget@sxav.eu>, as his
project for the Google Summer of Code.
** Wget now provides more sensible exit status codes when downloads
don't proceed as expected (see the manual).
** --default-page option (and associated wgetrc command) added to
support alternative default names for index.html.

View File

@ -1,3 +1,28 @@
2009-08-27 Micah Cowan <micah@cowan.name>
* wget.h (uerr_t): added new VERIFCERTERR code for SSL certificate
problems. Marked exit codes that are defined but never used (at
least, the ones I could find).
* retr.c, retr.h (retrieve_url): Added a new boolean argument to
determine whether an exit status should be recorded.
(retrieve_from_file): Adjust to new retrieve_url signature.
* res.c (res_retrieve_file): Don't have retrieve_url record an
exit status for robots.txt.
* recur.c (retrieve_tree): Adjust to new retrieve_url signature.
* main.c (main): Use the exit status stored by retrieve_url.
* http.c (gethttp): Distinguish certificate verification problems
from SSL connection issues.
(http_loop): Handle newly-created VERIFCERTERR error code.
* exits.c, exits.h: Newly added.
* Makefile.am (wget_SOURCES): Add exits.c and exits.h.
2009-08-27 Micah Cowan <micah@cowan.name>
* http.c (gethttp): Make sure Wget heeds cookies when they

View File

@ -49,7 +49,8 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
exits.c exits.h
nodist_wget_SOURCES = build_info.c version.c
EXTRA_wget_SOURCES = mswindows.c iri.c
LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@

111
src/exits.c Normal file
View File

@ -0,0 +1,111 @@
/* Exit status handling.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>.
*/
#include "wget.h"
#include "exits.h"
/* Final exit code possibilities. Exit codes 1 and 2 are reserved
 * for situations that lead to direct exits from Wget, not using the
 * value of final_exit_status. */
enum
{
WGET_EXIT_SUCCESS = 0,  /* no problems encountered */
WGET_EXIT_MINIMUM = 3,  /* lowest code ever reported via final_exit_status */
WGET_EXIT_IO_FAIL = WGET_EXIT_MINIMUM,  /* local file I/O failure */
WGET_EXIT_NETWORK_FAIL = 4,  /* could not reach or converse with the host */
WGET_EXIT_SSL_AUTH_FAIL = 5,  /* SSL certificate verification failed */
WGET_EXIT_SERVER_AUTH_FAIL = 6,  /* server rejected our credentials */
WGET_EXIT_PROTOCOL_ERROR = 7,  /* HTTP header/protocol-level error (HEOF/HERR) */
WGET_EXIT_SERVER_ERROR = 8,  /* server-side failure */
WGET_EXIT_UNKNOWN  /* sentinel; reported as exit status 1 by get_exit_status */
};
/* The most important (numerically lowest, nonzero) status seen so far;
   updated only through inform_exit_status. */
static int final_exit_status = WGET_EXIT_SUCCESS;
/* XXX: I don't like that newly-added uerr_t codes will doubtless fall
through the cracks, or the fact that we seem to have way more
codes than we know what to do with. Need to go through and sort
through the truly essential codes, and merge the rest with
those. Quite a few are never even used!
Quite a few of the codes below would have no business being
returned to retrieve_url's caller, but since it's very difficult to
determine which do and which don't, I grab virtually all of them to
be safe. */
/* Translate the retrieval result ERR into one of the exit-status
   categories declared above.  Any code not listed explicitly is
   mapped to WGET_EXIT_UNKNOWN.  */
static int
get_status_for_err (uerr_t err)
{
  switch (err)
    {
    case RETROK:
      return WGET_EXIT_SUCCESS;

    /* Problems writing or opening local files.  */
    case FOPENERR:
    case FOPEN_EXCL_ERR:
    case FWRITEERR:
    case WRITEFAILED:
      return WGET_EXIT_IO_FAIL;

    /* Problems reaching, or conversing with, the remote host.  */
    case NOCONERROR:
    case HOSTERR:
    case CONSOCKERR:
    case CONERROR:
    case CONSSLERR:
    case CONIMPOSSIBLE:
    case FTPRERR:
    case FTPINVPASV:
    case READERR:
    case TRYLIMEXC:
      return WGET_EXIT_NETWORK_FAIL;

    /* The server's SSL certificate could not be verified.  */
    case VERIFCERTERR:
      return WGET_EXIT_SSL_AUTH_FAIL;

    /* The server refused our credentials.  */
    case FTPLOGINC:
    case FTPLOGREFUSED:
    case AUTHFAILED:
      return WGET_EXIT_SERVER_AUTH_FAIL;

    /* Errors at the level of the transfer protocol itself.  */
    case HEOF:
    case HERR:
      return WGET_EXIT_PROTOCOL_ERROR;

    /* Errors issued by (or on behalf of) the server.  */
    case WRONGCODE:
    case FTPPORTERR:
    case FTPSYSERR:
    case FTPNSFOD:
    case FTPUNKNOWNTYPE:
    case FTPSRVERR:
    case FTPRETRINT:
    case FTPRESTFAIL:
    case FTPNOPASV:
    case CONTNOTSUPPORTED:
    case RANGEERR:
    case RETRBADPATTERN:
    case PROXERR:
      return WGET_EXIT_SERVER_ERROR;

    /* Everything else (including URLERROR, QUOTEXC, SSLINITFAILED,
       listed here for documentation) falls into the catch-all.  */
    case URLERROR:
    case QUOTEXC:
    case SSLINITFAILED:
    default:
      return WGET_EXIT_UNKNOWN;
    }
}
/* inform_exit_status
 *
 * Ensure that Wget's final exit status will reflect the problem
 * indicated by ERR, unless a more important problem (a numerically
 * lower, nonzero status) has already been recorded. */
void
inform_exit_status (uerr_t err)
{
  int candidate = get_status_for_err (err);

  /* Success never overrides anything.  */
  if (candidate == WGET_EXIT_SUCCESS)
    return;

  /* Keep an already-recorded status that outranks the new one.  */
  if (final_exit_status != WGET_EXIT_SUCCESS
      && candidate >= final_exit_status)
    return;

  final_exit_status = candidate;
}
/* Return the exit status Wget should terminate with.  The internal
   WGET_EXIT_UNKNOWN sentinel is reported as the generic status 1. */
int
get_exit_status (void)
{
  if (final_exit_status == WGET_EXIT_UNKNOWN)
    return 1;

  return final_exit_status;
}

30
src/exits.h Normal file
View File

@ -0,0 +1,30 @@
/* Exit status related declarations.
Copyright (C) 2008 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>. */
#ifndef WGET_EXITS_H
#define WGET_EXITS_H
#include "wget.h"
/* Record the problem indicated by ERR as a candidate for Wget's final
   exit status, unless a more important problem was already recorded. */
void inform_exit_status (uerr_t err);
/* Return the exit status Wget should terminate with. */
int get_exit_status (void);
#endif /* WGET_EXITS_H */

View File

@ -1762,11 +1762,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
if (conn->scheme == SCHEME_HTTPS)
{
if (!ssl_connect_wget (sock) || !ssl_check_certificate (sock, u->host))
if (!ssl_connect_wget (sock))
{
fd_close (sock);
return CONSSLERR;
}
else if (!ssl_check_certificate (sock, u->host))
{
fd_close (sock);
return VERIFCERTERR;
}
using_ssl = true;
}
#endif /* HAVE_SSL */
@ -2598,7 +2603,7 @@ Spider mode enabled. Check if remote file exists.\n"));
logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
quote (hstat.local_file), strerror (errno));
case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
case SSLINITFAILED: case CONTNOTSUPPORTED:
case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
/* Fatal errors just return from the function. */
ret = err;
goto exit;

View File

@ -44,6 +44,7 @@ as that of the covered work. */
#include <errno.h>
#include <time.h>
#include "exits.h"
#include "utils.h"
#include "init.h"
#include "retr.h"
@ -1289,7 +1290,7 @@ WARNING: Can't reopen standard output in binary mode;\n\
else
{
status = retrieve_url (url_parsed, *t, &filename, &redirected_URL,
NULL, &dt, opt.recursive, iri);
NULL, &dt, opt.recursive, iri, true);
}
if (opt.delete_after && file_exists_p(filename))
@ -1354,10 +1355,7 @@ WARNING: Can't reopen standard output in binary mode;\n\
xfree (url[i]);
cleanup ();
if (status == RETROK)
return 0;
else
return 1;
return get_exit_status ();
}
#endif /* TESTING */

View File

@ -283,7 +283,7 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi)
struct url *url_parsed = url_parse (url, &url_err, i, true);
status = retrieve_url (url_parsed, url, &file, &redirected, referer,
&dt, false, i);
&dt, false, i, true);
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))

View File

@ -562,7 +562,7 @@ res_retrieve_file (const char *url, char **file, struct iri *iri)
else
{
err = retrieve_url (url_parsed, robots_url, file, NULL, NULL, NULL,
false, i);
false, i, false);
url_free(url_parsed);
}

View File

@ -39,6 +39,7 @@ as that of the covered work. */
#include <string.h>
#include <assert.h>
#include "exits.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
@ -611,7 +612,7 @@ static char *getproxy (struct url *);
uerr_t
retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
char **newloc, const char *refurl, int *dt, bool recursive,
struct iri *iri)
struct iri *iri, bool register_status)
{
uerr_t result;
char *url;
@ -668,7 +669,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
xfree (url);
xfree (error);
RESTORE_POST_DATA;
return PROXERR;
result = PROXERR;
goto bail;
}
if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
{
@ -676,7 +678,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
url_free (proxy_url);
xfree (url);
RESTORE_POST_DATA;
return PROXERR;
result = PROXERR;
goto bail;
}
}
@ -757,7 +760,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
xfree (mynewloc);
xfree (error);
RESTORE_POST_DATA;
return result;
goto bail;
}
/* Now mynewloc will become newloc_parsed->url, because if the
@ -779,7 +782,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
xfree (url);
xfree (mynewloc);
RESTORE_POST_DATA;
return WRONGCODE;
result = WRONGCODE;
goto bail;
}
xfree (url);
@ -866,6 +870,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
RESTORE_POST_DATA;
bail:
if (register_status)
inform_exit_status (result);
return result;
}
@ -910,7 +917,7 @@ retrieve_from_file (const char *file, bool html, int *count)
opt.base_href = xstrdup (url);
status = retrieve_url (url_parsed, url, &input_file, NULL, NULL, &dt,
false, iri);
false, iri, true);
if (status != RETROK)
return status;
@ -970,7 +977,8 @@ retrieve_from_file (const char *file, bool html, int *count)
else
status = retrieve_url (parsed_url ? parsed_url : cur_url->url,
cur_url->url->url, &filename,
&new_file, NULL, &dt, opt.recursive, tmpiri);
&new_file, NULL, &dt, opt.recursive, tmpiri,
true);
if (parsed_url)
url_free (parsed_url);

View File

@ -54,7 +54,7 @@ char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);
uerr_t retrieve_url (struct url *, const char *, char **, char **,
const char *, int *, bool, struct iri *);
const char *, int *, bool, struct iri *, bool);
uerr_t retrieve_from_file (const char *, bool, int *);
const char *retr_rate (wgint, double);

View File

@ -331,21 +331,23 @@ typedef enum
{
/* 0 */
NOCONERROR, HOSTERR, CONSOCKERR, CONERROR, CONSSLERR,
CONIMPOSSIBLE, NEWLOCATION, NOTENOUGHMEM, CONPORTERR, CONCLOSED,
CONIMPOSSIBLE, NEWLOCATION, NOTENOUGHMEM /* ! */,
CONPORTERR /* ! */, CONCLOSED /* ! */,
/* 10 */
FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR, FTPSYSERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR, FTPREXC,
FTPNSFOD, FTPRETROK /* ! */, FTPUNKNOWNTYPE, FTPRERR, FTPREXC /* ! */,
/* 20 */
FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR, FOPENERR,
FOPEN_EXCL_ERR, FWRITEERR, HOK, HLEXC, HEOF,
FOPEN_EXCL_ERR, FWRITEERR, HOK /* ! */, HLEXC /* ! */, HEOF,
/* 30 */
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED /* ! */, WRONGCODE,
FTPINVPASV, FTPNOPASV, CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED,
/* 40 */
READERR, TRYLIMEXC, URLBADPATTERN, FILEBADFILE, RANGEERR,
RETRBADPATTERN, RETNOTSUP, ROBOTSOK, NOROBOTS, PROXERR,
READERR, TRYLIMEXC, URLBADPATTERN /* ! */, FILEBADFILE /* ! */, RANGEERR,
RETRBADPATTERN, RETNOTSUP /* ! */, ROBOTSOK /* ! */, NOROBOTS /* ! */,
PROXERR,
/* 50 */
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR
} uerr_t;
/* 2005-02-19 SMS.

View File

@ -1,3 +1,14 @@
2009-08-27 Micah Cowan <micah@cowan.name>
* WgetTest.pm.in (run): Shift the errcode right by 8 binary places.
* Test--spider-fail.px, Test--spider-r--no-content-disposition.px,
Test--spider-r--no-content-disposition-trivial.px,
Test--spider-r-HTTP-Content-Disposition.px, Test--spider-r.px,
Test-O-nonexisting.px, Test-cookies-401.px,
Test-nonexisting-quiet.px: Adjusted "expected error code"; Wget's
exit codes have changed.
2009-08-27 Micah Cowan <micah@cowan.name>
* run-px: Added Test-cookies.px, Test-cookies-401.px

View File

@ -35,7 +35,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --spider http://localhost:{{port}}/nonexistent";
my $expected_error_code = 256;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -92,7 +92,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --spider -r --no-content-disposition http://localhost:{{port}}/";
my $expected_error_code = 0;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -93,7 +93,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --spider -r --no-content-disposition http://localhost:{{port}}/";
my $expected_error_code = 0;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -93,7 +93,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --spider -r http://localhost:{{port}}/";
my $expected_error_code = 0;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -92,7 +92,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --spider -r http://localhost:{{port}}/";
my $expected_error_code = 0;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -26,7 +26,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --quiet -O out http://localhost:{{port}}/nonexistent";
my $expected_error_code = 256;
my $expected_error_code = 8;
my %expected_downloaded_files = (
'out' => {

View File

@ -32,7 +32,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " -d http://localhost:{{port}}/one.txt"
. " http://localhost:{{port}}/two.txt";
my $expected_error_code = 0;
my $expected_error_code = 6;
my %expected_downloaded_files = (
'two.txt' => {

View File

@ -26,7 +26,7 @@ my %urls = (
my $cmdline = $WgetTest::WGETPATH . " --quiet http://localhost:{{port}}/nonexistent";
my $expected_error_code = 256;
my $expected_error_code = 8;
my %expected_downloaded_files = (
);

View File

@ -88,6 +88,7 @@ sub run {
($cmdline =~ m{^/.*})
? system ($cmdline)
: system ("$self->{_workdir}/../src/$cmdline");
$errcode >>= 8; # XXX: should handle abnormal error codes.
# Shutdown server
# if we didn't explicitly kill the server, we would have to call