mirror of
https://github.com/mirror/wget.git
synced 2025-03-14 20:00:15 +08:00
Resolve conflicts to complete merging with master branch.
This commit is contained in:
commit
eb01e9d442
65
ChangeLog
65
ChangeLog
@ -1,3 +1,68 @@
|
||||
2012-07-08 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* bootstrap: Update from gnulib.
|
||||
* bootstrap.conf (gnulib_extra_files): Remove $build_aux/missing.
|
||||
* lib/Makefile.am: Delete file.
|
||||
|
||||
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Remove `closeout'.
|
||||
Reported by: Micah Cowan <micah@cowan.name>.
|
||||
|
||||
2012-05-31 Ángel González <keisial@gmail.com>
|
||||
|
||||
* convert.c: fix segfault on wrong urls (bug 36570)
|
||||
|
||||
2012-05-13 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Add `git-version-gen'.
|
||||
* build-aux/bzr-version-gen: Remove file.
|
||||
* configure.ac: Invoke `build-aux/git-version-gen' to get the dist
|
||||
version.
|
||||
* Makefile.am (EXTRA_DIST): Distribute build-aux/git-version-gen instead
|
||||
of build-aux/bzr-version-gen.
|
||||
|
||||
2012-04-11 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Include module `regex'.
|
||||
* configure.ac: Check for PCRE library.
|
||||
|
||||
2012-03-25 Ray Satiro <raysatiro@yahoo.com>
|
||||
|
||||
* configure.ac: Fix build under mingw when OpenSSL is used.
|
||||
|
||||
2012-03-20 Ángel González <keisial@gmail.com>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Add modules `ftello',
|
||||
`mkstemp' and `strtok_r'.
|
||||
|
||||
2012-02-26 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Add module `closeout'.
|
||||
|
||||
2012-01-09 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* configure.ac: Always try to use libz, even without SSL.
|
||||
|
||||
2011-12-12 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add build-aux/bzr-version-gen.
|
||||
Reported by: Elan Ruusamäe <glen@pld-linux.org>.
|
||||
|
||||
2011-12-11 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* util/trunc.c (main): Call `close' on the fd and check for errors.
|
||||
Reported by: <dga@cs.cmu.edu>.
|
||||
|
||||
2011-10-23 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* bootstrap.conf (gnulib_modules): Include module `vsnprintf'.
|
||||
|
||||
2011-10-16 Steven Schubiger <stsc@member.fsf.org>
|
||||
|
||||
* util/paramcheck.pl: Match 1 or more times where applicable.
|
||||
(extract_entries): Return a copy instead of reference.
|
||||
|
||||
2011-09-04 Alan Hourihane <alanh@fairlite.co.uk> (tiny change)
|
||||
|
||||
* configure.ac: Check for libz when gnutls is used.
|
||||
|
@ -46,7 +46,7 @@ SUBDIRS = lib src doc po tests util
|
||||
EXTRA_DIST = ChangeLog.README MAILING-LIST \
|
||||
msdos/ChangeLog msdos/config.h msdos/Makefile.DJ \
|
||||
msdos/Makefile.WC ABOUT-NLS \
|
||||
build-aux/build_info.pl .version
|
||||
build-aux/build_info.pl build-aux/git-version-gen .version
|
||||
|
||||
CLEANFILES = *~ *.bak $(DISTNAME).tar.gz
|
||||
|
||||
|
26
NEWS
26
NEWS
@ -1,16 +1,37 @@
|
||||
GNU Wget NEWS -- history of user-visible changes.
|
||||
|
||||
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
|
||||
2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
See the end for copying conditions.
|
||||
|
||||
Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
||||
|
||||
* Changes in Wget X.Y.Z
|
||||
* Changes in Wget 1.14
|
||||
|
||||
** Add support for content-on-error. It allows storing the HTTP
|
||||
payload on 4xx or 5xx errors.
|
||||
|
||||
** Add support for WARC files.
|
||||
|
||||
** Fix a memory leak problem in the GNU TLS backend.
|
||||
|
||||
** Autoreconf works again for distributed tarballs.
|
||||
|
||||
** Print some diagnostic messages to stderr not to stdout.
|
||||
|
||||
** Report stdout close errors.
|
||||
|
||||
** Accept the --report-speed option.
|
||||
|
||||
** Enable client certificates when GNU TLS is used.
|
||||
|
||||
** Add support for TLS Server Name Indication.
|
||||
|
||||
** Accept the arguments --accept-regex and --reject-regex.
|
||||
|
||||
** The GNU TLS backend correctly honors the timeout value.
|
||||
|
||||
** Add support for RFC 2617 Digest Access Authentication.
|
||||
|
||||
* Changes in Wget 1.13.4
|
||||
|
||||
@ -23,6 +44,7 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
||||
** Return a network failure when FTP downloads fail and --timestamping
|
||||
is specified.
|
||||
|
||||
** Fix a segfault on an incomplete STYLE tag.
|
||||
|
||||
* Changes in Wget 1.13.3
|
||||
|
||||
|
61
bootstrap
61
bootstrap
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Print a version string.
|
||||
scriptversion=2012-07-19.14; # UTC
|
||||
scriptversion=2012-07-06.11; # UTC
|
||||
|
||||
# Bootstrap this package from checked-out sources.
|
||||
|
||||
@ -77,32 +77,15 @@ Running without arguments will suffice in most cases.
|
||||
EOF
|
||||
}
|
||||
|
||||
# warnf_ FORMAT-STRING ARG1...
|
||||
warnf_ ()
|
||||
warn()
|
||||
{
|
||||
warnf_format_=$1
|
||||
shift
|
||||
nl='
|
||||
'
|
||||
case $* in
|
||||
*$nl*) me_=$(printf "$me"|tr "$nl|" '??')
|
||||
printf "$warnf_format_" "$@" | sed "s|^|$me_: |" ;;
|
||||
*) printf "$me: $warnf_format_" "$@" ;;
|
||||
esac >&2
|
||||
for i
|
||||
do
|
||||
echo "$i"
|
||||
done | sed -e "s/^/$me: /" >&2
|
||||
}
|
||||
|
||||
# warn_ WORD1...
|
||||
warn_ ()
|
||||
{
|
||||
# If IFS does not start with ' ', set it and emit the warning in a subshell.
|
||||
case $IFS in
|
||||
' '*) warnf_ '%s\n' "$*";;
|
||||
*) (IFS=' '; warn_ "$@");;
|
||||
esac
|
||||
}
|
||||
|
||||
# die WORD1...
|
||||
die() { warn_ "$@"; exit 1; }
|
||||
die() { warn "$@"; exit 1; }
|
||||
|
||||
# Configuration.
|
||||
|
||||
@ -354,7 +337,8 @@ grep '^[ ]*AC_CONFIG_AUX_DIR(\['"$build_aux"'\])' configure.ac \
|
||||
grep '^[ ]*AC_CONFIG_AUX_DIR('"$build_aux"')' configure.ac \
|
||||
>/dev/null && found_aux_dir=yes
|
||||
test $found_aux_dir = yes \
|
||||
|| die "configure.ac lacks 'AC_CONFIG_AUX_DIR([$build_aux])'; add it"
|
||||
|| die "expected line not found in configure.ac. Add the following:" \
|
||||
" AC_CONFIG_AUX_DIR([$build_aux])"
|
||||
|
||||
# If $build_aux doesn't exist, create it now, otherwise some bits
|
||||
# below will malfunction. If creating it, also mark it as ignored.
|
||||
@ -460,7 +444,7 @@ check_versions() {
|
||||
automake-ng|aclocal-ng)
|
||||
app=${app%-ng}
|
||||
($app --version | grep '(GNU automake-ng)') >/dev/null 2>&1 || {
|
||||
warn_ "Error: '$app' not found or not from Automake-NG"
|
||||
warn "Error: '$app' not found or not from Automake-NG"
|
||||
ret=1
|
||||
continue
|
||||
} ;;
|
||||
@ -470,14 +454,7 @@ check_versions() {
|
||||
# so we have to rely on $? rather than get_version.
|
||||
$app --version >/dev/null 2>&1
|
||||
if [ 126 -le $? ]; then
|
||||
warn_ "Error: '$app' not found"
|
||||
ret=1
|
||||
fi
|
||||
else
|
||||
# Require app to produce a new enough version string.
|
||||
inst_ver=$(get_version $app)
|
||||
if [ ! "$inst_ver" ]; then
|
||||
warn_ "Error: '$app' not found"
|
||||
warn "Error: '$app' not found"
|
||||
ret=1
|
||||
else
|
||||
latest_ver=$(sort_ver $req_ver $inst_ver | cut -d' ' -f2)
|
||||
@ -488,6 +465,20 @@ check_versions() {
|
||||
ret=1
|
||||
fi
|
||||
fi
|
||||
else
|
||||
# Require app to produce a new enough version string.
|
||||
inst_ver=$(get_version $app)
|
||||
if [ ! "$inst_ver" ]; then
|
||||
warn "Error: '$app' not found"
|
||||
ret=1
|
||||
else
|
||||
latest_ver=$(sort_ver $req_ver $inst_ver | cut -d' ' -f2)
|
||||
if [ ! "$latest_ver" = "$inst_ver" ]; then
|
||||
warn "Error: '$app' version == $inst_ver is too old" \
|
||||
" '$app' version >= $req_ver is required"
|
||||
ret=1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
@ -780,7 +771,7 @@ version_controlled_file() {
|
||||
grep -F "/${file##*/}/" "$parent/CVS/Entries" 2>/dev/null |
|
||||
grep '^/[^/]*/[0-9]' > /dev/null
|
||||
else
|
||||
warn_ "no version control for $file?"
|
||||
warn "no version control for $file?"
|
||||
false
|
||||
fi
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
# bootstrap.conf - Bootstrap configuration.
|
||||
# Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
# Inc.
|
||||
#
|
||||
# This file is part of GNU Wget.
|
||||
@ -28,6 +28,7 @@ gnulib_modules="
|
||||
accept
|
||||
alloca
|
||||
announce-gen
|
||||
base32
|
||||
bind
|
||||
c-ctype
|
||||
clock-time
|
||||
@ -35,11 +36,13 @@ close
|
||||
connect
|
||||
fcntl
|
||||
futimens
|
||||
ftello
|
||||
getaddrinfo
|
||||
getopt-gnu
|
||||
getpass-gnu
|
||||
getpeername
|
||||
getsockname
|
||||
git-version-gen
|
||||
gnupload
|
||||
ioctl
|
||||
iconv
|
||||
@ -48,6 +51,7 @@ listen
|
||||
maintainer-makefile
|
||||
mbtowc
|
||||
mkdir
|
||||
mkstemp
|
||||
crypto/md5
|
||||
crypto/sha1
|
||||
crypto/sha256
|
||||
@ -55,6 +59,7 @@ pipe
|
||||
quote
|
||||
quotearg
|
||||
recv
|
||||
regex
|
||||
select
|
||||
send
|
||||
setsockopt
|
||||
@ -65,9 +70,12 @@ socket
|
||||
stdbool
|
||||
strcasestr
|
||||
strerror_r-posix
|
||||
strtok_r
|
||||
tmpdir
|
||||
unlocked-io
|
||||
update-copyright
|
||||
vasprintf
|
||||
vsnprintf
|
||||
write
|
||||
"
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
scriptversion=2011-08-11.08; # UTC
|
||||
|
||||
# Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Additional permission under GNU GPL version 3 section 7
|
||||
|
||||
|
||||
# Written by Giuseppe Scrivano.
|
||||
|
||||
if test -f .tarball-version
|
||||
then
|
||||
cat .tarball-version | tr -d '\n'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
DIRTY=""
|
||||
|
||||
test -n "`bzr diff | tr -d '\n'`" && DIRTY="-dirty"
|
||||
|
||||
REVNO=`bzr revno`
|
||||
|
||||
TAG=`bzr tags -r $REVNO | cut -d' ' -f1`
|
||||
if test -z "$TAG"
|
||||
then
|
||||
TAG=`bzr tags --sort=time -r ..$REVNO | tail -n1 | cut -d' ' -f1`
|
||||
|
||||
# No tags yet
|
||||
test -z "$TAG" && TAG="unknown"
|
||||
|
||||
TAG=$TAG-$REVNO
|
||||
fi
|
||||
|
||||
|
||||
printf "%s%s" "$TAG" "$DIRTY"
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
43
configure.ac
43
configure.ac
@ -31,7 +31,7 @@ dnl Process this file with autoconf to produce a configure script.
|
||||
dnl
|
||||
|
||||
AC_INIT([wget],
|
||||
[m4_esyscmd([build-aux/bzr-version-gen])],
|
||||
m4_esyscmd([build-aux/git-version-gen .tarball-version]),
|
||||
[bug-wget@gnu.org])
|
||||
AC_PREREQ(2.61)
|
||||
|
||||
@ -65,6 +65,9 @@ AC_ARG_WITH(ssl,
|
||||
[[ --without-ssl disable SSL autodetection
|
||||
--with-ssl={gnutls,openssl} specify the SSL backend. GNU TLS is the default.]])
|
||||
|
||||
AC_ARG_WITH(zlib,
|
||||
[[ --without-zlib disable zlib ]])
|
||||
|
||||
AC_ARG_ENABLE(opie,
|
||||
[ --disable-opie disable support for opie or s/key FTP login],
|
||||
ENABLE_OPIE=$enableval, ENABLE_OPIE=yes)
|
||||
@ -246,6 +249,10 @@ dnl
|
||||
dnl Checks for libraries.
|
||||
dnl
|
||||
|
||||
AS_IF([test x"$with_zlib" != xno], [
|
||||
AC_CHECK_LIB(z, compress)
|
||||
])
|
||||
|
||||
AS_IF([test x"$with_ssl" = xopenssl], [
|
||||
dnl some versions of openssl use zlib compression
|
||||
AC_CHECK_LIB(z, compress)
|
||||
@ -269,6 +276,9 @@ AS_IF([test x"$with_ssl" = xopenssl], [
|
||||
AC_CHECK_LIB(ssl32, SSL_connect, [
|
||||
ssl_found=yes
|
||||
AC_MSG_NOTICE([Enabling support for SSL via OpenSSL (shared)])
|
||||
AC_LIBOBJ([openssl])
|
||||
LIBS="${LIBS} -lssl32"
|
||||
AC_DEFINE([HAVE_LIBSSL32], [1], [Define to 1 if you have the `ssl32' library (-lssl32).])
|
||||
],
|
||||
AC_MSG_ERROR([openssl not found: shared lib eay32 found but ssl32 not found]))
|
||||
|
||||
@ -294,6 +304,7 @@ AS_IF([test x$ssl_found != xyes],
|
||||
], [SSL_library_init ()])
|
||||
if test x"$LIBSSL" != x
|
||||
then
|
||||
ssl_found=yes
|
||||
AC_MSG_NOTICE([compiling in support for SSL via OpenSSL])
|
||||
AC_LIBOBJ([openssl])
|
||||
LIBS="$LIBSSL $LIBS"
|
||||
@ -301,9 +312,6 @@ AS_IF([test x$ssl_found != xyes],
|
||||
then
|
||||
AC_MSG_ERROR([--with-ssl=openssl was given, but SSL is not available.])
|
||||
fi
|
||||
|
||||
AC_LIBOBJ([openssl])
|
||||
|
||||
])
|
||||
|
||||
], [
|
||||
@ -321,6 +329,7 @@ AS_IF([test x$ssl_found != xyes],
|
||||
], [gnutls_global_init()])
|
||||
if test x"$LIBGNUTLS" != x
|
||||
then
|
||||
ssl_found=yes
|
||||
AC_MSG_NOTICE([compiling in support for SSL via GnuTLS])
|
||||
AC_LIBOBJ([gnutls])
|
||||
LIBS="$LIBGNUTLS $LIBS"
|
||||
@ -333,7 +342,7 @@ AS_IF([test x$ssl_found != xyes],
|
||||
]) # endif: --with-ssl == openssl?
|
||||
|
||||
dnl Enable NTLM if requested and if SSL is available.
|
||||
if test x"$LIBSSL" != x
|
||||
if test x"$LIBSSL" != x || test "$ac_cv_lib_ssl32_SSL_connect" = yes
|
||||
then
|
||||
if test x"$ENABLE_NTLM" != xno
|
||||
then
|
||||
@ -586,7 +595,31 @@ if test "X$iri" != "Xno"; then
|
||||
fi
|
||||
fi
|
||||
|
||||
dnl
|
||||
dnl Check for UUID
|
||||
dnl
|
||||
|
||||
AC_CHECK_HEADER(uuid/uuid.h,
|
||||
AC_CHECK_LIB(uuid, uuid_generate,
|
||||
[LIBS="${LIBS} -luuid"
|
||||
AC_DEFINE([HAVE_LIBUUID], 1,
|
||||
[Define if libuuid is available.])
|
||||
])
|
||||
)
|
||||
|
||||
dnl
|
||||
dnl Check for PCRE
|
||||
dnl
|
||||
|
||||
AC_CHECK_HEADER(pcre.h,
|
||||
AC_CHECK_LIB(pcre, pcre_compile,
|
||||
[LIBS="${LIBS} -lpcre"
|
||||
AC_DEFINE([HAVE_LIBPCRE], 1,
|
||||
[Define if libpcre is available.])
|
||||
])
|
||||
)
|
||||
|
||||
|
||||
dnl Needed by src/Makefile.am
|
||||
AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
|
||||
AM_CONDITIONAL([METALINK_IS_ENABLED], [test "X$metalink" != "Xno"])
|
||||
|
@ -1,3 +1,19 @@
|
||||
2012-08-04 mancha <mancha@mac.hush.com> (tiny change)
|
||||
|
||||
* wget.texi: Export ENVIRONMENT to the man page.
|
||||
|
||||
2012-06-09 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* wget.texi (Logging and Input File Options): Document "--report-speed".
|
||||
(HTTPS (SSL/TLS) Options): Document WARC.
|
||||
|
||||
* texi2pod.pl: Revert change from 2011-08-06.
|
||||
|
||||
2012-05-13 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* wget.texi (Types of Files): Document --accept-regex and
|
||||
--reject-regex.
|
||||
|
||||
2011-10-02 Henrik Holst <henrik.holst@millistream.com> (tiny change)
|
||||
|
||||
* wget.texi (HTTP Options): Document option --content-on-error.
|
||||
|
@ -1,6 +1,7 @@
|
||||
#! /usr/bin/env perl
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2003, 2010 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1999, 2000, 2001, 2003, 2007, 2009, 2010, 2011 Free
|
||||
# Software Foundation, Inc.
|
||||
|
||||
# This file is part of GCC.
|
||||
|
||||
@ -15,14 +16,15 @@
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GCC; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
||||
# Boston MA 02110-1301, USA.
|
||||
# along with GCC. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# This does trivial (and I mean _trivial_) conversion of Texinfo
|
||||
# markup to Perl POD format. It's intended to be used to extract
|
||||
# something suitable for a manpage from a Texinfo document.
|
||||
|
||||
use warnings;
|
||||
BEGIN { eval { require warnings; } and warnings->import; }
|
||||
|
||||
$output = 0;
|
||||
$skipping = 0;
|
||||
%sects = ();
|
||||
@ -36,7 +38,6 @@ $shift = "";
|
||||
$fnno = 1;
|
||||
$inf = "";
|
||||
$ibase = "";
|
||||
@ipath = ();
|
||||
|
||||
while ($_ = shift) {
|
||||
if (/^-D(.*)$/) {
|
||||
@ -52,13 +53,6 @@ while ($_ = shift) {
|
||||
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
|
||||
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
|
||||
$defs{$flag} = $value;
|
||||
} elsif (/^-I(.*)$/) {
|
||||
if ($1 ne "") {
|
||||
$flag = $1;
|
||||
} else {
|
||||
$flag = shift;
|
||||
}
|
||||
push (@ipath, $flag);
|
||||
} elsif (/^-/) {
|
||||
usage();
|
||||
} else {
|
||||
@ -162,8 +156,6 @@ while(<$inf>) {
|
||||
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
|
||||
$_ = "\n=back\n";
|
||||
$ic = pop @icstack;
|
||||
} elsif ($ended eq "multitable") {
|
||||
$_ = "\n=back\n";
|
||||
} else {
|
||||
die "unknown command \@end $ended at line $.\n";
|
||||
}
|
||||
@ -213,18 +205,14 @@ while(<$inf>) {
|
||||
|
||||
# Now the ones that have to be replaced by special escapes
|
||||
# (which will be turned back into text by unmunge())
|
||||
# Replace @@ before @{ and @} in order to parse @samp{@@} correctly.
|
||||
s/&/&/g;
|
||||
s/\@\@/&at;/g;
|
||||
s/\@\{/{/g;
|
||||
s/\@\}/}/g;
|
||||
s/\@`\{(.)\}/&$1grave;/g;
|
||||
|
||||
# Inside a verbatim block, handle @var, @samp and @url specially.
|
||||
# Inside a verbatim block, handle @var specially.
|
||||
if ($shift ne "") {
|
||||
s/\@var\{([^\}]*)\}/<$1>/g;
|
||||
s/\@samp\{([^\}]*)\}/"$1"/g;
|
||||
s/\@url\{([^\}]*)\}/<$1>/g;
|
||||
}
|
||||
|
||||
# POD doesn't interpret E<> inside a verbatim block.
|
||||
@ -243,23 +231,17 @@ while(<$inf>) {
|
||||
$inf = gensym();
|
||||
$file = postprocess($1);
|
||||
|
||||
# Try cwd and $ibase, then explicit -I paths.
|
||||
$done = 0;
|
||||
foreach $path ("", $ibase, @ipath) {
|
||||
$mypath = $file;
|
||||
$mypath = $path . "/" . $mypath if ($path ne "");
|
||||
open($inf, "<" . $mypath) and ($done = 1, last);
|
||||
}
|
||||
die "cannot find $file" if !$done;
|
||||
# Try cwd and $ibase.
|
||||
open($inf, "<" . $file)
|
||||
or open($inf, "<" . $ibase . "/" . $file)
|
||||
or die "cannot open $file or $ibase/$file: $!\n";
|
||||
next;
|
||||
};
|
||||
|
||||
/^\@(?:section|unnumbered|unnumberedsec|center|heading)\s+(.+)$/
|
||||
/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
|
||||
and $_ = "\n=head2 $1\n";
|
||||
/^\@subsection\s+(.+)$/
|
||||
and $_ = "\n=head3 $1\n";
|
||||
/^\@subsubsection\s+(.+)$/
|
||||
and $_ = "\n=head4 $1\n";
|
||||
|
||||
# Block command handlers:
|
||||
/^\@itemize(?:\s+(\@[a-z]+|\*|-))?/ and do {
|
||||
@ -268,7 +250,7 @@ while(<$inf>) {
|
||||
if (defined $1) {
|
||||
$ic = $1;
|
||||
} else {
|
||||
$ic = '*';
|
||||
$ic = '@bullet';
|
||||
}
|
||||
$_ = "\n=over 4\n";
|
||||
$endw = "itemize";
|
||||
@ -286,12 +268,6 @@ while(<$inf>) {
|
||||
$endw = "enumerate";
|
||||
};
|
||||
|
||||
/^\@multitable\s.*/ and do {
|
||||
push @endwstack, $endw;
|
||||
$endw = "multitable";
|
||||
$_ = "\n=over 4\n";
|
||||
};
|
||||
|
||||
/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
|
||||
push @endwstack, $endw;
|
||||
push @icstack, $ic;
|
||||
@ -301,7 +277,6 @@ while(<$inf>) {
|
||||
$ic =~ s/\@(?:code|kbd)/C/;
|
||||
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
|
||||
$ic =~ s/\@(?:file)/F/;
|
||||
$ic =~ s/\@(?:asis)//;
|
||||
$_ = "\n=over 4\n";
|
||||
};
|
||||
|
||||
@ -312,29 +287,14 @@ while(<$inf>) {
|
||||
$_ = ""; # need a paragraph break
|
||||
};
|
||||
|
||||
/^\@item\s+(.*\S)\s*$/ and $endw eq "multitable" and do {
|
||||
@columns = ();
|
||||
for $column (split (/\s*\@tab\s*/, $1)) {
|
||||
# @strong{...} is used as a @headitem work-alike
|
||||
$column =~ s/^\@strong{(.*)}$/$1/;
|
||||
push @columns, $column;
|
||||
}
|
||||
$_ = "\n=item ".join (" : ", @columns)."\n";
|
||||
};
|
||||
|
||||
/^\@itemx?\s*(.+)?$/ and do {
|
||||
if (defined $1) {
|
||||
if ($ic) {
|
||||
if ($endw eq "enumerate") {
|
||||
$_ = "\n=item $ic $1\n";
|
||||
$ic =~ s/(\d+)/$1 + 1/eg;
|
||||
} else {
|
||||
# Entity escapes prevent munging by the <>
|
||||
# processing below.
|
||||
$_ = "\n=item $ic\<$1\>\n";
|
||||
}
|
||||
my $thing = $1;
|
||||
if ($ic =~ /\@asis/) {
|
||||
$_ = "\n=item $thing\n";
|
||||
} else {
|
||||
$_ = "\n=item $1\n";
|
||||
# Entity escapes prevent munging by the <> processing below.
|
||||
$_ = "\n=item $ic\<$thing\>\n";
|
||||
}
|
||||
} else {
|
||||
$_ = "\n=item $ic\n";
|
||||
@ -355,11 +315,12 @@ die "No filename or title\n" unless defined $fn && defined $tl;
|
||||
$sects{NAME} = "$fn \- $tl\n";
|
||||
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
|
||||
|
||||
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT FILES
|
||||
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
|
||||
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT EXITSTATUS
|
||||
FILES BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
|
||||
if(exists $sects{$sect}) {
|
||||
$head = $sect;
|
||||
$head =~ s/SEEALSO/SEE ALSO/;
|
||||
$head =~ s/EXITSTATUS/EXIT STATUS/;
|
||||
print "=head1 $head\n\n";
|
||||
print scalar unmunge ($sects{$sect});
|
||||
print "\n";
|
||||
@ -391,13 +352,11 @@ sub postprocess
|
||||
s/\@r\{([^\}]*)\}/R<$1>/g;
|
||||
s/\@(?:dfn|var|emph|cite|i)\{([^\}]*)\}/I<$1>/g;
|
||||
s/\@(?:code|kbd)\{([^\}]*)\}/C<$1>/g;
|
||||
s/\@(?:samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g;
|
||||
s/\@(?:gccoptlist|samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g;
|
||||
s/\@sc\{([^\}]*)\}/\U$1/g;
|
||||
s/\@acronym\{([^\}]*)\}/\U$1/g;
|
||||
s/\@file\{([^\}]*)\}/F<$1>/g;
|
||||
s/\@w\{([^\}]*)\}/S<$1>/g;
|
||||
s/\@(?:dmn|math)\{([^\}]*)\}/$1/g;
|
||||
s/\@\///g;
|
||||
|
||||
# keep references of the form @ref{...}, print them bold
|
||||
s/\@(?:ref)\{([^\}]*)\}/B<$1>/g;
|
||||
@ -419,9 +378,6 @@ sub postprocess
|
||||
s/\@gol//g;
|
||||
s/\@\*\s*\n?//g;
|
||||
|
||||
# Anchors are thrown away
|
||||
s/\@anchor\{(?:[^\}]*)\}//g;
|
||||
|
||||
# @uref can take one, two, or three arguments, with different
|
||||
# semantics each time. @url and @email are just like @uref with
|
||||
# one argument, for our purposes.
|
||||
@ -429,10 +385,6 @@ sub postprocess
|
||||
s/\@uref\{([^\},]*),([^\},]*)\}/$2 (C<$1>)/g;
|
||||
s/\@uref\{([^\},]*),([^\},]*),([^\},]*)\}/$3/g;
|
||||
|
||||
# Handle gccoptlist here, so it can contain the above formatting
|
||||
# commands.
|
||||
s/\@gccoptlist\{([^\}]*)\}/B<$1>/g;
|
||||
|
||||
# Un-escape <> at this point.
|
||||
s/</</g;
|
||||
s/>/>/g;
|
||||
@ -466,7 +418,6 @@ sub unmunge
|
||||
# Replace escaped symbols with their equivalents.
|
||||
local $_ = $_[0];
|
||||
|
||||
s/&(.)grave;/E<$1grave>/g;
|
||||
s/</E<lt>/g;
|
||||
s/>/E<gt>/g;
|
||||
s/{/\{/g;
|
||||
|
@ -479,6 +479,10 @@ Turn off verbose without being completely quiet (use @samp{-q} for
|
||||
that), which means that error messages and basic information still get
|
||||
printed.
|
||||
|
||||
@item -nv
|
||||
@itemx --report-speed=@var{type}
|
||||
Output bandwidth as @var{type}. The only accepted value is @samp{bits}.
|
||||
|
||||
@cindex input-file
|
||||
@item -i @var{file}
|
||||
@itemx --input-file=@var{file}
|
||||
@ -1658,6 +1662,36 @@ not used), EGD is never contacted. EGD is not needed on modern Unix
|
||||
systems that support @file{/dev/random}.
|
||||
@end table
|
||||
|
||||
@cindex WARC
|
||||
@table @samp
|
||||
@item --warc-file=@var{file}
|
||||
Use @var{file} as the destination WARC file.
|
||||
|
||||
@item --warc-header=@var{string}
|
||||
Insert @var{string} into the warcinfo record.
|
||||
|
||||
@item --warc-max-size=@var{size}
|
||||
Set the maximum size of the WARC files to @var{size}.
|
||||
|
||||
@item --warc-cdx
|
||||
Write CDX index files.
|
||||
|
||||
@item --warc-dedup=@var{file}
|
||||
Do not store records listed in this CDX file.
|
||||
|
||||
@item --no-warc-compression
|
||||
Do not compress WARC files with GZIP.
|
||||
|
||||
@item --no-warc-digests
|
||||
Do not calculate SHA1 digests.
|
||||
|
||||
@item --no-warc-keep-log
|
||||
Do not store the log file in a WARC record.
|
||||
|
||||
@item --warc-tempdir=@var{dir}
|
||||
Specify the location for temporary files created by the WARC writer.
|
||||
@end table
|
||||
|
||||
@node FTP Options, Recursive Retrieval Options, HTTPS (SSL/TLS) Options, Invoking
|
||||
@section FTP Options
|
||||
|
||||
@ -2284,6 +2318,8 @@ in @file{.wgetrc}.
|
||||
@item -A @var{acclist}
|
||||
@itemx --accept @var{acclist}
|
||||
@itemx accept = @var{acclist}
|
||||
@itemx --accept-regex @var{urlregex}
|
||||
@itemx accept-regex = @var{urlregex}
|
||||
The argument to @samp{--accept} option is a list of file suffixes or
|
||||
patterns that Wget will download during recursive retrieval. A suffix
|
||||
is the ending part of a file, and consists of ``normal'' letters,
|
||||
@ -2300,6 +2336,9 @@ a description of how pattern matching works.
|
||||
Of course, any number of suffixes and patterns can be combined into a
|
||||
comma-separated list, and given as an argument to @samp{-A}.
|
||||
|
||||
The argument to @samp{--accept-regex} option is a regular expression which
|
||||
is matched against the complete URL.
|
||||
|
||||
@cindex reject wildcards
|
||||
@cindex reject suffixes
|
||||
@cindex wildcards, reject
|
||||
@ -2307,6 +2346,8 @@ comma-separated list, and given as an argument to @samp{-A}.
|
||||
@item -R @var{rejlist}
|
||||
@itemx --reject @var{rejlist}
|
||||
@itemx reject = @var{rejlist}
|
||||
@itemx --reject-regex @var{urlregex}
|
||||
@itemx reject-regex = @var{urlregex}
|
||||
The @samp{--reject} option works the same way as @samp{--accept}, only
|
||||
its logic is the reverse; Wget will download all files @emph{except} the
|
||||
ones matching the suffixes (or patterns) in the list.
|
||||
@ -2318,6 +2359,9 @@ Analogously, to download all files except the ones beginning with
|
||||
expansion by the shell.
|
||||
@end table
|
||||
|
||||
The argument to @samp{--reject-regex} option is a regular expression which
|
||||
is matched against the complete URL.
|
||||
|
||||
@noindent
|
||||
The @samp{-A} and @samp{-R} options may be combined to achieve even
|
||||
better fine-tuning of which files to retrieve. E.g. @samp{wget -A
|
||||
@ -3532,28 +3576,30 @@ internal networks from the rest of Internet. In order to obtain
|
||||
information from the Web, their users connect and retrieve remote data
|
||||
using an authorized proxy.
|
||||
|
||||
@c man begin ENVIRONMENT
|
||||
Wget supports proxies for both @sc{http} and @sc{ftp} retrievals. The
|
||||
standard way to specify proxy location, which Wget recognizes, is using
|
||||
the following environment variables:
|
||||
|
||||
@table @code
|
||||
@table @env
|
||||
@item http_proxy
|
||||
@itemx https_proxy
|
||||
If set, the @code{http_proxy} and @code{https_proxy} variables should
|
||||
If set, the @env{http_proxy} and @env{https_proxy} variables should
|
||||
contain the @sc{url}s of the proxies for @sc{http} and @sc{https}
|
||||
connections respectively.
|
||||
|
||||
@item ftp_proxy
|
||||
This variable should contain the @sc{url} of the proxy for @sc{ftp}
|
||||
connections. It is quite common that @code{http_proxy} and
|
||||
@code{ftp_proxy} are set to the same @sc{url}.
|
||||
connections. It is quite common that @env{http_proxy} and
|
||||
@env{ftp_proxy} are set to the same @sc{url}.
|
||||
|
||||
@item no_proxy
|
||||
This variable should contain a comma-separated list of domain extensions
|
||||
proxy should @emph{not} be used for. For instance, if the value of
|
||||
@code{no_proxy} is @samp{.mit.edu}, proxy will not be used to retrieve
|
||||
@env{no_proxy} is @samp{.mit.edu}, proxy will not be used to retrieve
|
||||
documents from MIT.
|
||||
@end table
|
||||
@c man end
|
||||
|
||||
In addition to the environment variables, proxy location and settings
|
||||
may be specified from within Wget itself.
|
||||
|
347
src/ChangeLog
347
src/ChangeLog
@ -1,3 +1,350 @@
|
||||
2012-07-03 Steven Schubiger <stsc@member.fsf.org>
|
||||
|
||||
* init.c: Include warc.h for warc_close in cleanup function.
|
||||
|
||||
2012-07-08 Steven Schubiger <stsc@member.fsf.org>
|
||||
|
||||
* exits.h: Fix comment.
|
||||
* exits.c: Likewise.
|
||||
|
||||
2012-07-07 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
(digest_authentication_encode): Add support for RFC 2617 Digest
|
||||
Access Authentication.
|
||||
|
||||
2012-07-07 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* http.c (http_loop): Fix log message.
|
||||
* main.c (main): Likewise.
|
||||
Reported by: Petr Pisar <petr.pisar@atlas.cz>
|
||||
|
||||
2012-06-17 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* wget.h: Define `CLOSEFAILED'.
|
||||
* init.c: Include "exits.h".
|
||||
(cleanup): Check `fclose' failure.
|
||||
* exits.c (get_status_for_err): Handle `CLOSEFAILED'.
|
||||
|
||||
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* main.c (main): Move some cleanup related function to...
|
||||
* init.c (cleanup): ...here.
|
||||
|
||||
* main.c: Do not include "stdout.h".
|
||||
(main): Do not register `close_stdout' at exit.
|
||||
Reported by: Micah Cowan <micah@cowan.name>.
|
||||
|
||||
2012-06-09 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* main.c (print_help): Move --report-speed under the section
|
||||
"Logging and input file".
|
||||
|
||||
2012-06-06 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* main.c (print_help): Rename --bits to --report-bps.
|
||||
(cmdline_options): Likewise.
|
||||
* init.c (commands): Rename --report-bps to --report-speed.
|
||||
(cmd_spec_report_speed): New function.
|
||||
|
||||
* options.h (struct options): Rename `bits_fmt' to `report_bps'.
|
||||
* main.c (print_help): Rename --bits to --report-bps.
|
||||
(cmdline_options): Likewise.
|
||||
* init.c (commands): Likewise
|
||||
|
||||
* progress.c (create_image): Adjust caller.
|
||||
* retr.c (retr_rate): Likewise.
|
||||
* utils.c (convert_to_bits): Likewise.
|
||||
|
||||
2012-06-04 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* main.c (main): Check for filename != NULL.
|
||||
* warc.c (warc_process_cdx_line): Fix memory leak.
|
||||
* utils.c (match_posix_regex, compile_posix_regex): Remove dead
|
||||
assignment.
|
||||
* openssl.c (ssl_init): Fix old-style function definition.
|
||||
|
||||
2012-06-02 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* connect.c: Include <sys/socket.h> and <sys/select.h>.
|
||||
|
||||
2012-05-30 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* warc.c: Fix segfault if CDX record is not found.
|
||||
|
||||
2011-05-26 Steven Schweda <sms@antinode.info>
|
||||
* connect.c [HAVE_SYS_SOCKET_H]: Include <sys/socket.h>.
|
||||
[HAVE_SYS_SELECT_H]: Include <sys/select.h>.
|
||||
|
||||
2012-05-26 Mike Frysinger <vapier@gentoo.org>
|
||||
|
||||
* warc.c: Change type of `warc_current_gzfile' to gzFile.
|
||||
|
||||
2012-05-26 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* warc.c (warc_load_cdx_dedup_file): Change type of `line_length' to
|
||||
ssize_t.
|
||||
Suggested by: Ángel González <keisial@gmail.com>
|
||||
|
||||
2012-05-18 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* gnutls.c (wgnutls_poll): Honor the specified `timeout' value.
|
||||
(wgnutls_peek): Likewise.
|
||||
|
||||
2012-05-19 illusionoflife <illusion.of.life92@gmail.com> (tiny change)
|
||||
|
||||
* convert.c (register_html,register_css): Fixed functions signature to
|
||||
not accept unused argument
|
||||
* retr.c (retrieve_url): Changed register_{css,html} usage according
|
||||
to the new signature.
|
||||
|
||||
2012-05-16 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* warc.h: Cut length lines to 80 columns.
|
||||
* warc.c: Likewise.
|
||||
|
||||
2012-05-14 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* gnutls.c (wgnutls_read_timeout): removed warnings, moved fcntl stuff
|
||||
outside loop.
|
||||
|
||||
* hash.h (hash_table_put): Make argument "value" const.
|
||||
* hash.c (hash_table_put): Make argument value const. Cast `value' to
|
||||
void.
|
||||
* http.c (request_set_header): Make argument `name' const. Cast `value'
|
||||
and `name' to void*.
|
||||
(request_remove_header): Make argument `name' const.
|
||||
* url.c (url_file_name): Make `index_filename' static.
|
||||
* warc.h (warc_write_cdx_record): Make `url', `timestamp', `mime_type',
|
||||
`payload_digest', `redirect_location', `warc_filename', `response_uuid'
|
||||
arguments const. Make `checksum' const.
|
||||
* warc.c (warc_write_date_header): Make the `timestamp' argument const.
|
||||
Make `extension' const.
|
||||
(warc_write_cdx_record): Make `url', `timestamp', `mime_type',
|
||||
`payload_digest', `redirect_location', `warc_filename', `response_uuid'
|
||||
arguments const. Make `checksum' const.
|
||||
|
||||
2012-05-13 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* gnutls.c (credentials): Change type to
|
||||
gnutls_certificate_credentials_t.
|
||||
(ssl_init): Do not use deprecated types.
|
||||
(ssl_connect_wget): Likewise.
|
||||
|
||||
2012-04-11 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* init.c: Add --accept-regex, --reject-regex and --regex-type.
|
||||
* main.c: Likewise.
|
||||
* options.c: Likewise.
|
||||
* recur.c: Likewise.
|
||||
* utils.c: Add regex-related functions.
|
||||
* utils.h: Add regex-related functions.
|
||||
|
||||
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* convert.c (convert_links_in_hashtable): Make it static.
|
||||
* cookies.c (parse_set_cookie): Remove empty else branches.
|
||||
* css-url.c: Include "css-url.h".
|
||||
(get_uri_string): Make it static.
|
||||
* css-url.h (get_urls_css): Add prototype.
|
||||
* gnutls.c (ssl_init): Add prototype.
|
||||
* html-parse.c (tagstack_push): Make it static.
|
||||
* html-parse.c (tagstack_pop): Make it static.
|
||||
* html-parse.c (tagstack_find): Make it static.
|
||||
* html-url.c (cleanup_html_url): Make it static.
|
||||
* progress.c (count_cols): Make it static.
|
||||
* progress.c (get_eta): Make it static.
|
||||
* retr.h (convert_to_bits): Remove prototype.
|
||||
* utils.h (convert_to_bits): Add prototype.
|
||||
* spider.c (spider_cleanup): Make it static.
|
||||
* warc.c (warc_write_start_record): Add prototype.
|
||||
* warc.c (warc_write_end_record): Add prototype.
|
||||
* warc.c (warc_start_cdx_file): Add prototype.
|
||||
* warc.c (warc_init): Add prototype.
|
||||
* warc.c (warc_load_cdx_dedup_file): Add prototype.
|
||||
* warc.c (warc_write_metadata): Add prototype.
|
||||
* warc.c (warc_close): Add prototype.
|
||||
* warc.c (warc_tempfile): Add prototype.
|
||||
* warc.c (warc_write_warcinfo_record): Make it static.
|
||||
* warc.c (warc_load_cdx_dedup_file): Make it static.
|
||||
* warc.c (warc_write_metadata): Make it static.
|
||||
* warc.h (warc_init): Fix prototype.
|
||||
* warc.h (warc_close): Fix prototype.
|
||||
* warc.h (warc_tempfile): Fix prototype.
|
||||
|
||||
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* url.c: Use empty query in local filenames.
|
||||
|
||||
2012-04-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* main.c (main): Dynamically allocate `opt.progress_type'.
|
||||
|
||||
2012-04-21 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* ftp-basic.c (ftp_pasv): Fix memory leak.
|
||||
|
||||
* http.c (gethttp): Fix memory leak.
|
||||
|
||||
* ftp.c (getftp): Silence compiler warning.
|
||||
|
||||
2009-06-14 Phil Pennock <mutt-dev@spodhuis.org> (tiny change)
|
||||
* host.h: Declare `is_valid_ip_address'.
|
||||
* host.c (is_valid_ip_address): New function.
|
||||
* http.c (gethttp): Specify the hostname to ssl_connect_wget.
|
||||
* gnutls.c (ssl_connect_wget): Specify the server name.
|
||||
* openssl.c (ssl_connect_wget): Likewise.
|
||||
* ssl.h: Change method signature for ssl_connect_wget.
|
||||
|
||||
2012-04-13 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
|
||||
|
||||
* warc.c (warc_load_cdx_dedup_file): Fix a memory leak by freeing
|
||||
`lineptr'.
|
||||
|
||||
2012-04-07 Daniel Kahn Gillmor <dkg@fifthhorseman.net> (tiny change)
|
||||
|
||||
* gnutls.c (key_type_to_gnutls_type): New function.
|
||||
(ssl_init): Use correctly the specified gnutls certificate.
|
||||
|
||||
2012-04-01 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* html-url.c: Prevent crash on incomplete STYLE tag.
|
||||
|
||||
2012-04-01 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* gnutls.c (wgnutls_read_timeout): Ensure timer is freed.
|
||||
|
||||
* gnutls.c (wgnutls_read_timeout): Do not use timer if it is not
|
||||
allocated.
|
||||
Reported by: Xu Zhongxing <xu_zhong_xing@163.com>
|
||||
|
||||
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
|
||||
|
||||
* warc.c: make warc_uuid_str() implementation depend on HAVE_LIBUUID.
|
||||
|
||||
2012-03-29 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
|
||||
|
||||
* utils.c (library): Include <sys/time.h>.
|
||||
|
||||
2012-03-25 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* utils.c: Include <sys/ioctl.h>.
|
||||
|
||||
* ptimer.c: Include <sys/time.h>.
|
||||
|
||||
* connect.c: Include <sys/socket.h>, <sys/select.h>, <sys/time.h>.
|
||||
Reported by: Ray Satiro <raysatiro@yahoo.com>.
|
||||
|
||||
2012-03-25 Ray Satiro <raysatiro@yahoo.com>
|
||||
|
||||
* build_info.c.in: Check that HAVE_LIBSSL32 is defined when OpenSSL
|
||||
is used.
|
||||
|
||||
2012-03-07 Steven Schubiger <stsc@member.fsf.org>
|
||||
|
||||
* init.c (wgetrc_user_file_name): Correct typo.
|
||||
|
||||
2012-03-06 Sasikantha Babu <sasikanth.v19@gmail.com>
|
||||
|
||||
* utils.c (convert_to_bits): Added new function convert_to_bits to
|
||||
convert bytes to bits.
|
||||
* retr.c (calc_rate): Modified the function to handle --bits
|
||||
option and download rate calculated as bits per sec (SI-prefix)
|
||||
for --bits otherwise bytes (IEC-prefix).
|
||||
(retr_rate): Rates will display in bits per sec for --bits.
|
||||
* options.h (struct opt): Added --bits option bool variable bits_fmt.
|
||||
* main.c (print_help): Added help for --bits.
|
||||
* init.c: Defined command for --bits option.
|
||||
* retr.h: Added function prototype.
|
||||
|
||||
2012-02-26 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* main.c: Include "closeout.h"
|
||||
(main): Register close_stdout at exit.
|
||||
|
||||
2012-02-01 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* warc.c: Fix large file support with ftello, fseeko.
|
||||
* warc.h: Fix large file support.
|
||||
* http.c: Fix large file support.
|
||||
|
||||
2012-02-23 Giuseppe Scrivano <giuseppe@southpole.se>
|
||||
|
||||
* main.c (main): Write diagnostic messages to `stderr' not to `stdout'.
|
||||
|
||||
* main.c (main): Fail gracefully if `malloc' fails.
|
||||
|
||||
* gnutls.c (wgnutls_read): Remove unused variables `timer' and `flags'.
|
||||
|
||||
2012-02-17 Steven Schubiger <stsc@member.fsf.org>
|
||||
|
||||
* warc.c: Add license header.
|
||||
|
||||
2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* retr.c (fd_read_body): If the response is chunked, the chunk
|
||||
headers are now written to the WARC file, making the WARC file
|
||||
an exact copy of the HTTP response.
|
||||
|
||||
2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* retr.c (fd_read_body): Fix a memory leak with chunked responses.
|
||||
* http.c (skip_short_body): Fix the same memory leak.
|
||||
|
||||
2012-01-09 Gijs van Tulder <gvtulder@gmail.com>
|
||||
|
||||
* init.c: Disable WARC compression if zlib is disabled.
|
||||
* main.c: Do not show the 'no-warc-compression' option if zlib is
|
||||
disabled.
|
||||
* warc.c: Do not compress WARC files if zlib is disabled.
|
||||
|
||||
2012-01-09 Sasikantha Babu <sasikanth.v19@gmail.com> (tiny change)
|
||||
* connect.c (connect_to_ip): properly formatted ipv6 address display.
|
||||
(socket_family): New function - returns socket family type.
|
||||
* http.c (gethttp): properly formatted ipv6 address display.
|
||||
|
||||
2011-11-09 Gijs van Tulder <address@hidden>
|
||||
|
||||
* warc.c: Call gzdopen() with wb9 instead of wb+9, which fails on
|
||||
zlib version >= 1.2.4.
|
||||
|
||||
2011-11-04 Steven Schweda <address@hidden>
|
||||
|
||||
* warc.c [! WINDOWS]: Include <libgen.h>.
|
||||
(warc_write_warcinfo_record): Assign a new allocated buffer and
|
||||
free it on errors.
|
||||
|
||||
2011-11-01 Steven Schweda <address@hidden>
|
||||
|
||||
* gnutls.c (ssl_init): Ensure GNU TLS is loaded only once.
|
||||
|
||||
2011-10-07 Steven Schweda <address@hidden>
|
||||
|
||||
* connect.c: Add HAVE_SYS_SELECT_H and HAVE_SYS_SOCKET_H conditions
|
||||
on includes of <sys/select.h> and <sys/socket.h>, respectively.
|
||||
* ftp.c (getftp): Move BIN_TYPE_TRANSFER macro into VMS-specific
|
||||
section. On VMS, use Stream_LF attributes for listing files. Pass
|
||||
BIN_TYPE_FILE to fopen_excl() instead of constant-everywhere "true".
|
||||
* ftp.c (ftp_retrieve_list): Restore lost test of opt.preserve_perm
|
||||
(--preserve-permissions) on the chmod() operation.
|
||||
* init.c, main.c: Remove "deprecated" from opt.preserve_perm
|
||||
(--preserve-permissions).
|
||||
* init.c (initialize): Use distinct messages for errors in C macro
|
||||
SYSTEM_WGETRC and environment-variable SYSTEM_WGETRC. Avoid use of
|
||||
C macro SYSTEM_WGETRC when it's not defined.
|
||||
* log.c (log_close): Avoid closing logfp when it's stderr.
|
||||
* main.c (print_help): Restore --preserve-permissions.
|
||||
* main.c (main): Avoid using a negative value of longindex as a
|
||||
subscript (for long_options[]) when searching for "--config".
|
||||
* main.c (main): Exit the program using exit() instead of "return".
|
||||
(VMS handles these differently, and exit() is better.)
|
||||
* openssl.c (ssl_init): Add type cast (SSL_METHOD *) to newly "const"
|
||||
"meth" argument to accommodate OpenSSL version 0.9.8, where that
|
||||
argument is not "const" in the OpenSSL function (SSL_CTX_new).
|
||||
* test.c: Declare "program_argstring".
|
||||
* utils.c (fopen_excl): Comment typography.
|
||||
* warc.h: New file.
|
||||
* warc.c: New file.
|
||||
|
||||
2011-10-02 Henrik Holst <henrik.holst@millistream.com> (tiny change)
|
||||
* http.c (gethttp): If 'contentonerror' is used then do not
|
||||
skip the http body on 4xx and 5xx errors.
|
||||
|
@ -36,9 +36,11 @@ IRI_OBJ = iri.c
|
||||
endif
|
||||
if METALINK_IS_ENABLED
|
||||
METALINK_OBJ = metalink.c
|
||||
METALINK_HEADER = metalink.h
|
||||
endif
|
||||
if THREADS_ARE_ENABLED
|
||||
THREAD_OBJ = multi.c
|
||||
MULTI_HEADER = multi.h
|
||||
endif
|
||||
|
||||
# The following line is losing on some versions of make!
|
||||
@ -52,17 +54,18 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
|
||||
css_.c css-url.c \
|
||||
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
|
||||
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
|
||||
recur.c res.c retr.c spider.c url.c \
|
||||
utils.c exits.c build_info.c $(IRI_OBJ) \
|
||||
recur.c res.c retr.c spider.c url.c warc.c \
|
||||
utils.c exits.c build_info.c $(IRI_OBJ) \
|
||||
$(THREAD_OBJ) $(METALINK_OBJ) \
|
||||
css-url.h css-tokens.h connect.h convert.h cookies.h \
|
||||
ftp.h hash.h host.h html-parse.h html-url.h \
|
||||
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
|
||||
options.h progress.h ptimer.h recur.h res.h retr.h \
|
||||
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
|
||||
exits.h gettext.h metalink.h multi.h
|
||||
spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \
|
||||
exits.h gettext.h $(THREAD_HEADER) $(METALINK_HEADER)
|
||||
|
||||
nodist_wget_SOURCES = version.c
|
||||
EXTRA_wget_SOURCES = iri.c metalink.c
|
||||
EXTRA_wget_SOURCES = iri.c metalink.c multi.c
|
||||
LDADD = $(LIBOBJS) ../lib/libgnu.a
|
||||
AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
|
||||
|
||||
|
@ -11,5 +11,5 @@ threads defined ENABLE_THREADS
|
||||
metalink defined ENABLE_METALINK
|
||||
|
||||
ssl choice:
|
||||
openssl defined HAVE_LIBSSL
|
||||
openssl defined HAVE_LIBSSL || defined HAVE_LIBSSL32
|
||||
gnutls defined HAVE_LIBGNUTLS
|
||||
|
@ -53,9 +53,7 @@ as that of the covered work. */
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include "utils.h"
|
||||
#include "host.h"
|
||||
#include "connect.h"
|
||||
@ -293,7 +291,12 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
|
||||
xfree (str);
|
||||
}
|
||||
else
|
||||
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
|
||||
{
|
||||
if (ip->family == AF_INET)
|
||||
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
|
||||
else if (ip->family == AF_INET6)
|
||||
logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the sockaddr info to SA. */
|
||||
@ -581,6 +584,36 @@ socket_ip_address (int sock, ip_address *ip, int endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the socket family of the connection on SOCK.
|
||||
Return family type on success, -1 otherwise.
|
||||
|
||||
If ENDPOINT is ENDPOINT_LOCAL, it returns the sock family of the local
|
||||
(client) side of the socket. Else if ENDPOINT is ENDPOINT_PEER, it
|
||||
returns the sock family of the remote (peer's) side of the socket. */
|
||||
|
||||
int
|
||||
socket_family (int sock, int endpoint)
|
||||
{
|
||||
struct sockaddr_storage storage;
|
||||
struct sockaddr *sockaddr = (struct sockaddr *) &storage;
|
||||
socklen_t addrlen = sizeof (storage);
|
||||
int ret;
|
||||
|
||||
memset (sockaddr, 0, addrlen);
|
||||
|
||||
if (endpoint == ENDPOINT_LOCAL)
|
||||
ret = getsockname (sock, sockaddr, &addrlen);
|
||||
else if (endpoint == ENDPOINT_PEER)
|
||||
ret = getpeername (sock, sockaddr, &addrlen);
|
||||
else
|
||||
abort ();
|
||||
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
|
||||
return sockaddr->sa_family;
|
||||
}
|
||||
|
||||
/* Return true if the error from the connect code can be considered
|
||||
retryable. Wget normally retries after errors, but the exception
|
||||
are the "unsupported protocol" type errors (possible on IPv4/IPv6
|
||||
|
@ -51,6 +51,7 @@ enum {
|
||||
ENDPOINT_PEER
|
||||
};
|
||||
bool socket_ip_address (int, ip_address *, int);
|
||||
int socket_family (int sock, int endpoint);
|
||||
|
||||
bool retryable_socket_connect_error (int);
|
||||
|
||||
|
@ -87,7 +87,7 @@ static pthread_mutex_t convert_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static void convert_links (const char *, struct urlpos *);
|
||||
|
||||
|
||||
void
|
||||
static void
|
||||
convert_links_in_hashtable (struct hash_table *downloaded_set,
|
||||
int is_css,
|
||||
int *file_count)
|
||||
@ -153,6 +153,9 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
|
||||
set_uri_encoding (pi, opt.locale, true);
|
||||
|
||||
u = url_parse (cur_url->url->url, NULL, pi, true);
|
||||
if (!u)
|
||||
continue;
|
||||
|
||||
local_name = hash_table_get (dl_url_file_map, u->url);
|
||||
|
||||
/* Decide on the conversion type. */
|
||||
|
@ -101,8 +101,8 @@ downloaded_file_t downloaded_file (downloaded_file_t, const char *);
|
||||
|
||||
void register_download (const char *, const char *);
|
||||
void register_redirection (const char *, const char *);
|
||||
void register_html (const char *, const char *);
|
||||
void register_css (const char *, const char *);
|
||||
void register_html (const char *);
|
||||
void register_css (const char *);
|
||||
void register_delete_file (const char *);
|
||||
void convert_all_links (void);
|
||||
void convert_cleanup (void);
|
||||
|
@ -391,6 +391,9 @@ parse_set_cookie (const char *set_cookie, bool silent)
|
||||
goto error;
|
||||
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
|
||||
|
||||
/* Check if expiration spec is valid.
|
||||
If not, assume default (cookie doesn't expire, but valid only for
|
||||
this session.) */
|
||||
expires = http_atotm (value_copy);
|
||||
if (expires != (time_t) -1)
|
||||
{
|
||||
@ -402,10 +405,6 @@ parse_set_cookie (const char *set_cookie, bool silent)
|
||||
if (cookie->expiry_time < cookies_now)
|
||||
cookie->discard_requested = 1;
|
||||
}
|
||||
else
|
||||
/* Error in expiration spec. Assume default (cookie doesn't
|
||||
expire, but valid only for this session.) */
|
||||
;
|
||||
}
|
||||
else if (TOKEN_IS (name, "max-age"))
|
||||
{
|
||||
@ -433,9 +432,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
|
||||
/* ignore value completely */
|
||||
cookie->secure = 1;
|
||||
}
|
||||
else
|
||||
/* Ignore unrecognized attribute. */
|
||||
;
|
||||
/* else: Ignore unrecognized attribute. */
|
||||
}
|
||||
if (*ptr)
|
||||
/* extract_param has encountered a syntax error */
|
||||
|
@ -55,6 +55,7 @@ as that of the covered work. */
|
||||
#include "convert.h"
|
||||
#include "html-url.h"
|
||||
#include "css-tokens.h"
|
||||
#include "css-url.h"
|
||||
|
||||
/* from lex.yy.c */
|
||||
extern char *yytext;
|
||||
@ -107,7 +108,7 @@ const char *token_names[] = {
|
||||
whitespace after the opening parenthesis and before the closing
|
||||
parenthesis.
|
||||
*/
|
||||
char *
|
||||
static char *
|
||||
get_uri_string (const char *at, int *pos, int *length)
|
||||
{
|
||||
char *uri;
|
||||
|
@ -30,6 +30,7 @@ as that of the covered work. */
|
||||
#ifndef CSS_URL_H
|
||||
#define CSS_URL_H
|
||||
|
||||
void get_urls_css (struct map_context *, int, int);
|
||||
void get_urls_css (struct map_context *, int, int);
|
||||
struct urlpos *get_urls_css_file (const char *, const char *);
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
/* Command line parsing.
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
Inc.
|
||||
/* Exit status handling.
|
||||
Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
|
||||
@ -60,7 +58,7 @@ get_status_for_err (uerr_t err)
|
||||
case RETROK:
|
||||
return WGET_EXIT_SUCCESS;
|
||||
case FOPENERR: case FOPEN_EXCL_ERR: case FWRITEERR: case WRITEFAILED:
|
||||
case UNLINKERR:
|
||||
case UNLINKERR: case CLOSEFAILED:
|
||||
return WGET_EXIT_IO_FAIL;
|
||||
case NOCONERROR: case HOSTERR: case CONSOCKERR: case CONERROR:
|
||||
case CONSSLERR: case CONIMPOSSIBLE: case FTPRERR: case FTPINVPASV:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Internationalization related declarations.
|
||||
Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
/* Exit status related declarations.
|
||||
Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
|
||||
|
@ -524,7 +524,10 @@ ftp_pasv (int csock, ip_address *addr, int *port)
|
||||
for (s += 4; *s && !c_isdigit (*s); s++)
|
||||
;
|
||||
if (!*s)
|
||||
return FTPINVPASV;
|
||||
{
|
||||
xfree (respline);
|
||||
return FTPINVPASV;
|
||||
}
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
tmp[i] = 0;
|
||||
@ -593,7 +596,10 @@ ftp_lpsv (int csock, ip_address *addr, int *port)
|
||||
for (s += 4; *s && !c_isdigit (*s); s++)
|
||||
;
|
||||
if (!*s)
|
||||
return FTPINVPASV;
|
||||
{
|
||||
xfree (respline);
|
||||
return FTPINVPASV;
|
||||
}
|
||||
|
||||
/* First, get the address family */
|
||||
af = 0;
|
||||
|
125
src/ftp.c
125
src/ftp.c
@ -49,6 +49,7 @@ as that of the covered work. */
|
||||
#include "netrc.h"
|
||||
#include "convert.h" /* for downloaded_file */
|
||||
#include "recur.h" /* for INFINITE_RECURSION */
|
||||
#include "warc.h"
|
||||
|
||||
#ifdef __VMS
|
||||
# include "vms.h"
|
||||
@ -237,17 +238,17 @@ static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **);
|
||||
|
||||
/* Retrieves a file with denoted parameters through opening an FTP
|
||||
connection to the server. It always closes the data connection,
|
||||
and closes the control connection in case of error. */
|
||||
and closes the control connection in case of error. If warc_tmp
|
||||
is non-NULL, the downloaded data will be written there as well. */
|
||||
static uerr_t
|
||||
getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
|
||||
wgint restval, ccon *con, int count)
|
||||
wgint restval, ccon *con, int count, FILE *warc_tmp)
|
||||
{
|
||||
int csock, dtsock, local_sock, res;
|
||||
uerr_t err = RETROK; /* appease the compiler */
|
||||
FILE *fp;
|
||||
char *user, *passwd, *respline;
|
||||
char *tms;
|
||||
const char *tmrate;
|
||||
char *respline, *tms;
|
||||
const char *user, *passwd, *tmrate;
|
||||
int cmd = con->cmd;
|
||||
bool pasv_mode_open = false;
|
||||
wgint expected_bytes = 0;
|
||||
@ -287,13 +288,6 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
|
||||
{
|
||||
char *host = con->proxy ? con->proxy->host : u->host;
|
||||
int port = con->proxy ? con->proxy->port : u->port;
|
||||
char *logname = user;
|
||||
|
||||
if (con->proxy)
|
||||
{
|
||||
/* If proxy is in use, log in as username@target-site. */
|
||||
logname = concat_strings (user, "@", u->host, (char *) 0);
|
||||
}
|
||||
|
||||
/* Login to the server: */
|
||||
|
||||
@ -301,20 +295,10 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
|
||||
|
||||
csock = connect_to_host (host, port);
|
||||
if (csock == E_HOST)
|
||||
{
|
||||
if (con->proxy)
|
||||
xfree (logname);
|
||||
|
||||
return HOSTERR;
|
||||
}
|
||||
else if (csock < 0)
|
||||
{
|
||||
if (con->proxy)
|
||||
xfree (logname);
|
||||
|
||||
return (retryable_socket_connect_error (errno)
|
||||
? CONERROR : CONIMPOSSIBLE);
|
||||
}
|
||||
|
||||
if (cmd & LEAVE_PENDING)
|
||||
con->csock = csock;
|
||||
@ -326,10 +310,15 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
|
||||
quotearg_style (escape_quoting_style, user));
|
||||
if (opt.server_response)
|
||||
logputs (LOG_ALWAYS, "\n");
|
||||
err = ftp_login (csock, logname, passwd);
|
||||
|
||||
if (con->proxy)
|
||||
xfree (logname);
|
||||
{
|
||||
/* If proxy is in use, log in as username@target-site. */
|
||||
char *logname = concat_strings (user, "@", u->host, (char *) 0);
|
||||
err = ftp_login (csock, logname, passwd);
|
||||
xfree (logname);
|
||||
}
|
||||
else
|
||||
err = ftp_login (csock, user, passwd);
|
||||
|
||||
/* FTPRERR, FTPSRVERR, WRITEFAILED, FTPLOGREFUSED, FTPLOGINC */
|
||||
switch (err)
|
||||
@ -512,7 +501,7 @@ Error in server response, closing control connection.\n"));
|
||||
logputs (LOG_VERBOSE, _("==> CWD not needed.\n"));
|
||||
else
|
||||
{
|
||||
char *targ = NULL;
|
||||
const char *targ = NULL;
|
||||
int cwd_count;
|
||||
int cwd_end;
|
||||
int cwd_start;
|
||||
@ -1152,13 +1141,25 @@ Error in server response, closing control connection.\n"));
|
||||
Elsewhere, define a constant "binary" flag.
|
||||
Isn't it nice to have distinct text and binary file types?
|
||||
*/
|
||||
# define BIN_TYPE_TRANSFER (type_char != 'A')
|
||||
/* 2011-09-30 SMS.
|
||||
Added listing files to the set of non-"binary" (text, Stream_LF)
|
||||
files. (Wget works either way, but other programs, like, say, text
|
||||
editors, work better on listing files which have text attributes.)
|
||||
Now we use "binary" attributes for a binary ("IMAGE") transfer,
|
||||
unless "--ftp-stmlf" was specified, and we always use non-"binary"
|
||||
(text, Stream_LF) attributes for a listing file, or for an ASCII
|
||||
transfer.
|
||||
Tidied the VMS-specific BIN_TYPE_xxx macros, and changed the call to
|
||||
fopen_excl() (restored?) to use BIN_TYPE_FILE instead of "true".
|
||||
*/
|
||||
#ifdef __VMS
|
||||
# define BIN_TYPE_TRANSFER (type_char != 'A')
|
||||
# define BIN_TYPE_FILE \
|
||||
((!(cmd & DO_LIST)) && BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
|
||||
# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
|
||||
# define FOPEN_OPT_ARGS_BIN "ctx=bin,stm", "rfm=fix", "mrs=512" FOPEN_OPT_ARGS
|
||||
# define BIN_TYPE_FILE (BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
|
||||
#else /* def __VMS */
|
||||
# define BIN_TYPE_FILE 1
|
||||
# define BIN_TYPE_FILE true
|
||||
#endif /* def __VMS [else] */
|
||||
|
||||
if (restval && !(con->cmd & DO_LIST))
|
||||
@ -1182,7 +1183,7 @@ Error in server response, closing control connection.\n"));
|
||||
}
|
||||
else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct
|
||||
|| opt.output_document || count > 0)
|
||||
{
|
||||
{
|
||||
if (opt.unlink && file_exists_p (con->target))
|
||||
{
|
||||
int res = unlink (con->target);
|
||||
@ -1217,7 +1218,7 @@ Error in server response, closing control connection.\n"));
|
||||
}
|
||||
else
|
||||
{
|
||||
fp = fopen_excl (con->target, true);
|
||||
fp = fopen_excl (con->target, BIN_TYPE_FILE);
|
||||
if (!fp && errno == EEXIST)
|
||||
{
|
||||
/* We cannot just invent a new name and use it (which is
|
||||
@ -1262,7 +1263,7 @@ Error in server response, closing control connection.\n"));
|
||||
rd_size = 0;
|
||||
res = fd_read_body (dtsock, fp,
|
||||
expected_bytes ? expected_bytes - restval : 0,
|
||||
restval, &rd_size, qtyread, &con->dltime, flags);
|
||||
restval, &rd_size, qtyread, &con->dltime, flags, warc_tmp);
|
||||
|
||||
tms = datetime_str (time (NULL));
|
||||
tmrate = retr_rate (rd_size, con->dltime);
|
||||
@ -1273,15 +1274,18 @@ Error in server response, closing control connection.\n"));
|
||||
if (!output_stream || con->cmd & DO_LIST)
|
||||
fclose (fp);
|
||||
|
||||
/* If fd_read_body couldn't write to fp, bail out. */
|
||||
if (res == -2)
|
||||
/* If fd_read_body couldn't write to fp or warc_tmp, bail out. */
|
||||
if (res == -2 || (warc_tmp != NULL && res == -3))
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
|
||||
con->target, strerror (errno));
|
||||
fd_close (csock);
|
||||
con->csock = -1;
|
||||
fd_close (dtsock);
|
||||
return FWRITEERR;
|
||||
if (res == -2)
|
||||
return FWRITEERR;
|
||||
else if (res == -3)
|
||||
return WARC_TMP_FWRITEERR;
|
||||
}
|
||||
else if (res == -1)
|
||||
{
|
||||
@ -1397,6 +1401,11 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
|
||||
uerr_t err;
|
||||
struct_stat st;
|
||||
|
||||
/* Declare WARC variables. */
|
||||
bool warc_enabled = (opt.warc_filename != NULL);
|
||||
FILE *warc_tmp = NULL;
|
||||
ip_address *warc_ip = NULL;
|
||||
|
||||
/* Get the target, and set the name for the message accordingly. */
|
||||
if ((f == NULL) && (con->target))
|
||||
{
|
||||
@ -1433,6 +1442,21 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
|
||||
|
||||
orig_lp = con->cmd & LEAVE_PENDING ? 1 : 0;
|
||||
|
||||
/* For file RETR requests, we can write a WARC record.
|
||||
We record the file contents to a temporary file. */
|
||||
if (warc_enabled && (con->cmd & DO_RETR))
|
||||
{
|
||||
warc_tmp = warc_tempfile ();
|
||||
if (warc_tmp == NULL)
|
||||
return WARC_TMP_FOPENERR;
|
||||
|
||||
if (!con->proxy && con->csock != -1)
|
||||
{
|
||||
warc_ip = (ip_address *) alloca (sizeof (ip_address));
|
||||
socket_ip_address (con->csock, warc_ip, ENDPOINT_PEER);
|
||||
}
|
||||
}
|
||||
|
||||
/* THE loop. */
|
||||
do
|
||||
{
|
||||
@ -1507,7 +1531,9 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
|
||||
len = range->last_byte - restval + 1;
|
||||
}
|
||||
|
||||
err = getftp (u, len, &qtyread, restval, con, count);
|
||||
/* If we are working on a WARC record, getftp should also write
|
||||
to the warc_tmp file. */
|
||||
err = getftp (u, len, &qtyread, restval, con, count, warc_tmp);
|
||||
|
||||
if (range)
|
||||
range->bytes_covered = qtyread;
|
||||
@ -1521,8 +1547,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
|
||||
{
|
||||
case HOSTERR: case CONIMPOSSIBLE: case FWRITEERR: case FOPENERR:
|
||||
case FTPNSFOD: case FTPLOGINC: case FTPNOPASV: case CONTNOTSUPPORTED:
|
||||
case UNLINKERR:
|
||||
case UNLINKERR: case WARC_TMP_FWRITEERR:
|
||||
/* Fatal errors, give up. */
|
||||
if (warc_tmp != NULL)
|
||||
fclose (warc_tmp);
|
||||
return err;
|
||||
case CONSOCKERR: case CONERROR: case FTPSRVERR: case FTPRERR:
|
||||
case WRITEFAILED: case FTPUNKNOWNTYPE: case FTPSYSERR:
|
||||
@ -1590,6 +1618,19 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
|
||||
xfree (hurl);
|
||||
}
|
||||
|
||||
if (warc_enabled && (con->cmd & DO_RETR))
|
||||
{
|
||||
/* Create and store a WARC resource record for the retrieved file. */
|
||||
bool warc_res;
|
||||
|
||||
warc_res = warc_write_resource_record (NULL, u->url, NULL, NULL,
|
||||
warc_ip, NULL, warc_tmp, -1);
|
||||
if (! warc_res)
|
||||
return WARC_ERR;
|
||||
|
||||
/* warc_write_resource_record has also closed warc_tmp. */
|
||||
}
|
||||
|
||||
if ((con->cmd & DO_LIST))
|
||||
/* This is a directory listing file. */
|
||||
{
|
||||
@ -1893,8 +1934,10 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
|
||||
set_local_file (&actual_target, con->target);
|
||||
|
||||
/* If downloading a plain file, set valid (non-zero) permissions. */
|
||||
if (dlthis && (actual_target != NULL) && (f->type == FT_PLAINFILE))
|
||||
/* If downloading a plain file, and the user requested it, then
|
||||
set valid (non-zero) permissions. */
|
||||
if (dlthis && (actual_target != NULL) &&
|
||||
(f->type == FT_PLAINFILE) && opt.preserve_perm)
|
||||
{
|
||||
if (f->perms)
|
||||
chmod (actual_target, f->perms);
|
||||
@ -1927,7 +1970,9 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
xfree (ofile);
|
||||
|
||||
/* Break on fatals. */
|
||||
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
|
||||
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR
|
||||
|| err == WARC_ERR || err == WARC_TMP_FOPENERR
|
||||
|| err == WARC_TMP_FWRITEERR)
|
||||
break;
|
||||
con->cmd &= ~ (DO_CWD | DO_LOGIN);
|
||||
f = f->next;
|
||||
|
178
src/gnutls.c
178
src/gnutls.c
@ -1,5 +1,5 @@
|
||||
/* SSL support via GnuTLS library.
|
||||
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
|
||||
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -54,15 +54,38 @@ as that of the covered work. */
|
||||
# include "w32sock.h"
|
||||
#endif
|
||||
|
||||
#include "host.h"
|
||||
|
||||
static int
|
||||
key_type_to_gnutls_type (enum keyfile_type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case keyfile_pem:
|
||||
return GNUTLS_X509_FMT_PEM;
|
||||
case keyfile_asn1:
|
||||
return GNUTLS_X509_FMT_DER;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Note: some of the functions private to this file have names that
|
||||
begin with "wgnutls_" (e.g. wgnutls_read) so that they wouldn't be
|
||||
confused with actual gnutls functions -- such as the gnutls_read
|
||||
preprocessor macro. */
|
||||
|
||||
static gnutls_certificate_credentials credentials;
|
||||
static gnutls_certificate_credentials_t credentials;
|
||||
bool
|
||||
ssl_init ()
|
||||
ssl_init (void)
|
||||
{
|
||||
/* Becomes true if GnuTLS is initialized. */
|
||||
static bool ssl_initialized = false;
|
||||
|
||||
/* GnuTLS should be initialized only once. */
|
||||
if (ssl_initialized)
|
||||
return true;
|
||||
|
||||
const char *ca_directory;
|
||||
DIR *dir;
|
||||
|
||||
@ -101,15 +124,48 @@ ssl_init ()
|
||||
closedir (dir);
|
||||
}
|
||||
|
||||
/* Use the private key from the cert file unless otherwise specified. */
|
||||
if (opt.cert_file && !opt.private_key)
|
||||
{
|
||||
opt.private_key = opt.cert_file;
|
||||
opt.private_key_type = opt.cert_type;
|
||||
}
|
||||
/* Use the cert from the private key file unless otherwise specified. */
|
||||
if (!opt.cert_file && opt.private_key)
|
||||
{
|
||||
opt.cert_file = opt.private_key;
|
||||
opt.cert_type = opt.private_key_type;
|
||||
}
|
||||
|
||||
if (opt.cert_file && opt.private_key)
|
||||
{
|
||||
int type;
|
||||
if (opt.private_key_type != opt.cert_type)
|
||||
{
|
||||
/* GnuTLS can't handle this */
|
||||
logprintf (LOG_NOTQUIET, _("ERROR: GnuTLS requires the key and the \
|
||||
cert to be of the same type.\n"));
|
||||
}
|
||||
|
||||
type = key_type_to_gnutls_type (opt.private_key_type);
|
||||
|
||||
gnutls_certificate_set_x509_key_file (credentials, opt.cert_file,
|
||||
opt.private_key,
|
||||
type);
|
||||
}
|
||||
|
||||
if (opt.ca_cert)
|
||||
gnutls_certificate_set_x509_trust_file (credentials, opt.ca_cert,
|
||||
GNUTLS_X509_FMT_PEM);
|
||||
|
||||
ssl_initialized = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct wgnutls_transport_context
|
||||
{
|
||||
gnutls_session session; /* GnuTLS session handle */
|
||||
gnutls_session_t session; /* GnuTLS session handle */
|
||||
int last_error; /* last error returned by read/write/... */
|
||||
|
||||
/* Since GnuTLS doesn't support the equivalent to recv(...,
|
||||
@ -132,7 +188,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
|
||||
int flags = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
struct ptimer *timer;
|
||||
struct ptimer *timer = NULL;
|
||||
struct wgnutls_transport_context *ctx = arg;
|
||||
int timed_out = 0;
|
||||
|
||||
@ -142,64 +198,56 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
|
||||
flags = fcntl (fd, F_GETFL, 0);
|
||||
if (flags < 0)
|
||||
return flags;
|
||||
if (fcntl (fd, F_SETFL, flags | O_NONBLOCK))
|
||||
return -1;
|
||||
#else
|
||||
/* XXX: Assume it was blocking before. */
|
||||
const int one = 1;
|
||||
if (ioctl (fd, FIONBIO, &one) < 0)
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
timer = ptimer_new ();
|
||||
if (timer == 0)
|
||||
if (timer == NULL)
|
||||
return -1;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
double next_timeout = timeout - ptimer_measure (timer);
|
||||
if (timeout && next_timeout < 0)
|
||||
break;
|
||||
double next_timeout = 0;
|
||||
if (timeout)
|
||||
{
|
||||
next_timeout = timeout - ptimer_measure (timer);
|
||||
if (next_timeout < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = GNUTLS_E_AGAIN;
|
||||
if (timeout == 0 || gnutls_record_check_pending (ctx->session)
|
||||
|| select_fd (fd, next_timeout, WAIT_FOR_READ))
|
||||
{
|
||||
if (timeout)
|
||||
{
|
||||
#ifdef F_GETFL
|
||||
ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
#else
|
||||
/* XXX: Assume it was blocking before. */
|
||||
const int one = 1;
|
||||
ret = ioctl (fd, FIONBIO, &one);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
ret = gnutls_record_recv (ctx->session, buf, bufsize);
|
||||
|
||||
if (timeout)
|
||||
{
|
||||
int status;
|
||||
#ifdef F_GETFL
|
||||
status = fcntl (fd, F_SETFL, flags);
|
||||
if (status < 0)
|
||||
return status;
|
||||
#else
|
||||
const int zero = 0;
|
||||
status = ioctl (fd, FIONBIO, &zero);
|
||||
if (status < 0)
|
||||
return status;
|
||||
#endif
|
||||
}
|
||||
timed_out = timeout && ptimer_measure (timer) >= timeout;
|
||||
}
|
||||
|
||||
timed_out = timeout && ptimer_measure (timer) >= timeout;
|
||||
}
|
||||
while (ret == GNUTLS_E_INTERRUPTED || (ret == GNUTLS_E_AGAIN && !timed_out));
|
||||
|
||||
if (timeout)
|
||||
ptimer_destroy (timer);
|
||||
{
|
||||
ptimer_destroy (timer);
|
||||
|
||||
if (timeout && timed_out && ret == GNUTLS_E_AGAIN)
|
||||
errno = ETIMEDOUT;
|
||||
#ifdef F_GETFL
|
||||
if (fcntl (fd, F_SETFL, flags) < 0)
|
||||
return -1;
|
||||
#else
|
||||
const int zero = 0;
|
||||
if (ioctl (fd, FIONBIO, &zero) < 0)
|
||||
return -1;
|
||||
#endif
|
||||
|
||||
if (timed_out && ret == GNUTLS_E_AGAIN)
|
||||
errno = ETIMEDOUT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -207,11 +255,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
|
||||
static int
|
||||
wgnutls_read (int fd, char *buf, int bufsize, void *arg)
|
||||
{
|
||||
#ifdef F_GETFL
|
||||
int flags = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
struct ptimer *timer;
|
||||
struct wgnutls_transport_context *ctx = arg;
|
||||
|
||||
if (ctx->peeklen)
|
||||
@ -250,8 +294,12 @@ static int
|
||||
wgnutls_poll (int fd, double timeout, int wait_for, void *arg)
|
||||
{
|
||||
struct wgnutls_transport_context *ctx = arg;
|
||||
return ctx->peeklen || gnutls_record_check_pending (ctx->session)
|
||||
|| select_fd (fd, timeout, wait_for);
|
||||
|
||||
if (timeout)
|
||||
return ctx->peeklen || gnutls_record_check_pending (ctx->session)
|
||||
|| select_fd (fd, timeout, wait_for);
|
||||
else
|
||||
return ctx->peeklen || gnutls_record_check_pending (ctx->session);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -260,15 +308,19 @@ wgnutls_peek (int fd, char *buf, int bufsize, void *arg)
|
||||
int read = 0;
|
||||
struct wgnutls_transport_context *ctx = arg;
|
||||
int offset = MIN (bufsize, ctx->peeklen);
|
||||
|
||||
if (ctx->peeklen)
|
||||
{
|
||||
memcpy (buf, ctx->peekbuf, offset);
|
||||
return offset;
|
||||
}
|
||||
|
||||
if (bufsize > sizeof ctx->peekbuf)
|
||||
bufsize = sizeof ctx->peekbuf;
|
||||
|
||||
if (ctx->peeklen)
|
||||
memcpy (buf, ctx->peekbuf, offset);
|
||||
|
||||
if (bufsize > offset)
|
||||
{
|
||||
if (gnutls_record_check_pending (ctx->session) <= 0
|
||||
if (opt.read_timeout && gnutls_record_check_pending (ctx->session) == 0
|
||||
&& select_fd (fd, 0.0, WAIT_FOR_READ) <= 0)
|
||||
read = 0;
|
||||
else
|
||||
@ -320,18 +372,26 @@ static struct transport_implementation wgnutls_transport =
|
||||
};
|
||||
|
||||
bool
|
||||
ssl_connect_wget (int fd)
|
||||
ssl_connect_wget (int fd, const char *hostname)
|
||||
{
|
||||
struct wgnutls_transport_context *ctx;
|
||||
gnutls_session session;
|
||||
gnutls_session_t session;
|
||||
int err;
|
||||
gnutls_init (&session, GNUTLS_CLIENT);
|
||||
|
||||
/* We set the server name but only if it's not an IP address. */
|
||||
if (! is_valid_ip_address (hostname))
|
||||
{
|
||||
gnutls_server_name_set (session, GNUTLS_NAME_DNS, hostname,
|
||||
strlen (hostname));
|
||||
}
|
||||
|
||||
gnutls_set_default_priority (session);
|
||||
gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, credentials);
|
||||
#ifndef FD_TO_SOCKET
|
||||
# define FD_TO_SOCKET(X) (X)
|
||||
#endif
|
||||
gnutls_transport_set_ptr (session, (gnutls_transport_ptr) FD_TO_SOCKET (fd));
|
||||
gnutls_transport_set_ptr (session, (gnutls_transport_ptr_t) FD_TO_SOCKET (fd));
|
||||
|
||||
err = 0;
|
||||
#if HAVE_GNUTLS_PRIORITY_SET_DIRECT
|
||||
@ -438,8 +498,8 @@ ssl_check_certificate (int fd, const char *host)
|
||||
if (gnutls_certificate_type_get (ctx->session) == GNUTLS_CRT_X509)
|
||||
{
|
||||
time_t now = time (NULL);
|
||||
gnutls_x509_crt cert;
|
||||
const gnutls_datum *cert_list;
|
||||
gnutls_x509_crt_t cert;
|
||||
const gnutls_datum_t *cert_list;
|
||||
unsigned int cert_list_size;
|
||||
|
||||
if ((err = gnutls_x509_crt_init (&cert)) < 0)
|
||||
|
@ -423,14 +423,14 @@ grow_hash_table (struct hash_table *ht)
|
||||
table if necessary. */
|
||||
|
||||
void
|
||||
hash_table_put (struct hash_table *ht, const void *key, void *value)
|
||||
hash_table_put (struct hash_table *ht, const void *key, const void *value)
|
||||
{
|
||||
struct cell *c = find_cell (ht, key);
|
||||
if (CELL_OCCUPIED (c))
|
||||
{
|
||||
/* update existing item */
|
||||
c->key = (void *)key; /* const? */
|
||||
c->value = value;
|
||||
c->value = (void *)value;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -445,7 +445,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
|
||||
/* add new item */
|
||||
++ht->count;
|
||||
c->key = (void *)key; /* const? */
|
||||
c->value = value;
|
||||
c->value = (void *)value;
|
||||
}
|
||||
|
||||
/* Remove KEY->value mapping from HT. Return 0 if there was no such
|
||||
|
@ -42,7 +42,7 @@ int hash_table_get_pair (const struct hash_table *, const void *,
|
||||
void *, void *);
|
||||
int hash_table_contains (const struct hash_table *, const void *);
|
||||
|
||||
void hash_table_put (struct hash_table *, const void *, void *);
|
||||
void hash_table_put (struct hash_table *, const void *, const void *);
|
||||
int hash_table_remove (struct hash_table *, const void *);
|
||||
void hash_table_clear (struct hash_table *);
|
||||
|
||||
|
17
src/host.c
17
src/host.c
@ -1,6 +1,6 @@
|
||||
/* Host name resolution and matching.
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -914,3 +914,18 @@ host_cleanup (void)
|
||||
host_name_addresses_map = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if NAME, taken as a whole NUL-terminated string, is
   accepted by is_valid_ipv4_address or, when wget is built with
   ENABLE_IPV6, by is_valid_ipv6_address.  Return false otherwise.  */
bool
is_valid_ip_address (const char *name)
{
  /* The helpers take a [begin, end) range rather than a C string.  */
  const char *const name_end = name + strlen (name);
  bool valid = false;

  if (is_valid_ipv4_address (name, name_end))
    valid = true;
#ifdef ENABLE_IPV6
  else if (is_valid_ipv6_address (name, name_end))
    valid = true;
#endif

  return valid;
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Declarations for host.c
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -98,6 +98,8 @@ const char *print_address (const ip_address *);
|
||||
bool is_valid_ipv6_address (const char *, const char *);
|
||||
#endif
|
||||
|
||||
bool is_valid_ip_address (const char *name);
|
||||
|
||||
bool accept_domain (struct url *);
|
||||
bool sufmatch (const char **, const char *);
|
||||
|
||||
|
@ -280,7 +280,7 @@ struct tagstack_item {
|
||||
struct tagstack_item *next;
|
||||
};
|
||||
|
||||
struct tagstack_item *
|
||||
static struct tagstack_item *
|
||||
tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
|
||||
{
|
||||
struct tagstack_item *ts = xmalloc(sizeof(struct tagstack_item));
|
||||
@ -301,7 +301,7 @@ tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
|
||||
}
|
||||
|
||||
/* remove ts and everything after it from the stack */
|
||||
void
|
||||
static void
|
||||
tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
|
||||
struct tagstack_item *ts)
|
||||
{
|
||||
@ -343,7 +343,7 @@ tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
|
||||
}
|
||||
}
|
||||
|
||||
struct tagstack_item *
|
||||
static struct tagstack_item *
|
||||
tagstack_find (struct tagstack_item *tail, const char *tagname_begin,
|
||||
const char *tagname_end)
|
||||
{
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Collect URLs from HTML source.
|
||||
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
|
||||
2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
|
||||
@ -675,8 +675,9 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
|
||||
|
||||
check_style_attr (tag, ctx);
|
||||
|
||||
if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
|
||||
tag->contents_begin && tag->contents_end)
|
||||
if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style"))
|
||||
&& tag->contents_begin && tag->contents_end
|
||||
&& tag->contents_begin <= tag->contents_end)
|
||||
{
|
||||
/* parse contents */
|
||||
get_urls_css (ctx, tag->contents_begin - ctx->text,
|
||||
@ -829,7 +830,7 @@ get_urls_file (const char *file)
|
||||
return head;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
cleanup_html_url (void)
|
||||
{
|
||||
/* Destroy the hash tables. The hash table keys and values are not
|
||||
|
604
src/http.c
604
src/http.c
@ -1,6 +1,6 @@
|
||||
/* HTTP support.
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -62,6 +62,7 @@ as that of the covered work. */
|
||||
#include "md5.h"
|
||||
#include "convert.h"
|
||||
#include "spider.h"
|
||||
#include "warc.h"
|
||||
|
||||
#ifdef TESTING
|
||||
#include "test.h"
|
||||
@ -234,7 +235,7 @@ release_header (struct request_header *hdr)
|
||||
*/
|
||||
|
||||
static void
|
||||
request_set_header (struct request *req, char *name, char *value,
|
||||
request_set_header (struct request *req, const char *name, const char *value,
|
||||
enum rp release_policy)
|
||||
{
|
||||
struct request_header *hdr;
|
||||
@ -245,7 +246,7 @@ request_set_header (struct request *req, char *name, char *value,
|
||||
/* A NULL value is a no-op; if freeing the name is requested,
|
||||
free it now to avoid leaks. */
|
||||
if (release_policy == rel_name || release_policy == rel_both)
|
||||
xfree (name);
|
||||
xfree ((void *)name);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -256,8 +257,8 @@ request_set_header (struct request *req, char *name, char *value,
|
||||
{
|
||||
/* Replace existing header. */
|
||||
release_header (hdr);
|
||||
hdr->name = name;
|
||||
hdr->value = value;
|
||||
hdr->name = (void *)name;
|
||||
hdr->value = (void *)value;
|
||||
hdr->release_policy = release_policy;
|
||||
return;
|
||||
}
|
||||
@ -271,8 +272,8 @@ request_set_header (struct request *req, char *name, char *value,
|
||||
req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
|
||||
}
|
||||
hdr = &req->headers[req->hcount++];
|
||||
hdr->name = name;
|
||||
hdr->value = value;
|
||||
hdr->name = (void *)name;
|
||||
hdr->value = (void *)value;
|
||||
hdr->release_policy = release_policy;
|
||||
}
|
||||
|
||||
@ -299,7 +300,7 @@ request_set_user_header (struct request *req, const char *header)
|
||||
the header was actually removed, false otherwise. */
|
||||
|
||||
static bool
|
||||
request_remove_header (struct request *req, char *name)
|
||||
request_remove_header (struct request *req, const char *name)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < req->hcount; i++)
|
||||
@ -324,10 +325,12 @@ request_remove_header (struct request *req, char *name)
|
||||
p += A_len; \
|
||||
} while (0)
|
||||
|
||||
/* Construct the request and write it to FD using fd_write. */
|
||||
/* Construct the request and write it to FD using fd_write.
|
||||
If warc_tmp is set to a file pointer, the request string will
|
||||
also be written to that file. */
|
||||
|
||||
static int
|
||||
request_send (const struct request *req, int fd)
|
||||
request_send (const struct request *req, int fd, FILE *warc_tmp)
|
||||
{
|
||||
char *request_string, *p;
|
||||
int i, size, write_error;
|
||||
@ -378,6 +381,13 @@ request_send (const struct request *req, int fd)
|
||||
if (write_error < 0)
|
||||
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
|
||||
fd_errstr (fd));
|
||||
else if (warc_tmp != NULL)
|
||||
{
|
||||
/* Write a copy of the data to the WARC record. */
|
||||
int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
|
||||
if (warc_tmp_written != size - 1)
|
||||
return -2;
|
||||
}
|
||||
return write_error;
|
||||
}
|
||||
|
||||
@ -448,10 +458,12 @@ register_basic_auth_host (const char *hostname)
|
||||
|
||||
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
|
||||
PROMISED_SIZE bytes are sent over the wire -- if the file is
|
||||
longer, read only that much; if the file is shorter, report an error. */
|
||||
longer, read only that much; if the file is shorter, report an error.
|
||||
If warc_tmp is set to a file pointer, the post data will
|
||||
also be written to that file. */
|
||||
|
||||
static int
|
||||
post_file (int sock, const char *file_name, wgint promised_size)
|
||||
post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
|
||||
{
|
||||
static char chunk[8192];
|
||||
wgint written = 0;
|
||||
@ -476,6 +488,16 @@ post_file (int sock, const char *file_name, wgint promised_size)
|
||||
fclose (fp);
|
||||
return -1;
|
||||
}
|
||||
if (warc_tmp != NULL)
|
||||
{
|
||||
/* Write a copy of the data to the WARC record. */
|
||||
int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
|
||||
if (warc_tmp_written != towrite)
|
||||
{
|
||||
fclose (fp);
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
written += towrite;
|
||||
}
|
||||
fclose (fp);
|
||||
@ -933,9 +955,12 @@ skip_short_body (int fd, wgint contlen, bool chunked)
|
||||
break;
|
||||
|
||||
remaining_chunk_size = strtol (line, &endl, 16);
|
||||
xfree (line);
|
||||
|
||||
if (remaining_chunk_size == 0)
|
||||
{
|
||||
fd_read_line (fd);
|
||||
line = fd_read_line (fd);
|
||||
xfree_null (line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -960,8 +985,13 @@ skip_short_body (int fd, wgint contlen, bool chunked)
|
||||
{
|
||||
remaining_chunk_size -= ret;
|
||||
if (remaining_chunk_size == 0)
|
||||
if (fd_read_line (fd) == NULL)
|
||||
return false;
|
||||
{
|
||||
char *line = fd_read_line (fd);
|
||||
if (line == NULL)
|
||||
return false;
|
||||
else
|
||||
xfree (line);
|
||||
}
|
||||
}
|
||||
|
||||
/* Safe even if %.*s bogusly expects terminating \0 because
|
||||
@ -1651,6 +1681,135 @@ File %s already there; not retrieving.\n\n"), quote (filename));
|
||||
*dt |= TEXTHTML;
|
||||
}
|
||||
|
||||
/* Download the response body from the socket and writes it to
|
||||
an output file. The headers have already been read from the
|
||||
socket. If WARC is enabled, the response body will also be
|
||||
written to a WARC response record.
|
||||
|
||||
hs, contlen, contrange, chunked_transfer_encoding and url are
|
||||
parameters from the gethttp method. fp is a pointer to the
|
||||
output file.
|
||||
|
||||
url, warc_timestamp_str, warc_request_uuid, warc_ip, type
|
||||
and statcode will be saved in the headers of the WARC record.
|
||||
The head parameter contains the HTTP headers of the response.
|
||||
|
||||
If fp is NULL and WARC is enabled, the response body will be
|
||||
written only to the WARC file. If WARC is disabled and fp
|
||||
is a file pointer, the data will be written to the file.
|
||||
If fp is a file pointer and WARC is enabled, the body will
|
||||
be written to both destinations.
|
||||
|
||||
Returns the error code. */
|
||||
static int
|
||||
read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
|
||||
wgint contrange, bool chunked_transfer_encoding,
|
||||
char *url, char *warc_timestamp_str, char *warc_request_uuid,
|
||||
ip_address *warc_ip, char *type, int statcode, char *head)
|
||||
{
|
||||
int warc_payload_offset = 0;
|
||||
FILE *warc_tmp = NULL;
|
||||
int warcerr = 0;
|
||||
|
||||
if (opt.warc_filename != NULL)
|
||||
{
|
||||
/* Open a temporary file where we can write the response before we
|
||||
add it to the WARC record. */
|
||||
warc_tmp = warc_tempfile ();
|
||||
if (warc_tmp == NULL)
|
||||
warcerr = WARC_TMP_FOPENERR;
|
||||
|
||||
if (warcerr == 0)
|
||||
{
|
||||
/* We should keep the response headers for the WARC record. */
|
||||
int head_len = strlen (head);
|
||||
int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
|
||||
if (warc_tmp_written != head_len)
|
||||
warcerr = WARC_TMP_FWRITEERR;
|
||||
warc_payload_offset = head_len;
|
||||
}
|
||||
|
||||
if (warcerr != 0)
|
||||
{
|
||||
if (warc_tmp != NULL)
|
||||
fclose (warc_tmp);
|
||||
return warcerr;
|
||||
}
|
||||
}
|
||||
|
||||
if (fp != NULL)
|
||||
{
|
||||
/* This confuses the timestamping code that checks for file size.
|
||||
#### The timestamping code should be smarter about file size. */
|
||||
if (opt.save_headers && hs->restval == 0)
|
||||
fwrite (head, 1, strlen (head), fp);
|
||||
}
|
||||
|
||||
/* Read the response body. */
|
||||
int flags = 0;
|
||||
if (contlen != -1)
|
||||
/* If content-length is present, read that much; otherwise, read
|
||||
until EOF. The HTTP spec doesn't require the server to
|
||||
actually close the connection when it's done sending data. */
|
||||
flags |= rb_read_exactly;
|
||||
if (fp != NULL && hs->restval > 0 && contrange == 0)
|
||||
/* If the server ignored our range request, instruct fd_read_body
|
||||
to skip the first RESTVAL bytes of body. */
|
||||
flags |= rb_skip_startpos;
|
||||
if (chunked_transfer_encoding)
|
||||
flags |= rb_chunked_transfer_encoding;
|
||||
|
||||
hs->len = hs->restval;
|
||||
hs->rd_size = 0;
|
||||
/* Download the response body and write it to fp.
|
||||
If we are working on a WARC file, we simultaneously write the
|
||||
response body to warc_tmp. */
|
||||
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
|
||||
hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
|
||||
flags, warc_tmp);
|
||||
if (hs->res >= 0)
|
||||
{
|
||||
if (warc_tmp != NULL)
|
||||
{
|
||||
/* Create a response record and write it to the WARC file.
|
||||
Note: per the WARC standard, the request and response should share
|
||||
the same date header. We re-use the timestamp of the request.
|
||||
The response record should also refer to the uuid of the request. */
|
||||
bool r = warc_write_response_record (url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip,
|
||||
warc_tmp, warc_payload_offset,
|
||||
type, statcode, hs->newloc);
|
||||
|
||||
/* warc_write_response_record has closed warc_tmp. */
|
||||
|
||||
if (! r)
|
||||
return WARC_ERR;
|
||||
}
|
||||
|
||||
return RETRFINISHED;
|
||||
}
|
||||
|
||||
if (warc_tmp != NULL)
|
||||
fclose (warc_tmp);
|
||||
|
||||
if (hs->res == -2)
|
||||
{
|
||||
/* Error while writing to fd. */
|
||||
return FWRITEERR;
|
||||
}
|
||||
else if (hs->res == -3)
|
||||
{
|
||||
/* Error while writing to warc_tmp. */
|
||||
return WARC_TMP_FWRITEERR;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A read error! */
|
||||
hs->rderrmsg = xstrdup (fd_errstr (sock));
|
||||
return RETRFINISHED;
|
||||
}
|
||||
}
|
||||
|
||||
#define BEGINS_WITH(line, string_constant) \
|
||||
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
|
||||
&& (c_isspace (line[sizeof (string_constant) - 1]) \
|
||||
@ -1708,9 +1867,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
wgint contlen, contrange;
|
||||
struct url *conn;
|
||||
FILE *fp;
|
||||
int err;
|
||||
|
||||
int sock = -1;
|
||||
int flags;
|
||||
|
||||
#ifdef ENABLE_THREADS
|
||||
struct s_pconn *pconn = NULL;
|
||||
@ -1740,6 +1899,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
char hdrval[256];
|
||||
char *message;
|
||||
|
||||
/* Declare WARC variables. */
|
||||
bool warc_enabled = (opt.warc_filename != NULL);
|
||||
FILE *warc_tmp = NULL;
|
||||
char warc_timestamp_str [21];
|
||||
char warc_request_uuid [48];
|
||||
ip_address *warc_ip = NULL;
|
||||
off_t warc_payload_offset = -1;
|
||||
|
||||
/* Whether this connection will be kept alive after the HTTP request
|
||||
is done. */
|
||||
bool keep_alive;
|
||||
@ -1995,12 +2162,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
0,
|
||||
#endif
|
||||
&host_lookup_failed))
|
||||
{
|
||||
{
|
||||
int family = socket_family (pconn.socket, ENDPOINT_PEER);
|
||||
sock = pconn.socket;
|
||||
using_ssl = pconn.ssl;
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn.host),
|
||||
pconn.port);
|
||||
if (family == AF_INET6)
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn.host),
|
||||
pconn.port);
|
||||
else
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn.host),
|
||||
pconn.port);
|
||||
DEBUGP (("Reusing fd %d.\n", sock));
|
||||
if (pconn.authorized)
|
||||
#else
|
||||
@ -2015,11 +2188,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
|
||||
if (pconn)
|
||||
{
|
||||
int family = socket_family (pconn->socket, ENDPOINT_PEER);
|
||||
sock = pconn->socket;
|
||||
using_ssl = pconn->ssl;
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn->host),
|
||||
pconn->port);
|
||||
if (family == AF_INET6)
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn.host),
|
||||
pconn.port);
|
||||
else
|
||||
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
||||
quotearg_style (escape_quoting_style, pconn.host),
|
||||
pconn.port);
|
||||
DEBUGP (("Reusing fd %d.\n", sock));
|
||||
if (pconn->authorized)
|
||||
#endif
|
||||
@ -2076,11 +2255,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
that the contents of Host would be exactly the same as
|
||||
the contents of CONNECT. */
|
||||
|
||||
write_error = request_send (connreq, sock);
|
||||
write_error = request_send (connreq, sock, 0);
|
||||
request_free (connreq);
|
||||
if (write_error < 0)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
return WRITEFAILED;
|
||||
}
|
||||
|
||||
@ -2090,6 +2270,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
|
||||
fd_errstr (sock));
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
return HERR;
|
||||
}
|
||||
message = NULL;
|
||||
@ -2110,6 +2291,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
quotearg_style (escape_quoting_style,
|
||||
_("Malformed status line")));
|
||||
xfree (head);
|
||||
request_free (req);
|
||||
return HERR;
|
||||
}
|
||||
hs->message = xstrdup (message);
|
||||
@ -2121,6 +2303,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
|
||||
message ? quotearg_style (escape_quoting_style, message) : "?");
|
||||
xfree_null (message);
|
||||
request_free (req);
|
||||
return CONSSLERR;
|
||||
}
|
||||
xfree_null (message);
|
||||
@ -2133,14 +2316,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
|
||||
if (conn->scheme == SCHEME_HTTPS)
|
||||
{
|
||||
if (!ssl_connect_wget (sock))
|
||||
if (!ssl_connect_wget (sock, u->host))
|
||||
{
|
||||
fd_close (sock);
|
||||
request_free (req);
|
||||
return CONSSLERR;
|
||||
}
|
||||
else if (!ssl_check_certificate (sock, u->host))
|
||||
{
|
||||
fd_close (sock);
|
||||
request_free (req);
|
||||
return VERIFCERTERR;
|
||||
}
|
||||
using_ssl = true;
|
||||
@ -2148,8 +2333,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
#endif /* HAVE_SSL */
|
||||
}
|
||||
|
||||
/* Open the temporary file where we will write the request. */
|
||||
if (warc_enabled)
|
||||
{
|
||||
warc_tmp = warc_tempfile ();
|
||||
if (warc_tmp == NULL)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
return WARC_TMP_FOPENERR;
|
||||
}
|
||||
|
||||
if (! proxy)
|
||||
{
|
||||
warc_ip = (ip_address *) alloca (sizeof (ip_address));
|
||||
socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
|
||||
}
|
||||
}
|
||||
|
||||
/* Send the request to server. */
|
||||
write_error = request_send (req, sock);
|
||||
write_error = request_send (req, sock, warc_tmp);
|
||||
|
||||
if (write_error >= 0)
|
||||
{
|
||||
@ -2157,16 +2360,39 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
{
|
||||
DEBUGP (("[POST data: %s]\n", opt.post_data));
|
||||
write_error = fd_write (sock, opt.post_data, post_data_size, -1);
|
||||
if (write_error >= 0 && warc_tmp != NULL)
|
||||
{
|
||||
/* Remember end of headers / start of payload. */
|
||||
warc_payload_offset = ftello (warc_tmp);
|
||||
|
||||
/* Write a copy of the data to the WARC record. */
|
||||
int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
|
||||
if (warc_tmp_written != post_data_size)
|
||||
write_error = -2;
|
||||
}
|
||||
}
|
||||
else if (opt.post_file_name && post_data_size != 0)
|
||||
write_error = post_file (sock, opt.post_file_name, post_data_size);
|
||||
{
|
||||
if (warc_tmp != NULL)
|
||||
/* Remember end of headers / start of payload. */
|
||||
warc_payload_offset = ftello (warc_tmp);
|
||||
|
||||
write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (write_error < 0)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
return WRITEFAILED;
|
||||
|
||||
if (warc_tmp != NULL)
|
||||
fclose (warc_tmp);
|
||||
|
||||
if (write_error == -2)
|
||||
return WARC_TMP_FWRITEERR;
|
||||
else
|
||||
return WRITEFAILED;
|
||||
}
|
||||
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
||||
proxy ? "Proxy" : "HTTP");
|
||||
@ -2174,6 +2400,29 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
|
||||
contrange = 0;
|
||||
*dt &= ~RETROKF;
|
||||
|
||||
|
||||
if (warc_enabled)
|
||||
{
|
||||
bool warc_result;
|
||||
/* Generate a timestamp and uuid for this request. */
|
||||
warc_timestamp (warc_timestamp_str);
|
||||
warc_uuid_str (warc_request_uuid);
|
||||
|
||||
/* Create a request record and store it in the WARC file. */
|
||||
warc_result = warc_write_request_record (u->url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip,
|
||||
warc_tmp, warc_payload_offset);
|
||||
if (! warc_result)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
return WARC_ERR;
|
||||
}
|
||||
|
||||
/* warc_write_request_record has also closed warc_tmp. */
|
||||
}
|
||||
|
||||
|
||||
read_header:
|
||||
head = read_http_response_head (sock);
|
||||
if (!head)
|
||||
@ -2209,6 +2458,7 @@ read_header:
|
||||
quotearg_style (escape_quoting_style,
|
||||
_("Malformed status line")));
|
||||
CLOSE_INVALIDATE (sock);
|
||||
resp_free (resp);
|
||||
request_free (req);
|
||||
xfree (head);
|
||||
return HERR;
|
||||
@ -2217,6 +2467,7 @@ read_header:
|
||||
if (H_10X (statcode))
|
||||
{
|
||||
DEBUGP (("Ignoring response\n"));
|
||||
resp_free (resp);
|
||||
xfree (head);
|
||||
goto read_header;
|
||||
}
|
||||
@ -2297,11 +2548,42 @@ read_header:
|
||||
if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
||||
{
|
||||
/* Authorization is required. */
|
||||
if (keep_alive && !head_only
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
CLOSE_FINISH (sock);
|
||||
|
||||
/* Normally we are not interested in the response body.
|
||||
But if we are writing a WARC file we are: we like to keep everything. */
|
||||
if (warc_enabled)
|
||||
{
|
||||
int err;
|
||||
type = resp_header_strdup (resp, "Content-Type");
|
||||
err = read_response_body (hs, sock, NULL, contlen, 0,
|
||||
chunked_transfer_encoding,
|
||||
u->url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip, type,
|
||||
statcode, head);
|
||||
xfree_null (type);
|
||||
|
||||
if (err != RETRFINISHED || hs->res < 0)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
request_free (req);
|
||||
xfree_null (message);
|
||||
resp_free (resp);
|
||||
xfree (head);
|
||||
return err;
|
||||
}
|
||||
else
|
||||
CLOSE_FINISH (sock);
|
||||
}
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
{
|
||||
/* Since WARC is disabled, we are not interested in the response body. */
|
||||
if (keep_alive && !head_only
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
CLOSE_FINISH (sock);
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
}
|
||||
|
||||
#ifndef ENABLE_THREADS
|
||||
pconn.authorized = false;
|
||||
#else
|
||||
@ -2416,6 +2698,8 @@ read_header:
|
||||
retrieve the file. But if the output_document was given, then this
|
||||
test was already done and the file didn't exist. Hence the !opt.output_document */
|
||||
get_file_flags (hs->local_file, dt);
|
||||
request_free (req);
|
||||
resp_free (resp);
|
||||
xfree (head);
|
||||
xfree_null (message);
|
||||
return RETRUNNEEDED;
|
||||
@ -2559,11 +2843,42 @@ read_header:
|
||||
_("Location: %s%s\n"),
|
||||
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
|
||||
hs->newloc ? _(" [following]") : "");
|
||||
if (keep_alive && !head_only
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
CLOSE_FINISH (sock);
|
||||
|
||||
/* In case the caller cares to look... */
|
||||
hs->len = 0;
|
||||
hs->res = 0;
|
||||
hs->restval = 0;
|
||||
|
||||
/* Normally we are not interested in the response body of a redirect.
|
||||
But if we are writing a WARC file we are: we like to keep everything. */
|
||||
if (warc_enabled)
|
||||
{
|
||||
int err = read_response_body (hs, sock, NULL, contlen, 0,
|
||||
chunked_transfer_encoding,
|
||||
u->url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip, type,
|
||||
statcode, head);
|
||||
|
||||
if (err != RETRFINISHED || hs->res < 0)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
xfree_null (type);
|
||||
xfree (head);
|
||||
return err;
|
||||
}
|
||||
else
|
||||
CLOSE_FINISH (sock);
|
||||
}
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
{
|
||||
/* Since WARC is disabled, we are not interested in the response body. */
|
||||
if (keep_alive && !head_only
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
CLOSE_FINISH (sock);
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
}
|
||||
|
||||
xfree_null (type);
|
||||
xfree (head);
|
||||
/* From RFC2616: The status codes 303 and 307 have
|
||||
@ -2680,8 +2995,6 @@ read_header:
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
}
|
||||
}
|
||||
xfree_null (type);
|
||||
type = NULL; /* We don't need it any more. */
|
||||
|
||||
/* Return if we have no intention of further downloading. */
|
||||
if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
|
||||
@ -2689,21 +3002,48 @@ read_header:
|
||||
/* In case the caller cares to look... */
|
||||
hs->len = 0;
|
||||
hs->res = 0;
|
||||
xfree_null (type);
|
||||
if (head_only)
|
||||
/* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
|
||||
servers not to send body in response to a HEAD request, and
|
||||
those that do will likely be caught by test_socket_open.
|
||||
If not, they can be worked around using
|
||||
`--no-http-keep-alive'. */
|
||||
CLOSE_FINISH (sock);
|
||||
else if (keep_alive
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
/* Successfully skipped the body; also keep using the socket. */
|
||||
CLOSE_FINISH (sock);
|
||||
hs->restval = 0;
|
||||
|
||||
/* Normally we are not interested in the response body of an error response.
|
||||
But if we are writing a WARC file we are: we like to keep everything. */
|
||||
if (warc_enabled)
|
||||
{
|
||||
int err = read_response_body (hs, sock, NULL, contlen, 0,
|
||||
chunked_transfer_encoding,
|
||||
u->url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip, type,
|
||||
statcode, head);
|
||||
|
||||
if (err != RETRFINISHED || hs->res < 0)
|
||||
{
|
||||
CLOSE_INVALIDATE (sock);
|
||||
xfree (head);
|
||||
xfree_null (type);
|
||||
return err;
|
||||
}
|
||||
else
|
||||
CLOSE_FINISH (sock);
|
||||
}
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
{
|
||||
/* Since WARC is disabled, we are not interested in the response body. */
|
||||
if (head_only)
|
||||
/* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
|
||||
servers not to send body in response to a HEAD request, and
|
||||
those that do will likely be caught by test_socket_open.
|
||||
If not, they can be worked around using
|
||||
`--no-http-keep-alive'. */
|
||||
CLOSE_FINISH (sock);
|
||||
else if (keep_alive
|
||||
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
|
||||
/* Successfully skipped the body; also keep using the socket. */
|
||||
CLOSE_FINISH (sock);
|
||||
else
|
||||
CLOSE_INVALIDATE (sock);
|
||||
}
|
||||
|
||||
xfree (head);
|
||||
xfree_null (type);
|
||||
return RETRFINISHED;
|
||||
}
|
||||
|
||||
@ -2745,6 +3085,7 @@ read_header:
|
||||
strerror (errno));
|
||||
CLOSE_INVALIDATE (sock);
|
||||
xfree (head);
|
||||
xfree_null (type);
|
||||
return UNLINKERR;
|
||||
}
|
||||
}
|
||||
@ -2772,6 +3113,7 @@ read_header:
|
||||
hs->local_file);
|
||||
CLOSE_INVALIDATE (sock);
|
||||
xfree (head);
|
||||
xfree_null (type);
|
||||
return FOPEN_EXCL_ERR;
|
||||
}
|
||||
}
|
||||
@ -2780,6 +3122,7 @@ read_header:
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
|
||||
CLOSE_INVALIDATE (sock);
|
||||
xfree (head);
|
||||
xfree_null (type);
|
||||
return FOPENERR;
|
||||
}
|
||||
}
|
||||
@ -2793,49 +3136,26 @@ read_header:
|
||||
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
|
||||
}
|
||||
|
||||
/* This confuses the timestamping code that checks for file size.
|
||||
#### The timestamping code should be smarter about file size. */
|
||||
if (opt.save_headers && hs->restval == 0)
|
||||
fwrite (head, 1, strlen (head), fp);
|
||||
|
||||
err = read_response_body (hs, sock, fp, contlen, contrange,
|
||||
chunked_transfer_encoding,
|
||||
u->url, warc_timestamp_str,
|
||||
warc_request_uuid, warc_ip, type,
|
||||
statcode, head);
|
||||
|
||||
/* Now we no longer need to store the response header. */
|
||||
xfree (head);
|
||||
|
||||
/* Download the request body. */
|
||||
flags = 0;
|
||||
if (contlen != -1)
|
||||
/* If content-length is present, read that much; otherwise, read
|
||||
until EOF. The HTTP spec doesn't require the server to
|
||||
actually close the connection when it's done sending data. */
|
||||
flags |= rb_read_exactly;
|
||||
if (hs->restval > 0 && contrange == 0)
|
||||
/* If the server ignored our range request, instruct fd_read_body
|
||||
to skip the first RESTVAL bytes of body. */
|
||||
flags |= rb_skip_startpos;
|
||||
|
||||
if (chunked_transfer_encoding)
|
||||
flags |= rb_chunked_transfer_encoding;
|
||||
|
||||
hs->len = hs->restval;
|
||||
hs->rd_size = 0;
|
||||
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
|
||||
hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
|
||||
flags);
|
||||
xfree_null (type);
|
||||
|
||||
if (hs->res >= 0)
|
||||
CLOSE_FINISH (sock);
|
||||
else
|
||||
{
|
||||
if (hs->res < 0)
|
||||
hs->rderrmsg = xstrdup (fd_errstr (sock));
|
||||
CLOSE_INVALIDATE (sock);
|
||||
}
|
||||
CLOSE_INVALIDATE (sock);
|
||||
|
||||
if (!output_stream)
|
||||
fclose (fp);
|
||||
if (hs->res == -2)
|
||||
return FWRITEERR;
|
||||
return RETRFINISHED;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* The genuine HTTP loop! This is the part where the retrieval is
|
||||
@ -2859,6 +3179,12 @@ http_loop (struct url *u, struct url *original_url, char **newloc,
|
||||
char *file_name;
|
||||
bool force_full_retrieve = false;
|
||||
|
||||
|
||||
/* If we are writing to a WARC file: always retrieve the whole file. */
|
||||
if (opt.warc_filename != NULL)
|
||||
force_full_retrieve = true;
|
||||
|
||||
|
||||
#ifndef ENABLE_METALINK
|
||||
/* Assert that no value for *LOCAL_FILE was passed. */
|
||||
assert (local_file == NULL || *local_file == NULL);
|
||||
@ -3047,6 +3373,18 @@ Spider mode enabled. Check if remote file exists.\n"));
|
||||
/* Fatal errors just return from the function. */
|
||||
ret = err;
|
||||
goto exit;
|
||||
case WARC_ERR:
|
||||
/* A fatal WARC error. */
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET, _("Cannot write to WARC file.\n"));
|
||||
ret = err;
|
||||
goto exit;
|
||||
case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
|
||||
/* A fatal WARC error. */
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
|
||||
ret = err;
|
||||
goto exit;
|
||||
case CONSSLERR:
|
||||
/* Another fatal error. */
|
||||
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
||||
@ -3575,19 +3913,23 @@ digest_authentication_encode (const char *au, const char *user,
|
||||
const char *passwd, const char *method,
|
||||
const char *path)
|
||||
{
|
||||
static char *realm, *opaque, *nonce;
|
||||
static char *realm, *opaque, *nonce, *qop;
|
||||
static struct {
|
||||
const char *name;
|
||||
char **variable;
|
||||
} options[] = {
|
||||
{ "realm", &realm },
|
||||
{ "opaque", &opaque },
|
||||
{ "nonce", &nonce }
|
||||
{ "nonce", &nonce },
|
||||
{ "qop", &qop }
|
||||
};
|
||||
char cnonce[16] = "";
|
||||
char *res;
|
||||
size_t res_size;
|
||||
param_token name, value;
|
||||
|
||||
realm = opaque = nonce = NULL;
|
||||
|
||||
realm = opaque = nonce = qop = NULL;
|
||||
|
||||
au += 6; /* skip over `Digest' */
|
||||
while (extract_param (&au, &name, &value, ','))
|
||||
@ -3603,11 +3945,19 @@ digest_authentication_encode (const char *au, const char *user,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (qop != NULL && strcmp(qop,"auth"))
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
|
||||
user = NULL; /* force freeing mem and return */
|
||||
}
|
||||
|
||||
if (!realm || !nonce || !user || !passwd || !path || !method)
|
||||
{
|
||||
xfree_null (realm);
|
||||
xfree_null (opaque);
|
||||
xfree_null (nonce);
|
||||
xfree_null (qop);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -3636,27 +3986,69 @@ digest_authentication_encode (const char *au, const char *user,
|
||||
md5_finish_ctx (&ctx, hash);
|
||||
dump_hash (a2buf, hash);
|
||||
|
||||
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
|
||||
md5_init_ctx (&ctx);
|
||||
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_finish_ctx (&ctx, hash);
|
||||
if (!strcmp(qop,"auth"))
|
||||
{
|
||||
/* RFC 2617 Digest Access Authentication */
|
||||
/* generate random hex string */
|
||||
snprintf(cnonce, sizeof(cnonce), "%08x", random_number(INT_MAX));
|
||||
|
||||
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" noncecount ":" clientnonce ":" qop ": " A2BUF) */
|
||||
md5_init_ctx (&ctx);
|
||||
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)"00000001", 8, &ctx); /* TODO: keep track of server nonce values */
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)cnonce, strlen(cnonce), &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)qop, strlen(qop), &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_finish_ctx (&ctx, hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* RFC 2069 Digest Access Authentication */
|
||||
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
|
||||
md5_init_ctx (&ctx);
|
||||
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
|
||||
md5_process_bytes ((unsigned char *)":", 1, &ctx);
|
||||
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
|
||||
md5_finish_ctx (&ctx, hash);
|
||||
}
|
||||
|
||||
dump_hash (response_digest, hash);
|
||||
|
||||
res = xmalloc (strlen (user)
|
||||
+ strlen (user)
|
||||
+ strlen (realm)
|
||||
+ strlen (nonce)
|
||||
+ strlen (path)
|
||||
+ 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
|
||||
+ (opaque ? strlen (opaque) : 0)
|
||||
+ 128);
|
||||
sprintf (res, "Digest \
|
||||
username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
|
||||
user, realm, nonce, path, response_digest);
|
||||
res_size = strlen (user)
|
||||
+ strlen (user)
|
||||
+ strlen (realm)
|
||||
+ strlen (nonce)
|
||||
+ strlen (path)
|
||||
+ 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
|
||||
+ (opaque ? strlen (opaque) : 0)
|
||||
+ (qop ? 128: 0)
|
||||
+ 128;
|
||||
|
||||
res = xmalloc (res_size);
|
||||
|
||||
if (!strcmp(qop,"auth"))
|
||||
{
|
||||
snprintf (res, res_size, "Digest "\
|
||||
"username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\""\
|
||||
", qop=auth, nc=00000001, cnonce=\"%s\"",
|
||||
user, realm, nonce, path, response_digest, cnonce);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf (res, res_size, "Digest "\
|
||||
"username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
|
||||
user, realm, nonce, path, response_digest);
|
||||
}
|
||||
|
||||
if (opaque)
|
||||
{
|
||||
char *p = res + strlen (res);
|
||||
|
128
src/init.c
128
src/init.c
@ -1,6 +1,6 @@
|
||||
/* Reading/parsing the initialization file.
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -30,6 +30,7 @@ shall include the source code for the parts of OpenSSL used as well
|
||||
as that of the covered work. */
|
||||
|
||||
#include "wget.h"
|
||||
#include "exits.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -46,6 +47,10 @@ as that of the covered work. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <regex.h>
|
||||
#ifdef HAVE_LIBPCRE
|
||||
# include <pcre.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PWD_H
|
||||
# include <pwd.h>
|
||||
@ -62,6 +67,7 @@ as that of the covered work. */
|
||||
#include "res.h" /* for res_cleanup */
|
||||
#include "http.h" /* for http_cleanup */
|
||||
#include "retr.h" /* for output_stream */
|
||||
#include "warc.h" /* for warc_close */
|
||||
|
||||
#ifdef TESTING
|
||||
#include "test.h"
|
||||
@ -88,12 +94,15 @@ CMD_DECLARE (cmd_vector);
|
||||
|
||||
CMD_DECLARE (cmd_spec_dirstruct);
|
||||
CMD_DECLARE (cmd_spec_header);
|
||||
CMD_DECLARE (cmd_spec_warc_header);
|
||||
CMD_DECLARE (cmd_spec_htmlify);
|
||||
CMD_DECLARE (cmd_spec_mirror);
|
||||
CMD_DECLARE (cmd_spec_prefer_family);
|
||||
CMD_DECLARE (cmd_spec_progress);
|
||||
CMD_DECLARE (cmd_spec_recursive);
|
||||
CMD_DECLARE (cmd_spec_regex_type);
|
||||
CMD_DECLARE (cmd_spec_restrict_file_names);
|
||||
CMD_DECLARE (cmd_spec_report_speed);
|
||||
#ifdef HAVE_SSL
|
||||
CMD_DECLARE (cmd_spec_secure_protocol);
|
||||
#endif
|
||||
@ -115,6 +124,7 @@ static const struct {
|
||||
} commands[] = {
|
||||
/* KEEP THIS LIST ALPHABETICALLY SORTED */
|
||||
{ "accept", &opt.accepts, cmd_vector },
|
||||
{ "acceptregex", &opt.acceptregex_s, cmd_string },
|
||||
{ "addhostdir", &opt.add_hostdir, cmd_boolean },
|
||||
{ "adjustextension", &opt.adjust_extension, cmd_boolean },
|
||||
{ "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
|
||||
@ -220,7 +230,7 @@ static const struct {
|
||||
{ "postdata", &opt.post_data, cmd_string },
|
||||
{ "postfile", &opt.post_file_name, cmd_file },
|
||||
{ "preferfamily", NULL, cmd_spec_prefer_family },
|
||||
{ "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
|
||||
{ "preservepermissions", &opt.preserve_perm, cmd_boolean },
|
||||
#ifdef HAVE_SSL
|
||||
{ "privatekey", &opt.private_key, cmd_file },
|
||||
{ "privatekeytype", &opt.private_key_type, cmd_cert_type },
|
||||
@ -240,10 +250,13 @@ static const struct {
|
||||
{ "reclevel", &opt.reclevel, cmd_number_inf },
|
||||
{ "recursive", NULL, cmd_spec_recursive },
|
||||
{ "referer", &opt.referer, cmd_string },
|
||||
{ "regextype", &opt.regex_type, cmd_spec_regex_type },
|
||||
{ "reject", &opt.rejects, cmd_vector },
|
||||
{ "rejectregex", &opt.rejectregex_s, cmd_string },
|
||||
{ "relativeonly", &opt.relative_only, cmd_boolean },
|
||||
{ "remoteencoding", &opt.encoding_remote, cmd_string },
|
||||
{ "removelisting", &opt.remove_listing, cmd_boolean },
|
||||
{ "reportspeed", &opt.report_bps, cmd_spec_report_speed},
|
||||
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
|
||||
#ifdef ENABLE_METALINK
|
||||
{ "retries", &opt.n_retries, cmd_number_inf },
|
||||
@ -273,6 +286,17 @@ static const struct {
|
||||
{ "verbose", NULL, cmd_spec_verbose },
|
||||
{ "wait", &opt.wait, cmd_time },
|
||||
{ "waitretry", &opt.waitretry, cmd_time },
|
||||
{ "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
|
||||
{ "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
|
||||
#ifdef HAVE_LIBZ
|
||||
{ "warccompression", &opt.warc_compression_enabled, cmd_boolean },
|
||||
#endif
|
||||
{ "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
|
||||
{ "warcfile", &opt.warc_filename, cmd_file },
|
||||
{ "warcheader", NULL, cmd_spec_warc_header },
|
||||
{ "warckeeplog", &opt.warc_keep_log, cmd_boolean },
|
||||
{ "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
|
||||
{ "warctempdir", &opt.warc_tempdir, cmd_directory },
|
||||
#ifdef USE_WATT32
|
||||
{ "wdebug", &opt.wdebug, cmd_boolean },
|
||||
#endif
|
||||
@ -360,6 +384,8 @@ defaults (void)
|
||||
opt.restrict_files_nonascii = false;
|
||||
opt.restrict_files_case = restrict_no_case_restriction;
|
||||
|
||||
opt.regex_type = regex_type_posix;
|
||||
|
||||
opt.max_redirect = 20;
|
||||
|
||||
opt.waitretry = 10;
|
||||
@ -374,6 +400,18 @@ defaults (void)
|
||||
|
||||
opt.useservertimestamps = true;
|
||||
opt.show_all_dns_entries = false;
|
||||
|
||||
opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
|
||||
#ifdef HAVE_LIBZ
|
||||
opt.warc_compression_enabled = true;
|
||||
#else
|
||||
opt.warc_compression_enabled = false;
|
||||
#endif
|
||||
opt.warc_digests_enabled = true;
|
||||
opt.warc_cdx_enabled = false;
|
||||
opt.warc_cdx_dedup_filename = NULL;
|
||||
opt.warc_tempdir = NULL;
|
||||
opt.warc_keep_log = true;
|
||||
}
|
||||
|
||||
/* Return the user's home directory (strdup-ed), or NULL if none is
|
||||
@ -456,7 +494,7 @@ wgetrc_env_file_name (void)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check for the existance of '$HOME/.wgetrc' and return it's path
|
||||
/* Check for the existence of '$HOME/.wgetrc' and return its path
|
||||
if it exists and is set. */
|
||||
char *
|
||||
wgetrc_user_file_name (void)
|
||||
@ -610,21 +648,34 @@ initialize (void)
|
||||
variable has been set. For internal testing purposes only! */
|
||||
env_sysrc = getenv ("SYSTEM_WGETRC");
|
||||
if (env_sysrc && file_exists_p (env_sysrc))
|
||||
ok &= run_wgetrc (env_sysrc);
|
||||
{
|
||||
ok &= run_wgetrc (env_sysrc);
|
||||
/* If there are any problems parsing the system wgetrc file, tell
|
||||
the user and exit */
|
||||
if (! ok)
|
||||
{
|
||||
fprintf (stderr, _("\
|
||||
Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
|
||||
'%s',\n\
|
||||
or specify a different file using --config.\n"), env_sysrc);
|
||||
exit (2);
|
||||
}
|
||||
}
|
||||
/* Otherwise, if SYSTEM_WGETRC is defined, use it. */
|
||||
#ifdef SYSTEM_WGETRC
|
||||
else if (file_exists_p (SYSTEM_WGETRC))
|
||||
ok &= run_wgetrc (SYSTEM_WGETRC);
|
||||
#endif
|
||||
/* If there are any problems parsing the system wgetrc file, tell
|
||||
the user and exit */
|
||||
if (! ok)
|
||||
{
|
||||
fprintf (stderr, _("\
|
||||
Parsing system wgetrc file failed, please check '%s'. \
|
||||
Or specify a different file using --config\n"), SYSTEM_WGETRC);
|
||||
Parsing system wgetrc file failed. Please check\n\
|
||||
'%s',\n\
|
||||
or specify a different file using --config.\n"), SYSTEM_WGETRC);
|
||||
exit (2);
|
||||
}
|
||||
#endif
|
||||
/* Override it with your own, if one exists. */
|
||||
file = wgetrc_file_name ();
|
||||
if (!file)
|
||||
@ -1234,6 +1285,27 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
|
||||
{
|
||||
/* Empty value means reset the list of headers. */
|
||||
if (*val == '\0')
|
||||
{
|
||||
free_vec (opt.warc_user_headers);
|
||||
opt.warc_user_headers = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!check_user_specified_header (val))
|
||||
{
|
||||
fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
|
||||
exec_name, com, quote (val));
|
||||
return false;
|
||||
}
|
||||
opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
|
||||
{
|
||||
@ -1321,6 +1393,25 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Validate --regex-type and set the choice. */
|
||||
|
||||
static bool
|
||||
cmd_spec_regex_type (const char *com, const char *val, void *place_ignored)
|
||||
{
|
||||
static const struct decode_item choices[] = {
|
||||
{ "posix", regex_type_posix },
|
||||
#ifdef HAVE_LIBPCRE
|
||||
{ "pcre", regex_type_pcre },
|
||||
#endif
|
||||
};
|
||||
int regex_type = regex_type_posix;
|
||||
int ok = decode_string (val, choices, countof (choices), ®ex_type);
|
||||
if (!ok)
|
||||
fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
|
||||
opt.regex_type = regex_type;
|
||||
return ok;
|
||||
}
|
||||
|
||||
static bool
|
||||
cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
|
||||
{
|
||||
@ -1375,6 +1466,15 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
cmd_spec_report_speed (const char *com, const char *val, void *place_ignored)
|
||||
{
|
||||
opt.report_bps = strcasecmp (val, "bits") == 0;
|
||||
if (!opt.report_bps)
|
||||
fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
|
||||
return opt.report_bps;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SSL
|
||||
static bool
|
||||
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
|
||||
@ -1589,8 +1689,16 @@ cleanup (void)
|
||||
{
|
||||
/* Free external resources, close files, etc. */
|
||||
|
||||
/* Close WARC file. */
|
||||
if (opt.warc_filename != 0)
|
||||
warc_close ();
|
||||
|
||||
log_close ();
|
||||
|
||||
if (output_stream)
|
||||
fclose (output_stream);
|
||||
if (fclose (output_stream) == EOF)
|
||||
inform_exit_status (CLOSEFAILED);
|
||||
|
||||
/* No need to check for error because Wget flushes its output (and
|
||||
checks for errors) after any data arrives. */
|
||||
|
||||
@ -1610,6 +1718,9 @@ cleanup (void)
|
||||
host_cleanup ();
|
||||
log_cleanup ();
|
||||
|
||||
for (i = 0; i < nurl; i++)
|
||||
xfree (url[i]);
|
||||
|
||||
{
|
||||
extern acc_t *netrc_list;
|
||||
free_netrc (netrc_list);
|
||||
@ -1638,6 +1749,7 @@ cleanup (void)
|
||||
xfree_null (opt.http_user);
|
||||
xfree_null (opt.http_passwd);
|
||||
free_vec (opt.user_headers);
|
||||
free_vec (opt.warc_user_headers);
|
||||
# ifdef HAVE_SSL
|
||||
xfree_null (opt.cert_file);
|
||||
xfree_null (opt.private_key);
|
||||
|
66
src/log.c
66
src/log.c
@ -79,6 +79,10 @@ as that of the covered work. */
|
||||
logging is inhibited, logfp is set back to NULL. */
|
||||
static FILE *logfp;
|
||||
|
||||
/* A second file descriptor pointing to the temporary log file for the
|
||||
WARC writer. If WARC writing is disabled, this is NULL. */
|
||||
static FILE *warclogfp;
|
||||
|
||||
/* If true, it means logging is inhibited, i.e. nothing is printed or
|
||||
stored. */
|
||||
static bool inhibit_logging;
|
||||
@ -304,6 +308,31 @@ get_log_fp (void)
|
||||
return logfp;
|
||||
return stderr;
|
||||
}
|
||||
|
||||
/* Returns the file descriptor for the secondary log file. This is
|
||||
WARCLOGFP, except if called before log_init, in which case it
|
||||
returns stderr. This is useful in case someone calls a logging
|
||||
function before log_init.
|
||||
|
||||
If logging is inhibited, return NULL. */
|
||||
|
||||
static FILE *
|
||||
get_warc_log_fp (void)
|
||||
{
|
||||
if (inhibit_logging)
|
||||
return NULL;
|
||||
if (warclogfp)
|
||||
return warclogfp;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Sets the file descriptor for the secondary log file. */
|
||||
|
||||
void
|
||||
log_set_warc_log_fp (FILE * fp)
|
||||
{
|
||||
warclogfp = fp;
|
||||
}
|
||||
|
||||
/* Log a literal string S. The string is logged as-is, without a
|
||||
newline appended. */
|
||||
@ -312,13 +341,17 @@ void
|
||||
logputs (enum log_options o, const char *s)
|
||||
{
|
||||
FILE *fp;
|
||||
FILE *warcfp;
|
||||
|
||||
check_redirect_output ();
|
||||
if ((fp = get_log_fp ()) == NULL)
|
||||
return;
|
||||
warcfp = get_warc_log_fp ();
|
||||
CHECK_VERBOSE (o);
|
||||
|
||||
FPUTS (s, fp);
|
||||
if (warcfp != NULL)
|
||||
FPUTS (s, warcfp);
|
||||
if (save_context_p)
|
||||
saved_append (s);
|
||||
if (flush_log_p)
|
||||
@ -356,8 +389,9 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
|
||||
int available_size = sizeof (smallmsg);
|
||||
int numwritten;
|
||||
FILE *fp = get_log_fp ();
|
||||
FILE *warcfp = get_warc_log_fp ();
|
||||
|
||||
if (!save_context_p)
|
||||
if (!save_context_p && warcfp == NULL)
|
||||
{
|
||||
/* In the simple case just call vfprintf(), to avoid needless
|
||||
allocation and games with vsnprintf(). */
|
||||
@ -407,8 +441,11 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
|
||||
}
|
||||
|
||||
/* Writing succeeded. */
|
||||
saved_append (write_ptr);
|
||||
if (save_context_p)
|
||||
saved_append (write_ptr);
|
||||
FPUTS (write_ptr, fp);
|
||||
if (warcfp != NULL)
|
||||
FPUTS (write_ptr, warcfp);
|
||||
if (state->bigmsg)
|
||||
xfree (state->bigmsg);
|
||||
|
||||
@ -426,6 +463,7 @@ void
|
||||
logflush (void)
|
||||
{
|
||||
FILE *fp = get_log_fp ();
|
||||
FILE *warcfp = get_warc_log_fp ();
|
||||
if (fp)
|
||||
{
|
||||
/* 2005-10-25 SMS.
|
||||
@ -440,6 +478,10 @@ logflush (void)
|
||||
fflush (fp);
|
||||
#endif /* def __VMS [else] */
|
||||
}
|
||||
|
||||
if (warcfp != NULL)
|
||||
fflush (warcfp);
|
||||
|
||||
needs_flushing = false;
|
||||
}
|
||||
|
||||
@ -573,14 +615,14 @@ log_init (const char *file, bool appendp)
|
||||
}
|
||||
}
|
||||
|
||||
/* Close LOGFP, inhibit further logging and free the memory associated
|
||||
with it. */
|
||||
/* Close LOGFP (only if we opened it, not if it's stderr), inhibit
|
||||
further logging and free the memory associated with it. */
|
||||
void
|
||||
log_close (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (logfp)
|
||||
if (logfp && (logfp != stderr))
|
||||
fclose (logfp);
|
||||
logfp = NULL;
|
||||
inhibit_logging = true;
|
||||
@ -598,6 +640,7 @@ log_dump_context (void)
|
||||
{
|
||||
int num = log_line_current;
|
||||
FILE *fp = get_log_fp ();
|
||||
FILE *warcfp = get_warc_log_fp ();
|
||||
if (!fp)
|
||||
return;
|
||||
|
||||
@ -609,14 +652,23 @@ log_dump_context (void)
|
||||
{
|
||||
struct log_ln *ln = log_lines + num;
|
||||
if (ln->content)
|
||||
FPUTS (ln->content, fp);
|
||||
{
|
||||
FPUTS (ln->content, fp);
|
||||
if (warcfp != NULL)
|
||||
FPUTS (ln->content, warcfp);
|
||||
}
|
||||
ROT_ADVANCE (num);
|
||||
}
|
||||
while (num != log_line_current);
|
||||
if (trailing_line)
|
||||
if (log_lines[log_line_current].content)
|
||||
FPUTS (log_lines[log_line_current].content, fp);
|
||||
{
|
||||
FPUTS (log_lines[log_line_current].content, fp);
|
||||
if (warcfp != NULL)
|
||||
FPUTS (log_lines[log_line_current].content, warcfp);
|
||||
}
|
||||
fflush (fp);
|
||||
fflush (warcfp);
|
||||
}
|
||||
|
||||
/* String escape functions. */
|
||||
|
@ -34,8 +34,12 @@ as that of the covered work. */
|
||||
/* The log file to which Wget writes after HUP. */
|
||||
#define DEFAULT_LOGFILE "wget-log"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
enum log_options { LOG_VERBOSE, LOG_NOTQUIET, LOG_NONVERBOSE, LOG_ALWAYS };
|
||||
|
||||
void log_set_warc_log_fp (FILE *);
|
||||
|
||||
void logprintf (enum log_options, const char *, ...)
|
||||
GCC_FORMAT_ATTR (2, 3);
|
||||
void debug_logprintf (const char *, ...) GCC_FORMAT_ATTR (1, 2);
|
||||
|
214
src/main.c
214
src/main.c
@ -1,6 +1,6 @@
|
||||
/* Command line parsing.
|
||||
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
|
||||
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -55,7 +55,7 @@ as that of the covered work. */
|
||||
#include "spider.h"
|
||||
#include "http.h" /* for save_cookies */
|
||||
#include "ptimer.h"
|
||||
|
||||
#include "warc.h"
|
||||
#include <getopt.h>
|
||||
#include <getpass.h>
|
||||
#include <quote.h>
|
||||
@ -157,6 +157,7 @@ struct cmdline_option {
|
||||
static struct cmdline_option option_data[] =
|
||||
{
|
||||
{ "accept", 'A', OPT_VALUE, "accept", -1 },
|
||||
{ "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
|
||||
{ "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
|
||||
{ "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
|
||||
{ "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
|
||||
@ -249,7 +250,7 @@ static struct cmdline_option option_data[] =
|
||||
{ "post-data", 0, OPT_VALUE, "postdata", -1 },
|
||||
{ "post-file", 0, OPT_VALUE, "postfile", -1 },
|
||||
{ "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
|
||||
{ "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 }, /* deprecated */
|
||||
{ "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
|
||||
{ IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
|
||||
{ IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
|
||||
{ "progress", 0, OPT_VALUE, "progress", -1 },
|
||||
@ -266,10 +267,13 @@ static struct cmdline_option option_data[] =
|
||||
{ "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
|
||||
{ "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
|
||||
{ "referer", 0, OPT_VALUE, "referer", -1 },
|
||||
{ "regex-type", 0, OPT_VALUE, "regextype", -1 },
|
||||
{ "reject", 'R', OPT_VALUE, "reject", -1 },
|
||||
{ "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
|
||||
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
|
||||
{ "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
|
||||
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
|
||||
{ "report-speed", 0, OPT_BOOLEAN, "reportspeed", -1 },
|
||||
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
|
||||
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
|
||||
#ifdef ENABLE_METALINK
|
||||
@ -296,6 +300,17 @@ static struct cmdline_option option_data[] =
|
||||
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
|
||||
{ "wait", 'w', OPT_VALUE, "wait", -1 },
|
||||
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
|
||||
{ "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
|
||||
#ifdef HAVE_LIBZ
|
||||
{ "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
|
||||
#endif
|
||||
{ "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
|
||||
{ "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
|
||||
{ "warc-file", 0, OPT_VALUE, "warcfile", -1 },
|
||||
{ "warc-header", 0, OPT_VALUE, "warcheader", -1 },
|
||||
{ "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
|
||||
{ "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
|
||||
{ "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
|
||||
#ifdef USE_WATT32
|
||||
{ "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
|
||||
#endif
|
||||
@ -453,6 +468,8 @@ Logging and input file:\n"),
|
||||
-v, --verbose be verbose (this is the default).\n"),
|
||||
N_("\
|
||||
-nv, --no-verbose turn off verboseness, without being quiet.\n"),
|
||||
N_("\
|
||||
--report-speed=TYPE Output bandwidth as TYPE. TYPE can be bits.\n"),
|
||||
N_("\
|
||||
-i, --input-file=FILE download URLs found in local or external FILE.\n"),
|
||||
N_("\
|
||||
@ -654,10 +671,37 @@ FTP options:\n"),
|
||||
--no-glob turn off FTP file name globbing.\n"),
|
||||
N_("\
|
||||
--no-passive-ftp disable the \"passive\" transfer mode.\n"),
|
||||
N_("\
|
||||
--preserve-permissions preserve remote file permissions.\n"),
|
||||
N_("\
|
||||
--retr-symlinks when recursing, get linked-to files (not dir).\n"),
|
||||
"\n",
|
||||
|
||||
N_("\
|
||||
WARC options:\n"),
|
||||
N_("\
|
||||
--warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
|
||||
N_("\
|
||||
--warc-header=STRING insert STRING into the warcinfo record.\n"),
|
||||
N_("\
|
||||
--warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
|
||||
N_("\
|
||||
--warc-cdx write CDX index files.\n"),
|
||||
N_("\
|
||||
--warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
|
||||
#ifdef HAVE_LIBZ
|
||||
N_("\
|
||||
--no-warc-compression do not compress WARC files with GZIP.\n"),
|
||||
#endif
|
||||
N_("\
|
||||
--no-warc-digests do not calculate SHA1 digests.\n"),
|
||||
N_("\
|
||||
--no-warc-keep-log do not store the log file in a WARC record.\n"),
|
||||
N_("\
|
||||
--warc-tempdir=DIRECTORY location for temporary files created by the\n\
|
||||
WARC writer.\n"),
|
||||
"\n",
|
||||
|
||||
N_("\
|
||||
Recursive download:\n"),
|
||||
N_("\
|
||||
@ -694,6 +738,17 @@ Recursive accept/reject:\n"),
|
||||
-A, --accept=LIST comma-separated list of accepted extensions.\n"),
|
||||
N_("\
|
||||
-R, --reject=LIST comma-separated list of rejected extensions.\n"),
|
||||
N_("\
|
||||
--accept-regex=REGEX regex matching accepted URLs.\n"),
|
||||
N_("\
|
||||
--reject-regex=REGEX regex matching rejected URLs.\n"),
|
||||
#ifdef HAVE_LIBPCRE
|
||||
N_("\
|
||||
--regex-type=TYPE regex type (posix|pcre).\n"),
|
||||
#else
|
||||
N_("\
|
||||
--regex-type=TYPE regex type (posix).\n"),
|
||||
#endif
|
||||
N_("\
|
||||
-D, --domains=LIST comma-separated list of accepted domains.\n"),
|
||||
N_("\
|
||||
@ -718,7 +773,6 @@ Recursive accept/reject:\n"),
|
||||
N_("\
|
||||
-np, --no-parent don't ascend to the parent directory.\n"),
|
||||
"\n",
|
||||
|
||||
N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
|
||||
};
|
||||
|
||||
@ -920,6 +974,7 @@ There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
|
||||
}
|
||||
|
||||
char *program_name; /* Needed by lib/error.c. */
|
||||
char *program_argstring; /* Needed by wget_warc.c. */
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
@ -955,13 +1010,34 @@ main (int argc, char **argv)
|
||||
windows_main ((char **) &exec_name);
|
||||
#endif
|
||||
|
||||
/* Construct the arguments string. */
|
||||
int argstring_length = 1;
|
||||
for (i = 1; i < argc; i++)
|
||||
argstring_length += strlen (argv[i]) + 2 + 1;
|
||||
char *p = program_argstring = malloc (argstring_length * sizeof (char));
|
||||
if (p == NULL)
|
||||
{
|
||||
fprintf (stderr, _("Memory allocation problem\n"));
|
||||
exit (2);
|
||||
}
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
*p++ = '"';
|
||||
int arglen = strlen (argv[i]);
|
||||
memcpy (p, argv[i], arglen);
|
||||
p += arglen;
|
||||
*p++ = '"';
|
||||
*p++ = ' ';
|
||||
}
|
||||
*p = '\0';
|
||||
|
||||
/* Load the hard-coded defaults. */
|
||||
defaults ();
|
||||
|
||||
init_switches ();
|
||||
|
||||
/* This seperate getopt_long is needed to find the user config
|
||||
and parse it before the other user options. */
|
||||
/* This separate getopt_long is needed to find the user config file
|
||||
option ("--config") and parse it before the other user options. */
|
||||
longindex = -1;
|
||||
int retconf;
|
||||
bool use_userconfig = false;
|
||||
@ -972,20 +1048,25 @@ main (int argc, char **argv)
|
||||
int confval;
|
||||
bool userrc_ret = true;
|
||||
struct cmdline_option *config_opt;
|
||||
confval = long_options[longindex].val;
|
||||
config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
|
||||
if (strcmp (config_opt->long_name, "config") == 0)
|
||||
|
||||
/* There is no short option for "--config". */
|
||||
if (longindex >= 0)
|
||||
{
|
||||
userrc_ret &= run_wgetrc (optarg);
|
||||
use_userconfig = true;
|
||||
confval = long_options[longindex].val;
|
||||
config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
|
||||
if (strcmp (config_opt->long_name, "config") == 0)
|
||||
{
|
||||
userrc_ret &= run_wgetrc (optarg);
|
||||
use_userconfig = true;
|
||||
}
|
||||
if (!userrc_ret)
|
||||
{
|
||||
fprintf (stderr, "Exiting due to error in %s\n", optarg);
|
||||
exit (2);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (!userrc_ret)
|
||||
{
|
||||
printf ("Exiting due to error in %s\n", optarg);
|
||||
exit (2);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
|
||||
@ -1008,9 +1089,10 @@ main (int argc, char **argv)
|
||||
{
|
||||
if (ret == '?')
|
||||
{
|
||||
print_usage (0);
|
||||
printf ("\n");
|
||||
printf (_("Try `%s --help' for more options.\n"), exec_name);
|
||||
print_usage (1);
|
||||
fprintf (stderr, "\n");
|
||||
fprintf (stderr, _("Try `%s --help' for more options.\n"),
|
||||
exec_name);
|
||||
exit (2);
|
||||
}
|
||||
/* Find the short option character in the mapping. */
|
||||
@ -1118,7 +1200,7 @@ main (int argc, char **argv)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("Both --no-clobber and --convert-links were specified,"
|
||||
"only --convert-links will be used.\n"));
|
||||
" only --convert-links will be used.\n"));
|
||||
opt.noclobber = false;
|
||||
}
|
||||
|
||||
@ -1199,6 +1281,47 @@ for details.\n\n"));
|
||||
}
|
||||
}
|
||||
|
||||
if (opt.warc_filename != 0)
|
||||
{
|
||||
if (opt.noclobber)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("WARC output does not work with --no-clobber, "
|
||||
"--no-clobber will be disabled.\n"));
|
||||
opt.noclobber = false;
|
||||
}
|
||||
if (opt.timestamping)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("WARC output does not work with timestamping, "
|
||||
"timestamping will be disabled.\n"));
|
||||
opt.timestamping = false;
|
||||
}
|
||||
if (opt.spider)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("WARC output does not work with --spider.\n"));
|
||||
exit (1);
|
||||
}
|
||||
if (opt.always_rest)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("WARC output does not work with --continue, "
|
||||
"--continue will be disabled.\n"));
|
||||
opt.always_rest = false;
|
||||
}
|
||||
if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("Digests are disabled; WARC deduplication will "
|
||||
"not find duplicate records.\n"));
|
||||
}
|
||||
if (opt.warc_keep_log)
|
||||
{
|
||||
opt.progress_type = xstrdup ("dot");
|
||||
}
|
||||
}
|
||||
|
||||
if (opt.ask_passwd && opt.passwd)
|
||||
{
|
||||
fprintf (stderr,
|
||||
@ -1216,13 +1339,42 @@ for details.\n\n"));
|
||||
/* No URL specified. */
|
||||
fprintf (stderr, _("%s: missing URL\n"), exec_name);
|
||||
print_usage (1);
|
||||
printf ("\n");
|
||||
fprintf (stderr, "\n");
|
||||
/* #### Something nicer should be printed here -- similar to the
|
||||
pre-1.5 `--help' page. */
|
||||
fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
|
||||
exit (1);
|
||||
}
|
||||
|
||||
/* Compile the regular expressions. */
|
||||
switch (opt.regex_type)
|
||||
{
|
||||
#ifdef HAVE_LIBPCRE
|
||||
case regex_type_pcre:
|
||||
opt.regex_compile_fun = compile_pcre_regex;
|
||||
opt.regex_match_fun = match_pcre_regex;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case regex_type_posix:
|
||||
default:
|
||||
opt.regex_compile_fun = compile_posix_regex;
|
||||
opt.regex_match_fun = match_posix_regex;
|
||||
break;
|
||||
}
|
||||
if (opt.acceptregex_s)
|
||||
{
|
||||
opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
|
||||
if (!opt.acceptregex)
|
||||
exit (1);
|
||||
}
|
||||
if (opt.rejectregex_s)
|
||||
{
|
||||
opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
|
||||
if (!opt.rejectregex)
|
||||
exit (1);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_IRI
|
||||
if (opt.enable_iri)
|
||||
{
|
||||
@ -1269,6 +1421,11 @@ for details.\n\n"));
|
||||
|
||||
/* Fill in the arguments. */
|
||||
url = alloca_array (char *, nurl + 1);
|
||||
if (url == NULL)
|
||||
{
|
||||
fprintf (stderr, _("Memory allocation problem\n"));
|
||||
exit (2);
|
||||
}
|
||||
for (i = 0; i < nurl; i++, optind++)
|
||||
{
|
||||
char *rewritten = rewrite_shorthand_url (argv[optind]);
|
||||
@ -1282,6 +1439,10 @@ for details.\n\n"));
|
||||
/* Initialize logging. */
|
||||
log_init (opt.lfilename, append_to_log);
|
||||
|
||||
/* Open WARC file. */
|
||||
if (opt.warc_filename != 0)
|
||||
warc_init ();
|
||||
|
||||
DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
|
||||
version_string, OS_TYPE));
|
||||
|
||||
@ -1437,7 +1598,7 @@ outputting to a regular file.\n"));
|
||||
&dt, opt.recursive, iri, true, NULL);
|
||||
}
|
||||
|
||||
if (opt.delete_after && file_exists_p(filename))
|
||||
if (opt.delete_after && filename != NULL && file_exists_p (filename))
|
||||
{
|
||||
DEBUGP (("Removing file due to --delete-after in main():\n"));
|
||||
logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
|
||||
@ -1524,12 +1685,9 @@ outputting to a regular file.\n"));
|
||||
if (opt.convert_links && !opt.delete_after)
|
||||
convert_all_links ();
|
||||
|
||||
log_close ();
|
||||
for (i = 0; i < nurl; i++)
|
||||
xfree (url[i]);
|
||||
cleanup ();
|
||||
|
||||
return get_exit_status ();
|
||||
exit (get_exit_status ());
|
||||
}
|
||||
#endif /* TESTING */
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* SSL support via OpenSSL library.
|
||||
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
|
||||
2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
Originally contributed by Christian Fraenkel.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -159,7 +159,7 @@ key_type_to_ssl_type (enum keyfile_type type)
|
||||
Returns true on success, false otherwise. */
|
||||
|
||||
bool
|
||||
ssl_init ()
|
||||
ssl_init (void)
|
||||
{
|
||||
SSL_METHOD const *meth;
|
||||
|
||||
@ -201,7 +201,9 @@ ssl_init ()
|
||||
abort ();
|
||||
}
|
||||
|
||||
ssl_ctx = SSL_CTX_new (meth);
|
||||
/* The type cast below accommodates older OpenSSL versions (0.9.8)
|
||||
where SSL_CTX_new() is declared without a "const" argument. */
|
||||
ssl_ctx = SSL_CTX_new ((SSL_METHOD *)meth);
|
||||
if (!ssl_ctx)
|
||||
goto error;
|
||||
|
||||
@ -393,7 +395,7 @@ static struct transport_implementation openssl_transport = {
|
||||
Returns true on success, false on failure. */
|
||||
|
||||
bool
|
||||
ssl_connect_wget (int fd)
|
||||
ssl_connect_wget (int fd, const char *hostname)
|
||||
{
|
||||
SSL *conn;
|
||||
struct openssl_transport_context *ctx;
|
||||
@ -404,6 +406,19 @@ ssl_connect_wget (int fd)
|
||||
conn = SSL_new (ssl_ctx);
|
||||
if (!conn)
|
||||
goto error;
|
||||
#if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT)
|
||||
/* If the SSL library was build with support for ServerNameIndication
|
||||
then use it whenever we have a hostname. If not, don't, ever. */
|
||||
if (! is_valid_ip_address (hostname))
|
||||
{
|
||||
if (! SSL_set_tlsext_host_name (conn, hostname))
|
||||
{
|
||||
DEBUGP (("Failed to set TLS server-name indication."));
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef FD_TO_SOCKET
|
||||
# define FD_TO_SOCKET(X) (X)
|
||||
#endif
|
||||
|
@ -78,6 +78,19 @@ struct options
|
||||
bool ignore_case; /* Whether to ignore case when
|
||||
matching dirs and files */
|
||||
|
||||
char *acceptregex_s; /* Patterns to accept (a regex string). */
|
||||
char *rejectregex_s; /* Patterns to reject (a regex string). */
|
||||
void *acceptregex; /* Patterns to accept (a regex struct). */
|
||||
void *rejectregex; /* Patterns to reject (a regex struct). */
|
||||
enum {
|
||||
#ifdef HAVE_LIBPCRE
|
||||
regex_type_pcre,
|
||||
#endif
|
||||
regex_type_posix
|
||||
} regex_type; /* The regex library. */
|
||||
void *(*regex_compile_fun)(const char *); /* Function to compile a regex. */
|
||||
bool (*regex_match_fun)(const void *, const char *); /* Function to match a string to a regex. */
|
||||
|
||||
char **domains; /* See host.c */
|
||||
char **exclude_domains;
|
||||
bool dns_cache; /* whether we cache DNS lookups. */
|
||||
@ -91,6 +104,15 @@ struct options
|
||||
FTP. */
|
||||
char *output_document; /* The output file to which the
|
||||
documents will be printed. */
|
||||
char *warc_filename; /* WARC output filename */
|
||||
char *warc_tempdir; /* WARC temp dir */
|
||||
char *warc_cdx_dedup_filename; /* CDX file to be used for deduplication. */
|
||||
wgint warc_maxsize; /* WARC max archive size */
|
||||
bool warc_compression_enabled; /* For GZIP compression. */
|
||||
bool warc_digests_enabled; /* For SHA1 digests. */
|
||||
bool warc_cdx_enabled; /* Create CDX files? */
|
||||
bool warc_keep_log; /* Store the log file in a WARC record. */
|
||||
char **warc_user_headers; /* User-defined WARC header(s). */
|
||||
|
||||
char *user; /* Generic username */
|
||||
char *passwd; /* Generic password */
|
||||
@ -261,6 +283,9 @@ struct options
|
||||
|
||||
bool show_all_dns_entries; /* Show all the DNS entries when resolving a
|
||||
name. */
|
||||
|
||||
bool report_bps; /*Output bandwidth in bits format*/
|
||||
|
||||
#ifdef ENABLE_THREADS
|
||||
int jobs; /* How many threads use at the same time. */
|
||||
#endif
|
||||
|
@ -766,7 +766,7 @@ update_speed_ring (struct bar_progress *bp, wgint howmuch, double dltime)
|
||||
}
|
||||
|
||||
#if USE_NLS_PROGRESS_BAR
|
||||
int
|
||||
static int
|
||||
count_cols (const char *mbs)
|
||||
{
|
||||
wchar_t wc;
|
||||
@ -795,7 +795,7 @@ count_cols (const char *mbs)
|
||||
# define count_cols(mbs) ((int)(strlen(mbs)))
|
||||
#endif
|
||||
|
||||
const char *
|
||||
static const char *
|
||||
get_eta (int *bcd)
|
||||
{
|
||||
/* TRANSLATORS: "ETA" is English-centric, but this must
|
||||
@ -861,7 +861,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
struct bar_progress_hist *hist = &bp->hist;
|
||||
|
||||
/* The progress bar should look like this:
|
||||
xx% [=======> ] nn,nnn 12.34K/s eta 36m 51s
|
||||
xx% [=======> ] nn,nnn 12.34KB/s eta 36m 51s
|
||||
|
||||
Calculate the geometry. The idea is to assign as much room as
|
||||
possible to the progress bar. The other idea is to never let
|
||||
@ -873,7 +873,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
"xx% " or "100%" - percentage - 4 chars
|
||||
"[]" - progress bar decorations - 2 chars
|
||||
" nnn,nnn,nnn" - downloaded bytes - 12 chars or very rarely more
|
||||
" 12.5K/s" - download rate - 8 chars
|
||||
" 12.5KB/s" - download rate - 9 chars
|
||||
" eta 36m 51s" - ETA - 14 chars
|
||||
|
||||
"=====>..." - progress bar - the rest
|
||||
@ -977,10 +977,11 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
*p++ = ' ';
|
||||
}
|
||||
|
||||
/* " 12.52K/s" */
|
||||
/* " 12.52Kb/s or 12.52KB/s" */
|
||||
if (hist->total_time > 0 && hist->total_bytes)
|
||||
{
|
||||
static const char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
|
||||
static const char *short_units[] = { "B/s", "KB/s", "MB/s", "GB/s" };
|
||||
static const char *short_units_bits[] = { "b/s", "Kb/s", "Mb/s", "Gb/s" };
|
||||
int units = 0;
|
||||
/* Calculate the download speed using the history ring and
|
||||
recent data that hasn't made it to the ring yet. */
|
||||
@ -988,7 +989,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
|
||||
double dltime = hist->total_time + (dl_total_time - bp->recent_start);
|
||||
double dlspeed = calc_rate (dlquant, dltime, &units);
|
||||
sprintf (p, " %4.*f%s", dlspeed >= 99.95 ? 0 : dlspeed >= 9.995 ? 1 : 2,
|
||||
dlspeed, short_units[units]);
|
||||
dlspeed, !opt.report_bps ? short_units[units] : short_units_bits[units]);
|
||||
move_to_end (p);
|
||||
}
|
||||
else
|
||||
|
@ -59,9 +59,7 @@ as that of the covered work. */
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
|
||||
/* Cygwin currently (as of 2005-04-08, Cygwin 1.5.14) lacks clock_getres,
|
||||
but still defines _POSIX_TIMERS! Because of that we simply use the
|
||||
|
@ -763,6 +763,11 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (!accept_url (url))
|
||||
{
|
||||
DEBUGP (("%s is excluded/not-included through regex.\n", url));
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* 6. Check for acceptance/rejection rules. We ignore these rules
|
||||
for directories (no file name to match) and for non-leaf HTMLs,
|
||||
|
103
src/retr.c
103
src/retr.c
@ -162,13 +162,16 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
|
||||
|
||||
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
|
||||
amount of data and decrease SKIP. Increment *TOTAL by the amount
|
||||
of data written. */
|
||||
of data written. If OUT2 is not NULL, also write BUF to OUT2.
|
||||
In case of error writing to OUT, -1 is returned. In case of error
|
||||
writing to OUT2, -2 is returned. In case of any other error,
|
||||
1 is returned. On success, 0 is returned. */
|
||||
|
||||
static int
|
||||
write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
|
||||
wgint *written)
|
||||
write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
|
||||
wgint *skip, wgint *written)
|
||||
{
|
||||
if (!out)
|
||||
if (out == NULL && out2 == NULL)
|
||||
return 1;
|
||||
if (*skip > bufsize)
|
||||
{
|
||||
@ -184,7 +187,10 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
|
||||
return 1;
|
||||
}
|
||||
|
||||
fwrite (buf, 1, bufsize, out);
|
||||
if (out != NULL)
|
||||
fwrite (buf, 1, bufsize, out);
|
||||
if (out2 != NULL)
|
||||
fwrite (buf, 1, bufsize, out2);
|
||||
*written += bufsize;
|
||||
|
||||
/* Immediately flush the downloaded data. This should not hinder
|
||||
@ -201,9 +207,17 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
|
||||
actual justification. (Also, why 16K? Anyone test other values?)
|
||||
*/
|
||||
#ifndef __VMS
|
||||
fflush (out);
|
||||
if (out != NULL)
|
||||
fflush (out);
|
||||
if (out2 != NULL)
|
||||
fflush (out2);
|
||||
#endif /* ndef __VMS */
|
||||
return !ferror (out);
|
||||
if (out != NULL && ferror (out))
|
||||
return -1;
|
||||
else if (out2 != NULL && ferror (out2))
|
||||
return -2;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Read the contents of file descriptor FD until the connection
|
||||
@ -221,13 +235,20 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
|
||||
the amount of data written to disk. The time it took to download
|
||||
the data is stored to ELAPSED.
|
||||
|
||||
If OUT2 is non-NULL, the contents are also written to OUT2.
|
||||
OUT2 will get an exact copy of the response: if this is a chunked
|
||||
response, everything -- including the chunk headers -- is written
|
||||
to OUT2. (OUT will only get the unchunked response.)
|
||||
|
||||
The function exits and returns the amount of data read. In case of
|
||||
error while reading data, -1 is returned. In case of error while
|
||||
writing data, -2 is returned. */
|
||||
writing data to OUT, -2 is returned. In case of error while writing
|
||||
data to OUT2, -3 is returned. */
|
||||
|
||||
int
|
||||
fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
|
||||
wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
|
||||
wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
|
||||
FILE *out2)
|
||||
{
|
||||
int ret = 0;
|
||||
#undef max
|
||||
@ -310,13 +331,24 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
else if (out2 != NULL)
|
||||
fwrite (line, 1, strlen (line), out2);
|
||||
|
||||
remaining_chunk_size = strtol (line, &endl, 16);
|
||||
xfree (line);
|
||||
|
||||
if (remaining_chunk_size == 0)
|
||||
{
|
||||
ret = 0;
|
||||
if (fd_read_line (fd) == NULL)
|
||||
line = fd_read_line (fd);
|
||||
if (line == NULL)
|
||||
ret = -1;
|
||||
else
|
||||
{
|
||||
if (out2 != NULL)
|
||||
fwrite (line, 1, strlen (line), out2);
|
||||
xfree (line);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -366,20 +398,30 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
|
||||
if (ret > 0)
|
||||
{
|
||||
sum_read += ret;
|
||||
if (!write_data (out, dlbuf, ret, &skip, &sum_written))
|
||||
int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
|
||||
if (write_res != 0)
|
||||
{
|
||||
ret = -2;
|
||||
ret = (write_res == -3) ? -3 : -2;
|
||||
goto out;
|
||||
}
|
||||
if (chunked)
|
||||
{
|
||||
remaining_chunk_size -= ret;
|
||||
if (remaining_chunk_size == 0)
|
||||
if (fd_read_line (fd) == NULL)
|
||||
{
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
{
|
||||
char *line = fd_read_line (fd);
|
||||
if (line == NULL)
|
||||
{
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (out2 != NULL)
|
||||
fwrite (line, 1, strlen (line), out2);
|
||||
xfree (line);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -601,6 +643,7 @@ retr_rate (wgint bytes, double secs)
|
||||
{
|
||||
static char res[20];
|
||||
static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
|
||||
static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
|
||||
int units;
|
||||
|
||||
double dlrate = calc_rate (bytes, secs, &units);
|
||||
@ -608,7 +651,7 @@ retr_rate (wgint bytes, double secs)
|
||||
e.g. "1022", "247", "12.5", "2.38". */
|
||||
sprintf (res, "%.*f %s",
|
||||
dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
|
||||
dlrate, rate_names[units]);
|
||||
dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -625,6 +668,11 @@ double
|
||||
calc_rate (wgint bytes, double secs, int *units)
|
||||
{
|
||||
double dlrate;
|
||||
double bibyte = 1000.0;
|
||||
|
||||
if (!opt.report_bps)
|
||||
bibyte = 1024.0;
|
||||
|
||||
|
||||
assert (secs >= 0);
|
||||
assert (bytes >= 0);
|
||||
@ -636,16 +684,17 @@ calc_rate (wgint bytes, double secs, int *units)
|
||||
0 and the timer's resolution, assume half the resolution. */
|
||||
secs = ptimer_resolution () / 2.0;
|
||||
|
||||
dlrate = bytes / secs;
|
||||
if (dlrate < 1024.0)
|
||||
dlrate = convert_to_bits (bytes) / secs;
|
||||
if (dlrate < bibyte)
|
||||
*units = 0;
|
||||
else if (dlrate < 1024.0 * 1024.0)
|
||||
*units = 1, dlrate /= 1024.0;
|
||||
else if (dlrate < 1024.0 * 1024.0 * 1024.0)
|
||||
*units = 2, dlrate /= (1024.0 * 1024.0);
|
||||
else if (dlrate < (bibyte * bibyte))
|
||||
*units = 1, dlrate /= bibyte;
|
||||
else if (dlrate < (bibyte * bibyte * bibyte))
|
||||
*units = 2, dlrate /= (bibyte * bibyte);
|
||||
|
||||
else
|
||||
/* Maybe someone will need this, one day. */
|
||||
*units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
|
||||
*units = 3, dlrate /= (bibyte * bibyte * bibyte);
|
||||
|
||||
return dlrate;
|
||||
}
|
||||
@ -911,10 +960,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
||||
register_redirection (origurl, u->url);
|
||||
|
||||
if (*dt & TEXTHTML)
|
||||
register_html (u->url, local_file);
|
||||
register_html (local_file);
|
||||
|
||||
if (*dt & TEXTCSS)
|
||||
register_css (u->url, local_file);
|
||||
register_css (local_file);
|
||||
}
|
||||
|
||||
if (file)
|
||||
|
@ -50,7 +50,7 @@ enum {
|
||||
rb_chunked_transfer_encoding = 4
|
||||
};
|
||||
|
||||
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
|
||||
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
|
||||
|
||||
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
|
||||
|
||||
|
@ -45,7 +45,7 @@ static struct hash_table *nonexisting_urls_set;
|
||||
|
||||
/* Cleanup the data structures associated with this file. */
|
||||
|
||||
void
|
||||
static void
|
||||
spider_cleanup (void)
|
||||
{
|
||||
if (nonexisting_urls_set)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* SSL support.
|
||||
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
|
||||
2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
Originally contributed by Christian Fraenkel.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
@ -33,7 +33,7 @@ as that of the covered work. */
|
||||
#define GEN_SSLFUNC_H
|
||||
|
||||
bool ssl_init (void);
|
||||
bool ssl_connect_wget (int);
|
||||
bool ssl_connect_wget (int, const char *);
|
||||
bool ssl_check_certificate (int, const char *);
|
||||
|
||||
#endif /* GEN_SSLFUNC_H */
|
||||
|
@ -46,6 +46,8 @@ const char *test_append_uri_pathel();
|
||||
const char *test_are_urls_equal();
|
||||
const char *test_is_robots_txt_url();
|
||||
|
||||
const char *program_argstring = "TEST";
|
||||
|
||||
int tests_run;
|
||||
|
||||
static const char *
|
||||
|
11
src/url.c
11
src/url.c
@ -1503,9 +1503,9 @@ url_file_name (const struct url *u, char *replaced_filename)
|
||||
{
|
||||
struct growable fnres; /* stands for "file name result" */
|
||||
|
||||
const char *u_file, *u_query;
|
||||
const char *u_file;
|
||||
char *fname, *unique;
|
||||
char *index_filename = "index.html"; /* The default index file is index.html */
|
||||
const char *index_filename = "index.html"; /* The default index file is index.html */
|
||||
|
||||
fnres.base = NULL;
|
||||
fnres.size = 0;
|
||||
@ -1562,12 +1562,11 @@ url_file_name (const struct url *u, char *replaced_filename)
|
||||
u_file = *u->file ? u->file : index_filename;
|
||||
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
|
||||
|
||||
/* Append "?query" to the file name. */
|
||||
u_query = u->query && *u->query ? u->query : NULL;
|
||||
if (u_query)
|
||||
/* Append "?query" to the file name, even if empty */
|
||||
if (u->query)
|
||||
{
|
||||
append_char (FN_QUERY_SEP, &fnres);
|
||||
append_uri_pathel (u_query, u_query + strlen (u_query),
|
||||
append_uri_pathel (u->query, u->query + strlen (u->query),
|
||||
true, &fnres);
|
||||
}
|
||||
}
|
||||
|
121
src/utils.c
121
src/utils.c
@ -59,12 +59,12 @@ as that of the covered work. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
/* For TIOCGWINSZ and friends: */
|
||||
#ifdef HAVE_SYS_IOCTL_H
|
||||
# include <sys/ioctl.h>
|
||||
#endif
|
||||
#include <sys/ioctl.h>
|
||||
#ifdef HAVE_TERMIOS_H
|
||||
# include <termios.h>
|
||||
#endif
|
||||
@ -73,6 +73,11 @@ as that of the covered work. */
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
|
||||
#include <regex.h>
|
||||
#ifdef HAVE_LIBPCRE
|
||||
# include <pcre.h>
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SIGSETJMP
|
||||
/* If sigsetjmp is a macro, configure won't pick it up. */
|
||||
# ifdef sigsetjmp
|
||||
@ -769,8 +774,7 @@ fopen_excl (const char *fname, int binary)
|
||||
open_id = 13;
|
||||
fd = open( fname, /* File name. */
|
||||
flags, /* Flags. */
|
||||
0777, /* Mode for default protection.
|
||||
*/
|
||||
0777, /* Mode for default protection. */
|
||||
"rfm=stmlf", /* Stream_LF. */
|
||||
OPEN_OPT_ARGS); /* Access callback. */
|
||||
}
|
||||
@ -918,6 +922,19 @@ acceptable (const char *s)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Determine whether an URL is acceptable to be followed, according to
|
||||
regex patterns to accept/reject. */
|
||||
bool
|
||||
accept_url (const char *s)
|
||||
{
|
||||
if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
|
||||
return false;
|
||||
if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
|
||||
will return true if and only if D2 begins with `/something/' or is exactly
|
||||
'/something'. */
|
||||
@ -1826,6 +1843,17 @@ number_to_static_string (wgint number)
|
||||
ringpos = (ringpos + 1) % RING_SIZE;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* Converts the byte to bits format if --report-bps option is enabled
|
||||
*/
|
||||
wgint
|
||||
convert_to_bits (wgint num)
|
||||
{
|
||||
if (opt.report_bps)
|
||||
return num * 8;
|
||||
return num;
|
||||
}
|
||||
|
||||
|
||||
/* Determine the width of the terminal we're running on. If that's
|
||||
not possible, return 0. */
|
||||
@ -2299,6 +2327,89 @@ base64_decode (const char *base64, void *dest)
|
||||
return q - (char *) dest;
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBPCRE
/* Compile STR as a PCRE regular expression.

   Returns the compiled pattern as an opaque pointer, or NULL after
   printing a diagnostic to stderr when STR does not compile.  */
void *
compile_pcre_regex (const char *str)
{
  const char *errbuf;
  int erroffset;
  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
  if (! regex)
    {
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      /* This function returns a pointer, so signal failure with NULL
         rather than the boolean constant `false'.  */
      return NULL;
    }
  return regex;
}
#endif
|
||||
|
||||
/* Compile STR as a POSIX extended regular expression (REG_EXTENDED,
   REG_NOSUB).

   Returns a newly allocated regex_t as an opaque pointer, or NULL after
   printing a diagnostic to stderr when STR does not compile.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof (regex_t));
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* A first call with a zero-sized buffer reports how much room the
         message needs; regerror returns that size as a size_t.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* The pattern never compiled, so there is nothing to regfree; just
         release the storage that would otherwise leak on this path.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
|
||||
|
||||
#ifdef HAVE_LIBPCRE
#define OVECCOUNT 30
/* Match STR against the compiled PCRE pattern REGEX.

   Returns true when pcre_exec reports a match.  Returns false when it
   reports PCRE_ERROR_NOMATCH, and also for any other negative status,
   in which case the status code is logged at LOG_VERBOSE.  */
bool
match_pcre_regex (const void *regex, const char *str)
{
  int captures[OVECCOUNT];
  int subject_len = strlen (str);
  int status = pcre_exec ((pcre *) regex, 0, str, subject_len, 0, 0,
                          captures, OVECCOUNT);

  if (status == PCRE_ERROR_NOMATCH)
    return false;

  if (status >= 0)
    return true;

  /* Any remaining negative status is a matching error.  */
  logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
             quote (str), status);
  return false;
}
#undef OVECCOUNT
#endif
|
||||
|
||||
/* Matches a POSIX regex. */
|
||||
bool
|
||||
match_posix_regex (const void *regex, const char *str)
|
||||
{
|
||||
int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
|
||||
if (rc == REG_NOMATCH)
|
||||
return false;
|
||||
else if (rc == 0)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
|
||||
char *errbuf = xmalloc (errbuf_size);
|
||||
regerror (rc, opt.acceptregex, errbuf, errbuf_size);
|
||||
logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
|
||||
quote (str), rc);
|
||||
xfree (errbuf);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#undef IS_ASCII
|
||||
#undef NEXT_CHAR
|
||||
|
||||
|
10
src/utils.h
10
src/utils.h
@ -90,6 +90,7 @@ char *file_merge (const char *, const char *);
|
||||
|
||||
int fnmatch_nocase (const char *, const char *, int);
|
||||
bool acceptable (const char *);
|
||||
bool accept_url (const char *);
|
||||
bool accdir (const char *s);
|
||||
char *suffix (const char *s);
|
||||
bool match_tail (const char *, const char *, bool);
|
||||
@ -127,6 +128,7 @@ char *human_readable (HR_NUMTYPE);
|
||||
int numdigit (wgint);
|
||||
char *number_to_string (char *, wgint);
|
||||
char *number_to_static_string (wgint);
|
||||
wgint convert_to_bits (wgint);
|
||||
|
||||
int determine_screen_width (void);
|
||||
int random_number (int);
|
||||
@ -141,6 +143,14 @@ void xsleep (double);
|
||||
int base64_encode (const void *, int, char *);
|
||||
int base64_decode (const char *, void *);
|
||||
|
||||
#ifdef HAVE_LIBPCRE
|
||||
void *compile_pcre_regex (const char *);
|
||||
bool match_pcre_regex (const void *, const char *);
|
||||
#endif
|
||||
|
||||
void *compile_posix_regex (const char *);
|
||||
bool match_posix_regex (const void *, const char *);
|
||||
|
||||
void stable_sort (void *, size_t, size_t, int (*) (const void *, const void *));
|
||||
|
||||
const char *print_decimal (double);
|
||||
|
1440
src/warc.c
Normal file
1440
src/warc.c
Normal file
File diff suppressed because it is too large
Load Diff
23
src/warc.h
Normal file
23
src/warc.h
Normal file
@ -0,0 +1,23 @@
|
||||
/* Declarations of WARC helper methods. */
#ifndef WARC_H
#define WARC_H

#include "host.h"

/* Set-up and tear-down.  */
void warc_init (void);
void warc_close (void);

/* Helpers that fill a caller-supplied character buffer.  */
void warc_timestamp (char *timestamp);
void warc_uuid_str (char *id_str);

FILE * warc_tempfile (void);

/* Record writers.  Each returns a bool.  */
bool warc_write_request_record (char *url,
                                char *timestamp_str,
                                char *concurrent_to_uuid,
                                ip_address *ip,
                                FILE *body,
                                off_t payload_offset);

bool warc_write_response_record (char *url,
                                 char *timestamp_str,
                                 char *concurrent_to_uuid,
                                 ip_address *ip,
                                 FILE *body,
                                 off_t payload_offset,
                                 char *mime_type,
                                 int response_code,
                                 char *redirect_location);

bool warc_write_resource_record (char *resource_uuid,
                                 const char *url,
                                 const char *timestamp_str,
                                 const char *concurrent_to_uuid,
                                 ip_address *ip,
                                 const char *content_type,
                                 FILE *body,
                                 off_t payload_offset);

#endif /* WARC_H */
|
@ -353,7 +353,9 @@ typedef enum
|
||||
PROXERR,
|
||||
/* 50 */
|
||||
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR,
|
||||
UNLINKERR, NEWLOCATION_KEEP_POST
|
||||
UNLINKERR, NEWLOCATION_KEEP_POST, CLOSEFAILED,
|
||||
|
||||
WARC_ERR, WARC_TMP_FOPENERR, WARC_TMP_FWRITEERR
|
||||
} uerr_t;
|
||||
|
||||
struct range {
|
||||
|
@ -1,3 +1,9 @@
|
||||
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add Test-stdouterr.px.
|
||||
* run-px (tests): Likewise.
|
||||
* Test-stdouterr.px: New file.
|
||||
|
||||
2011-06-03 Merinov Nikolay <kim.roader@gmail.com>
|
||||
|
||||
* Test-idn-cmd-utf8.px: Added test for idn with utf-8 local encoding.
|
||||
|
@ -124,6 +124,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
|
||||
Test-restrict-ascii.px \
|
||||
Test-Restrict-Lowercase.px \
|
||||
Test-Restrict-Uppercase.px \
|
||||
Test-stdouterr.px \
|
||||
Test--spider-fail.px \
|
||||
Test--spider.px \
|
||||
Test--spider-r-HTTP-Content-Disposition.px \
|
||||
|
48
tests/Test-stdouterr.px
Executable file
48
tests/Test-stdouterr.px
Executable file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env perl

# Runs wget with its output directed to /dev/full (via -O) and expects the
# run to finish with exit code 3.  On systems without /dev/full the script
# exits with status 2, which the original marks as "skip".

use strict;
use warnings;

use HTTPTest;


###############################################################################

# Bail out before building any fixtures when /dev/full is not available.
if (! -e "/dev/full") {
    exit(2); # skip
}

# Pages offered to wget, keyed by path: code, msg, headers, content
my %urls = (
    '/somefile.txt' => {
        code    => "200",
        msg     => "Dontcare",
        headers => { "Content-type" => "text/plain" },
        content => "blabla",
    },
);

my $cmdline = join q{},
    $WgetTest::WGETPATH,
    " -c http://localhost:{{port}}/somefile.txt -O /dev/full";

my $expected_error_code = 3;

# No files exist beforehand and none are expected afterwards.
my %existing_files;
my %expected_downloaded_files;

###############################################################################

my %test_spec = (
    name     => "Test-stdouterr",
    input    => \%urls,
    cmdline  => $cmdline,
    errcode  => $expected_error_code,
    existing => \%existing_files,
    output   => \%expected_downloaded_files,
);

my $the_test = HTTPTest->new (%test_spec);
exit $the_test->run();

# vim: et ts=4 sw=4
|
||||
|
@ -74,6 +74,7 @@ my @tests = (
|
||||
'Test-restrict-ascii.px',
|
||||
'Test-Restrict-Lowercase.px',
|
||||
'Test-Restrict-Uppercase.px',
|
||||
'Test-stdouterr.px',
|
||||
'Test--spider-fail.px',
|
||||
'Test--spider-r-HTTP-Content-Disposition.px',
|
||||
'Test--spider-r--no-content-disposition.px',
|
||||
|
@ -33,11 +33,11 @@ my $tex_content = read_file($tex_file);
|
||||
|
||||
my @args = ([
|
||||
$main_content,
|
||||
qr/static \s+? struct \s+? cmdline_option \s+? option_data\[\] \s+? = \s+? \{ (.*?) \}\;/sx,
|
||||
qr/static \s+? struct \s+? cmdline_option \s+? option_data\[\] \s+? = \s+? \{ (.+?) \}\;/sx,
|
||||
[ qw(long_name short_name type data argtype) ],
|
||||
], [
|
||||
$init_content,
|
||||
qr/commands\[\] \s+? = \s+? \{ (.*?) \}\;/sx,
|
||||
qr/commands\[\] \s+? = \s+? \{ (.+?) \}\;/sx,
|
||||
[ qw(name place action) ],
|
||||
]);
|
||||
|
||||
@ -78,18 +78,18 @@ sub extract_entries
|
||||
my (@entries, %index, $i);
|
||||
|
||||
foreach my $chunk (@$chunks) {
|
||||
my ($args) = $chunk =~ /\{ \s+? (.*?) \s+? \}/sx;
|
||||
my ($args) = $chunk =~ /\{ \s+? (.+?) \s+? \}/sx;
|
||||
next unless defined $args;
|
||||
|
||||
my @args = map {
|
||||
tr/'"//d; $_
|
||||
} map {
|
||||
/\((.*?)\)/ ? $1 : $_
|
||||
/\((.+?)\)/ ? $1 : $_
|
||||
} split /\,\s+/, $args;
|
||||
|
||||
my $entry = { map { $_ => shift @args } @$names };
|
||||
|
||||
($entry->{line}) = $chunk =~ /^ \s+? (\{.*)/mx;
|
||||
($entry->{line}) = $chunk =~ /^ \s+? (\{.+)/mx;
|
||||
if ($chunk =~ /deprecated/i) {
|
||||
$entries[-1]->{deprecated} = true;
|
||||
}
|
||||
@ -103,9 +103,9 @@ sub extract_entries
|
||||
push @entries, $entry;
|
||||
}
|
||||
|
||||
push @entries, \%index;
|
||||
push @entries, { %index };
|
||||
|
||||
return \@entries;
|
||||
return [ @entries ];
|
||||
}
|
||||
|
||||
sub output_results
|
||||
@ -281,7 +281,7 @@ sub emit_undocumented_opts
|
||||
while ($tex =~ /^\@item\w*? \s+? --([-a-z0-9]+)/gmx) {
|
||||
$tex_items{$1} = true;
|
||||
}
|
||||
my ($help) = $main =~ /\n print_help .*? \{\n (.*) \n\} \n/sx;
|
||||
my ($help) = $main =~ /\n print_help .*? \{\n (.+) \n\} \n/sx;
|
||||
while ($help =~ /--([-a-z0-9]+)/g) {
|
||||
$main_items{$1} = true;
|
||||
}
|
||||
|
@ -128,5 +128,11 @@ main (int argc, char *argv[])
|
||||
exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (close (fd) < 0)
|
||||
{
|
||||
perror (PROGRAM_NAME ": close");
|
||||
exit (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user