Resolve conflicts to complete merging with master branch.

This commit is contained in:
Ilim Ugur 2012-08-14 19:47:52 +03:00
commit eb01e9d442
58 changed files with 3581 additions and 533 deletions

View File

@ -1,3 +1,68 @@
2012-07-08 Giuseppe Scrivano <gscrivano@gnu.org>
* bootstrap: Update from gnulib.
* bootstrap.conf (gnulib_extra_files): Remove $build_aux/missing.
* lib/Makefile.am: Delete file.
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
* bootstrap.conf (gnulib_modules): Remove `closeout'.
Reported by: Micah Cowan <micah@cowan.name>.
2012-05-31 Ángel González <keisial@gmail.com>
* convert.c: fix segfault on wrong urls (bug 36570)
2012-05-13 Giuseppe Scrivano <gscrivano@gnu.org>
* bootstrap.conf (gnulib_modules): Add `git-version-gen'.
* build-aux/bzr-version-gen: Remove file.
* configure.ac: Invoke `build-aux/git-version-gen' to get the dist
version.
* Makefile.am (EXTRA_DIST): Distribute build-aux/git-version-gen instead
of build-aux/bzr-version-gen.
2012-04-11 Gijs van Tulder <gvtulder@gmail.com>
* bootstrap.conf (gnulib_modules): Include module `regex'.
* configure.ac: Check for PCRE library.
2012-03-25 Ray Satiro <raysatiro@yahoo.com>
* configure.ac: Fix build under mingw when OpenSSL is used.
2012-03-20 Ángel González <keisial@gmail.com>
* bootstrap.conf (gnulib_modules): Add modules `ftello',
`mkstemp' and `strtok_r'.
2012-02-26 Giuseppe Scrivano <gscrivano@gnu.org>
* bootstrap.conf (gnulib_modules): Add module `closeout'.
2012-01-09 Gijs van Tulder <gvtulder@gmail.com>
* configure.ac: Always try to use libz, even without SSL.
2011-12-12 Giuseppe Scrivano <gscrivano@gnu.org>
* Makefile.am (EXTRA_DIST): Add build-aux/bzr-version-gen.
Reported by: Elan Ruusamäe <glen@pld-linux.org>.
2011-12-11 Giuseppe Scrivano <gscrivano@gnu.org>
* util/trunc.c (main): Call `close' on the fd and check for errors.
Reported by: <dga@cs.cmu.edu>.
2011-10-23 Giuseppe Scrivano <gscrivano@gnu.org>
* bootstrap.conf (gnulib_modules): Include module `vsnprintf'.
2011-10-16 Steven Schubiger <stsc@member.fsf.org>
* util/paramcheck.pl: Match 1 or more times where applicable.
(extract_entries): Return a copy instead of reference.
2011-09-04 Alan Hourihane <alanh@fairlite.co.uk> (tiny change)
* configure.ac: Check for libz when gnutls is used.

View File

@ -46,7 +46,7 @@ SUBDIRS = lib src doc po tests util
EXTRA_DIST = ChangeLog.README MAILING-LIST \
msdos/ChangeLog msdos/config.h msdos/Makefile.DJ \
msdos/Makefile.WC ABOUT-NLS \
build-aux/build_info.pl .version
build-aux/build_info.pl build-aux/git-version-gen .version
CLEANFILES = *~ *.bak $(DISTNAME).tar.gz

26
NEWS
View File

@ -1,16 +1,37 @@
GNU Wget NEWS -- history of user-visible changes.
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
See the end for copying conditions.
Please send GNU Wget bug reports to <bug-wget@gnu.org>.
* Changes in Wget X.Y.Z
* Changes in Wget 1.14
** Add support for content-on-error. It allows storing the HTTP
payload on 4xx or 5xx errors.
** Add support for WARC files.
** Fix a memory leak problem in the GNU TLS backend.
** Autoreconf works again for distributed tarballs.
** Print some diagnostic messages to stderr not to stdout.
** Report stdout close errors.
** Accept the --report-speed option.
** Enable client certificates when GNU TLS is used.
** Add support for TLS Server Name Indication.
** Accept the arguments --accept-regex and --reject-regex.
** The GNU TLS backend correctly honors the timeout value.
** Add support for RFC 2617 Digest Access Authentication.
* Changes in Wget 1.13.4
@ -23,6 +44,7 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
** Return a network failure when FTP downloads fail and --timestamping
is specified.
** Fix a segfault on an incomplete STYLE tag.
* Changes in Wget 1.13.3

View File

@ -1,6 +1,6 @@
#! /bin/sh
# Print a version string.
scriptversion=2012-07-19.14; # UTC
scriptversion=2012-07-06.11; # UTC
# Bootstrap this package from checked-out sources.
@ -77,32 +77,15 @@ Running without arguments will suffice in most cases.
EOF
}
# warnf_ FORMAT-STRING ARG1...
warnf_ ()
warn()
{
warnf_format_=$1
shift
nl='
'
case $* in
*$nl*) me_=$(printf "$me"|tr "$nl|" '??')
printf "$warnf_format_" "$@" | sed "s|^|$me_: |" ;;
*) printf "$me: $warnf_format_" "$@" ;;
esac >&2
for i
do
echo "$i"
done | sed -e "s/^/$me: /" >&2
}
# warn_ WORD1...
warn_ ()
{
# If IFS does not start with ' ', set it and emit the warning in a subshell.
case $IFS in
' '*) warnf_ '%s\n' "$*";;
*) (IFS=' '; warn_ "$@");;
esac
}
# die WORD1...
die() { warn_ "$@"; exit 1; }
die() { warn "$@"; exit 1; }
# Configuration.
@ -354,7 +337,8 @@ grep '^[ ]*AC_CONFIG_AUX_DIR(\['"$build_aux"'\])' configure.ac \
grep '^[ ]*AC_CONFIG_AUX_DIR('"$build_aux"')' configure.ac \
>/dev/null && found_aux_dir=yes
test $found_aux_dir = yes \
|| die "configure.ac lacks 'AC_CONFIG_AUX_DIR([$build_aux])'; add it"
|| die "expected line not found in configure.ac. Add the following:" \
" AC_CONFIG_AUX_DIR([$build_aux])"
# If $build_aux doesn't exist, create it now, otherwise some bits
# below will malfunction. If creating it, also mark it as ignored.
@ -460,7 +444,7 @@ check_versions() {
automake-ng|aclocal-ng)
app=${app%-ng}
($app --version | grep '(GNU automake-ng)') >/dev/null 2>&1 || {
warn_ "Error: '$app' not found or not from Automake-NG"
warn "Error: '$app' not found or not from Automake-NG"
ret=1
continue
} ;;
@ -470,14 +454,7 @@ check_versions() {
# so we have to rely on $? rather than get_version.
$app --version >/dev/null 2>&1
if [ 126 -le $? ]; then
warn_ "Error: '$app' not found"
ret=1
fi
else
# Require app to produce a new enough version string.
inst_ver=$(get_version $app)
if [ ! "$inst_ver" ]; then
warn_ "Error: '$app' not found"
warn "Error: '$app' not found"
ret=1
else
latest_ver=$(sort_ver $req_ver $inst_ver | cut -d' ' -f2)
@ -488,6 +465,20 @@ check_versions() {
ret=1
fi
fi
else
# Require app to produce a new enough version string.
inst_ver=$(get_version $app)
if [ ! "$inst_ver" ]; then
warn "Error: '$app' not found"
ret=1
else
latest_ver=$(sort_ver $req_ver $inst_ver | cut -d' ' -f2)
if [ ! "$latest_ver" = "$inst_ver" ]; then
warn "Error: '$app' version == $inst_ver is too old" \
" '$app' version >= $req_ver is required"
ret=1
fi
fi
fi
done
@ -780,7 +771,7 @@ version_controlled_file() {
grep -F "/${file##*/}/" "$parent/CVS/Entries" 2>/dev/null |
grep '^/[^/]*/[0-9]' > /dev/null
else
warn_ "no version control for $file?"
warn "no version control for $file?"
false
fi
}

View File

@ -1,5 +1,5 @@
# bootstrap.conf - Bootstrap configuration.
# Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
# Inc.
#
# This file is part of GNU Wget.
@ -28,6 +28,7 @@ gnulib_modules="
accept
alloca
announce-gen
base32
bind
c-ctype
clock-time
@ -35,11 +36,13 @@ close
connect
fcntl
futimens
ftello
getaddrinfo
getopt-gnu
getpass-gnu
getpeername
getsockname
git-version-gen
gnupload
ioctl
iconv
@ -48,6 +51,7 @@ listen
maintainer-makefile
mbtowc
mkdir
mkstemp
crypto/md5
crypto/sha1
crypto/sha256
@ -55,6 +59,7 @@ pipe
quote
quotearg
recv
regex
select
send
setsockopt
@ -65,9 +70,12 @@ socket
stdbool
strcasestr
strerror_r-posix
strtok_r
tmpdir
unlocked-io
update-copyright
vasprintf
vsnprintf
write
"

View File

@ -1,57 +0,0 @@
#!/bin/sh
scriptversion=2011-08-11.08; # UTC
# Copyright (C) 2010, 2011 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Additional permission under GNU GPL version 3 section 7
# Written by Giuseppe Scrivano.
# Print a version string on stdout, taken from .tarball-version when that
# file exists and otherwise built from the bzr tags and revision number.

# A .tarball-version file takes precedence: print its contents with
# newlines removed and stop.
if test -f .tarball-version
then
cat .tarball-version | tr -d '\n'
exit 0
fi
# DIRTY becomes "-dirty" when `bzr diff` produces any output at all.
DIRTY=""
test -n "`bzr diff | tr -d '\n'`" && DIRTY="-dirty"
# Revision number as reported by `bzr revno`.
REVNO=`bzr revno`
# First space-separated field of the `bzr tags` output for that revision.
TAG=`bzr tags -r $REVNO | cut -d' ' -f1`
if test -z "$TAG"
then
# Nothing was printed for this exact revision: use the first field of the
# last line of the time-sorted tag list for the range ..$REVNO instead.
TAG=`bzr tags --sort=time -r ..$REVNO | tail -n1 | cut -d' ' -f1`
# No tags yet
test -z "$TAG" && TAG="unknown"
# In this fallback case the revision number is appended to the tag.
TAG=$TAG-$REVNO
fi
# Emit the tag followed by the dirty marker, with no trailing newline.
printf "%s%s" "$TAG" "$DIRTY"
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

View File

@ -31,7 +31,7 @@ dnl Process this file with autoconf to produce a configure script.
dnl
AC_INIT([wget],
[m4_esyscmd([build-aux/bzr-version-gen])],
m4_esyscmd([build-aux/git-version-gen .tarball-version]),
[bug-wget@gnu.org])
AC_PREREQ(2.61)
@ -65,6 +65,9 @@ AC_ARG_WITH(ssl,
[[ --without-ssl disable SSL autodetection
--with-ssl={gnutls,openssl} specify the SSL backend. GNU TLS is the default.]])
AC_ARG_WITH(zlib,
[[ --without-zlib disable zlib ]])
AC_ARG_ENABLE(opie,
[ --disable-opie disable support for opie or s/key FTP login],
ENABLE_OPIE=$enableval, ENABLE_OPIE=yes)
@ -246,6 +249,10 @@ dnl
dnl Checks for libraries.
dnl
AS_IF([test x"$with_zlib" != xno], [
AC_CHECK_LIB(z, compress)
])
AS_IF([test x"$with_ssl" = xopenssl], [
dnl some versions of openssl use zlib compression
AC_CHECK_LIB(z, compress)
@ -269,6 +276,9 @@ AS_IF([test x"$with_ssl" = xopenssl], [
AC_CHECK_LIB(ssl32, SSL_connect, [
ssl_found=yes
AC_MSG_NOTICE([Enabling support for SSL via OpenSSL (shared)])
AC_LIBOBJ([openssl])
LIBS="${LIBS} -lssl32"
AC_DEFINE([HAVE_LIBSSL32], [1], [Define to 1 if you have the `ssl32' library (-lssl32).])
],
AC_MSG_ERROR([openssl not found: shared lib eay32 found but ssl32 not found]))
@ -294,6 +304,7 @@ AS_IF([test x$ssl_found != xyes],
], [SSL_library_init ()])
if test x"$LIBSSL" != x
then
ssl_found=yes
AC_MSG_NOTICE([compiling in support for SSL via OpenSSL])
AC_LIBOBJ([openssl])
LIBS="$LIBSSL $LIBS"
@ -301,9 +312,6 @@ AS_IF([test x$ssl_found != xyes],
then
AC_MSG_ERROR([--with-ssl=openssl was given, but SSL is not available.])
fi
AC_LIBOBJ([openssl])
])
], [
@ -321,6 +329,7 @@ AS_IF([test x$ssl_found != xyes],
], [gnutls_global_init()])
if test x"$LIBGNUTLS" != x
then
ssl_found=yes
AC_MSG_NOTICE([compiling in support for SSL via GnuTLS])
AC_LIBOBJ([gnutls])
LIBS="$LIBGNUTLS $LIBS"
@ -333,7 +342,7 @@ AS_IF([test x$ssl_found != xyes],
]) # endif: --with-ssl == openssl?
dnl Enable NTLM if requested and if SSL is available.
if test x"$LIBSSL" != x
if test x"$LIBSSL" != x || test "$ac_cv_lib_ssl32_SSL_connect" = yes
then
if test x"$ENABLE_NTLM" != xno
then
@ -586,7 +595,31 @@ if test "X$iri" != "Xno"; then
fi
fi
dnl
dnl Check for UUID
dnl
AC_CHECK_HEADER(uuid/uuid.h,
AC_CHECK_LIB(uuid, uuid_generate,
[LIBS="${LIBS} -luuid"
AC_DEFINE([HAVE_LIBUUID], 1,
[Define if libuuid is available.])
])
)
dnl
dnl Check for PCRE
dnl
AC_CHECK_HEADER(pcre.h,
AC_CHECK_LIB(pcre, pcre_compile,
[LIBS="${LIBS} -lpcre"
AC_DEFINE([HAVE_LIBPCRE], 1,
[Define if libpcre is available.])
])
)
dnl Needed by src/Makefile.am
AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
AM_CONDITIONAL([METALINK_IS_ENABLED], [test "X$metalink" != "Xno"])

View File

@ -1,3 +1,19 @@
2012-08-04 mancha <mancha@mac.hush.com> (tiny change)
* wget.texi: Export ENVIRONMENT to the man page.
2012-06-09 Giuseppe Scrivano <gscrivano@gnu.org>
* wget.texi (Logging and Input File Options): Document "--report-speed".
(HTTPS (SSL/TLS) Options): Document WARC.
* texi2pod.pl: Revert change from 2011-08-06.
2012-05-13 Giuseppe Scrivano <gscrivano@gnu.org>
* wget.texi (Types of Files): Document --accept-regex and
--reject-regex.
2011-10-02 Henrik Holst <henrik.holst@millistream.com> (tiny change)
* wget.texi (HTTP Options): Document option --content-on-error.

View File

@ -1,6 +1,7 @@
#! /usr/bin/env perl
# Copyright (C) 1999, 2000, 2001, 2003, 2010 Free Software Foundation, Inc.
# Copyright (C) 1999, 2000, 2001, 2003, 2007, 2009, 2010, 2011 Free
# Software Foundation, Inc.
# This file is part of GCC.
@ -15,14 +16,15 @@
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING. If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston MA 02110-1301, USA.
# along with GCC. If not, see <http://www.gnu.org/licenses/>.
# This does trivial (and I mean _trivial_) conversion of Texinfo
# markup to Perl POD format. It's intended to be used to extract
# something suitable for a manpage from a Texinfo document.
use warnings;
BEGIN { eval { require warnings; } and warnings->import; }
$output = 0;
$skipping = 0;
%sects = ();
@ -36,7 +38,6 @@ $shift = "";
$fnno = 1;
$inf = "";
$ibase = "";
@ipath = ();
while ($_ = shift) {
if (/^-D(.*)$/) {
@ -52,13 +53,6 @@ while ($_ = shift) {
die "flags may only contain letters, digits, hyphens, dashes and underscores\n"
unless $flag =~ /^[a-zA-Z0-9_-]+$/;
$defs{$flag} = $value;
} elsif (/^-I(.*)$/) {
if ($1 ne "") {
$flag = $1;
} else {
$flag = shift;
}
push (@ipath, $flag);
} elsif (/^-/) {
usage();
} else {
@ -162,8 +156,6 @@ while(<$inf>) {
} elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) {
$_ = "\n=back\n";
$ic = pop @icstack;
} elsif ($ended eq "multitable") {
$_ = "\n=back\n";
} else {
die "unknown command \@end $ended at line $.\n";
}
@ -213,18 +205,14 @@ while(<$inf>) {
# Now the ones that have to be replaced by special escapes
# (which will be turned back into text by unmunge())
# Replace @@ before @{ and @} in order to parse @samp{@@} correctly.
s/&/&amp;/g;
s/\@\@/&at;/g;
s/\@\{/&lbrace;/g;
s/\@\}/&rbrace;/g;
s/\@`\{(.)\}/&$1grave;/g;
# Inside a verbatim block, handle @var, @samp and @url specially.
# Inside a verbatim block, handle @var specially.
if ($shift ne "") {
s/\@var\{([^\}]*)\}/<$1>/g;
s/\@samp\{([^\}]*)\}/"$1"/g;
s/\@url\{([^\}]*)\}/<$1>/g;
}
# POD doesn't interpret E<> inside a verbatim block.
@ -243,23 +231,17 @@ while(<$inf>) {
$inf = gensym();
$file = postprocess($1);
# Try cwd and $ibase, then explicit -I paths.
$done = 0;
foreach $path ("", $ibase, @ipath) {
$mypath = $file;
$mypath = $path . "/" . $mypath if ($path ne "");
open($inf, "<" . $mypath) and ($done = 1, last);
}
die "cannot find $file" if !$done;
# Try cwd and $ibase.
open($inf, "<" . $file)
or open($inf, "<" . $ibase . "/" . $file)
or die "cannot open $file or $ibase/$file: $!\n";
next;
};
/^\@(?:section|unnumbered|unnumberedsec|center|heading)\s+(.+)$/
/^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/
and $_ = "\n=head2 $1\n";
/^\@subsection\s+(.+)$/
and $_ = "\n=head3 $1\n";
/^\@subsubsection\s+(.+)$/
and $_ = "\n=head4 $1\n";
# Block command handlers:
/^\@itemize(?:\s+(\@[a-z]+|\*|-))?/ and do {
@ -268,7 +250,7 @@ while(<$inf>) {
if (defined $1) {
$ic = $1;
} else {
$ic = '*';
$ic = '@bullet';
}
$_ = "\n=over 4\n";
$endw = "itemize";
@ -286,12 +268,6 @@ while(<$inf>) {
$endw = "enumerate";
};
/^\@multitable\s.*/ and do {
push @endwstack, $endw;
$endw = "multitable";
$_ = "\n=over 4\n";
};
/^\@([fv]?table)\s+(\@[a-z]+)/ and do {
push @endwstack, $endw;
push @icstack, $ic;
@ -301,7 +277,6 @@ while(<$inf>) {
$ic =~ s/\@(?:code|kbd)/C/;
$ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
$ic =~ s/\@(?:file)/F/;
$ic =~ s/\@(?:asis)//;
$_ = "\n=over 4\n";
};
@ -312,29 +287,14 @@ while(<$inf>) {
$_ = ""; # need a paragraph break
};
/^\@item\s+(.*\S)\s*$/ and $endw eq "multitable" and do {
@columns = ();
for $column (split (/\s*\@tab\s*/, $1)) {
# @strong{...} is used a @headitem work-alike
$column =~ s/^\@strong{(.*)}$/$1/;
push @columns, $column;
}
$_ = "\n=item ".join (" : ", @columns)."\n";
};
/^\@itemx?\s*(.+)?$/ and do {
if (defined $1) {
if ($ic) {
if ($endw eq "enumerate") {
$_ = "\n=item $ic $1\n";
$ic =~ s/(\d+)/$1 + 1/eg;
} else {
# Entity escapes prevent munging by the <>
# processing below.
$_ = "\n=item $ic\&LT;$1\&GT;\n";
}
my $thing = $1;
if ($ic =~ /\@asis/) {
$_ = "\n=item $thing\n";
} else {
$_ = "\n=item $1\n";
# Entity escapes prevent munging by the <> processing below.
$_ = "\n=item $ic\&LT;$thing\&GT;\n";
}
} else {
$_ = "\n=item $ic\n";
@ -355,11 +315,12 @@ die "No filename or title\n" unless defined $fn && defined $tl;
$sects{NAME} = "$fn \- $tl\n";
$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT FILES
BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT EXITSTATUS
FILES BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
if(exists $sects{$sect}) {
$head = $sect;
$head =~ s/SEEALSO/SEE ALSO/;
$head =~ s/EXITSTATUS/EXIT STATUS/;
print "=head1 $head\n\n";
print scalar unmunge ($sects{$sect});
print "\n";
@ -391,13 +352,11 @@ sub postprocess
s/\@r\{([^\}]*)\}/R<$1>/g;
s/\@(?:dfn|var|emph|cite|i)\{([^\}]*)\}/I<$1>/g;
s/\@(?:code|kbd)\{([^\}]*)\}/C<$1>/g;
s/\@(?:samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g;
s/\@(?:gccoptlist|samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g;
s/\@sc\{([^\}]*)\}/\U$1/g;
s/\@acronym\{([^\}]*)\}/\U$1/g;
s/\@file\{([^\}]*)\}/F<$1>/g;
s/\@w\{([^\}]*)\}/S<$1>/g;
s/\@(?:dmn|math)\{([^\}]*)\}/$1/g;
s/\@\///g;
# keep references of the form @ref{...}, print them bold
s/\@(?:ref)\{([^\}]*)\}/B<$1>/g;
@ -419,9 +378,6 @@ sub postprocess
s/\@gol//g;
s/\@\*\s*\n?//g;
# Anchors are thrown away
s/\@anchor\{(?:[^\}]*)\}//g;
# @uref can take one, two, or three arguments, with different
# semantics each time. @url and @email are just like @uref with
# one argument, for our purposes.
@ -429,10 +385,6 @@ sub postprocess
s/\@uref\{([^\},]*),([^\},]*)\}/$2 (C<$1>)/g;
s/\@uref\{([^\},]*),([^\},]*),([^\},]*)\}/$3/g;
# Handle gccoptlist here, so it can contain the above formatting
# commands.
s/\@gccoptlist\{([^\}]*)\}/B<$1>/g;
# Un-escape <> at this point.
s/&LT;/</g;
s/&GT;/>/g;
@ -466,7 +418,6 @@ sub unmunge
# Replace escaped symbols with their equivalents.
local $_ = $_[0];
s/&(.)grave;/E<$1grave>/g;
s/&lt;/E<lt>/g;
s/&gt;/E<gt>/g;
s/&lbrace;/\{/g;

View File

@ -479,6 +479,10 @@ Turn off verbose without being completely quiet (use @samp{-q} for
that), which means that error messages and basic information still get
printed.
@item --report-speed=@var{type}
Output bandwidth as @var{type}. The only accepted value is @samp{bits}.
@cindex input-file
@item -i @var{file}
@itemx --input-file=@var{file}
@ -1658,6 +1662,36 @@ not used), EGD is never contacted. EGD is not needed on modern Unix
systems that support @file{/dev/random}.
@end table
@cindex WARC
@table @samp
@item --warc-file=@var{file}
Use @var{file} as the destination WARC file.
@item --warc-header=@var{string}
Insert @var{string} into the warcinfo record.
@item --warc-max-size=@var{size}
Set the maximum size of the WARC files to @var{size}.
@item --warc-cdx
Write CDX index files.
@item --warc-dedup=@var{file}
Do not store records listed in this CDX file.
@item --no-warc-compression
Do not compress WARC files with GZIP.
@item --no-warc-digests
Do not calculate SHA1 digests.
@item --no-warc-keep-log
Do not store the log file in a WARC record.
@item --warc-tempdir=@var{dir}
Specify the location for temporary files created by the WARC writer.
@end table
@node FTP Options, Recursive Retrieval Options, HTTPS (SSL/TLS) Options, Invoking
@section FTP Options
@ -2284,6 +2318,8 @@ in @file{.wgetrc}.
@item -A @var{acclist}
@itemx --accept @var{acclist}
@itemx accept = @var{acclist}
@itemx --accept-regex @var{urlregex}
@itemx accept-regex = @var{urlregex}
The argument to @samp{--accept} option is a list of file suffixes or
patterns that Wget will download during recursive retrieval. A suffix
is the ending part of a file, and consists of ``normal'' letters,
@ -2300,6 +2336,9 @@ a description of how pattern matching works.
Of course, any number of suffixes and patterns can be combined into a
comma-separated list, and given as an argument to @samp{-A}.
The argument to @samp{--accept-regex} option is a regular expression which
is matched against the complete URL.
@cindex reject wildcards
@cindex reject suffixes
@cindex wildcards, reject
@ -2307,6 +2346,8 @@ comma-separated list, and given as an argument to @samp{-A}.
@item -R @var{rejlist}
@itemx --reject @var{rejlist}
@itemx reject = @var{rejlist}
@itemx --reject-regex @var{urlregex}
@itemx reject-regex = @var{urlregex}
The @samp{--reject} option works the same way as @samp{--accept}, only
its logic is the reverse; Wget will download all files @emph{except} the
ones matching the suffixes (or patterns) in the list.
@ -2318,6 +2359,9 @@ Analogously, to download all files except the ones beginning with
expansion by the shell.
@end table
The argument to @samp{--reject-regex} option is a regular expression which
is matched against the complete URL.
@noindent
The @samp{-A} and @samp{-R} options may be combined to achieve even
better fine-tuning of which files to retrieve. E.g. @samp{wget -A
@ -3532,28 +3576,30 @@ internal networks from the rest of Internet. In order to obtain
information from the Web, their users connect and retrieve remote data
using an authorized proxy.
@c man begin ENVIRONMENT
Wget supports proxies for both @sc{http} and @sc{ftp} retrievals. The
standard way to specify proxy location, which Wget recognizes, is using
the following environment variables:
@table @code
@table @env
@item http_proxy
@itemx https_proxy
If set, the @code{http_proxy} and @code{https_proxy} variables should
If set, the @env{http_proxy} and @env{https_proxy} variables should
contain the @sc{url}s of the proxies for @sc{http} and @sc{https}
connections respectively.
@item ftp_proxy
This variable should contain the @sc{url} of the proxy for @sc{ftp}
connections. It is quite common that @code{http_proxy} and
@code{ftp_proxy} are set to the same @sc{url}.
connections. It is quite common that @env{http_proxy} and
@env{ftp_proxy} are set to the same @sc{url}.
@item no_proxy
This variable should contain a comma-separated list of domain extensions
proxy should @emph{not} be used for. For instance, if the value of
@code{no_proxy} is @samp{.mit.edu}, proxy will not be used to retrieve
@env{no_proxy} is @samp{.mit.edu}, proxy will not be used to retrieve
documents from MIT.
@end table
@c man end
In addition to the environment variables, proxy location and settings
may be specified from within Wget itself.

View File

@ -1,3 +1,350 @@
2012-07-03 Steven Schubiger <stsc@member.fsf.org>
* init.c: Include warc.h for warc_close in cleanup function.
2012-07-08 Steven Schubiger <stsc@member.fsf.org>
* exits.h: Fix comment.
* exits.c: Likewise.
2012-07-07 Tim Ruehsen <tim.ruehsen@gmx.de>
(digest_authentication_encode): Add support for RFC 2617 Digest
Access Authentication.
2012-07-07 Giuseppe Scrivano <gscrivano@gnu.org>
* http.c (http_loop): Fix log message.
* main.c (main): Likewise.
Reported by: Petr Pisar <petr.pisar@atlas.cz>
2012-06-17 Giuseppe Scrivano <gscrivano@gnu.org>
* wget.h: Define `CLOSEFAILED'.
* init.c: Include "exits.h".
(cleanup): Check `fclose' failure.
* exits.c (get_status_for_err): Handle `CLOSEFAILED'.
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
* main.c (main): Move some cleanup related function to...
* init.c (cleanup): ...here.
* main.c: Do not include "stdout.h".
(main): Do not register `close_stdout' at exit.
Reported by: Micah Cowan <micah@cowan.name>.
2012-06-09 Giuseppe Scrivano <gscrivano@gnu.org>
* main.c (print_help): Move --report-speed under the section
"Logging and input file".
2012-06-06 Giuseppe Scrivano <gscrivano@gnu.org>
* main.c (print_help): Rename --bits to --report-bps.
(cmdline_options): Likewise.
* init.c (commands): Rename --report-bps to --report-speed.
(cmd_spec_report_speed): New function.
* options.h (struct options): Rename `bits_fmt' to `report_bps'.
* main.c (print_help): Rename --bits to --report-bps.
(cmdline_options): Likewise.
* init.c (commands): Likewise
* progress.c (create_image): Adjust caller.
* retr.c (retr_rate): Likewise.
* utils.c (convert_to_bits): Likewise.
2012-06-04 Tim Ruehsen <tim.ruehsen@gmx.de>
* main.c (main): Check for filename != NULL.
* warc.c (warc_process_cdx_line): Fix memory leak.
* utils.c (match_posix_regex, compile_posix_regex): Remove dead
assignment.
* openssl.c (ssl_init): Fix old-style function definition.
2012-06-02 Giuseppe Scrivano <gscrivano@gnu.org>
* connect.c: Include <sys/socket.h> and <sys/select.h>.
2012-05-30 Gijs van Tulder <gvtulder@gmail.com>
* warc.c: Fix segfault if CDX record is not found.
2011-05-26 Steven Schweda <sms@antinode.info>
* connect.c [HAVE_SYS_SOCKET_H]: Include <sys/socket.h>.
[HAVE_SYS_SELECT_H]: Include <sys/select.h>.
2012-05-26 Mike Frysinger <vapier@gentoo.org>
* warc.c: Change type of `warc_current_gzfile' to gzFile.
2012-05-26 Giuseppe Scrivano <gscrivano@gnu.org>
* warc.c (warc_load_cdx_dedup_file): Change type of `line_length' to
ssize_t.
Suggested by: Ángel González <keisial@gmail.com>
2012-05-18 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (wgnutls_poll): Honor the specified `timeout' value.
(wgnutls_peek): Likewise.
2012-05-19 illusionoflife <illusion.of.life92@gmail.com> (tiny change)
* convert.c (register_html,register_css): Fixed function signatures to
not accept an unused argument.
* retr.c (retrieve_url): Changed register_{css,html} usage according to
the new signature.
2012-05-16 Giuseppe Scrivano <gscrivano@gnu.org>
* warc.h: Cut length lines to 80 columns.
* warc.c: Likewise.
2012-05-14 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (wgnutls_read_timeout): removed warnings, moved fcntl stuff
outside loop.
* hash.h (hash_table_put): Make argument "value" const.
* hash.c (hash_table_put): Make argument value const. Cast `value' to
void.
* http.c (request_set_header): Make argument `name' const. Cast `value'
and `name' to void*.
(request_remove_header): Make argument `name' const.
* url.c (url_file_name): Make `index_filename' static.
* warc.h (warc_write_cdx_record): Make `url', `timestamp', `mime_type',
`payload_digest', `redirect_location', `warc_filename', `response_uuid'
arguments const. Make `checksum' const.
* warc.c (warc_write_date_header): Make the `timestamp' argument const.
Make `extension' const.
(warc_write_cdx_record): Make `url', `timestamp', `mime_type',
`payload_digest', `redirect_location', `warc_filename', `response_uuid'
arguments const. Make `checksum' const.
2012-05-13 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (credentials): Change type to
gnutls_certificate_credentials_t.
(ssl_init): Do not use deprecated types.
(ssl_connect_wget): Likewise.
2012-04-11 Gijs van Tulder <gvtulder@gmail.com>
* init.c: Add --accept-regex, --reject-regex and --regex-type.
* main.c: Likewise.
* options.c: Likewise.
* recur.c: Likewise.
* utils.c: Add regex-related functions.
* utils.h: Add regex-related functions.
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
* convert.c (convert_links_in_hashtable): Make it static.
* cookies.c (parse_set_cookie): Remove empty else branches.
* css-url.c: Include "css-url.h".
(get_uri_string): Make it static.
* css-url.h (get_urls_css): Add prototype.
* gnutls.c (ssl_init): Add prototype.
* html-parse.c (tagstack_push): Make it static.
* html-parse.c (tagstack_pop): Make it static.
* html-parse.c (tagstack_find): Make it static.
* html-url.c (cleanup_html_url): Make it static.
* progress.c (count_cols): Make it static.
* progress.c (get_eta): Make it static.
* retr.h (convert_to_bits): Remove prototype.
* util.h (convert_to_bits): Add prototype.
* spider.c (spider_cleanup): Make it static.
* warc.c (warc_write_start_record): Add prototype.
* warc.c (warc_write_end_record): Add prototype.
* warc.c (warc_start_cdx_file): Add prototype.
* warc.c (warc_init): Add prototype.
* warc.c (warc_load_cdx_dedup_file): Add prototype.
* warc.c (warc_write_metadata): Add prototype.
* warc.c (warc_close): Add prototype.
* warc.c (warc_tempfile): Add prototype.
* warc.c (warc_write_warcinfo_record): Make it static.
* warc.c (warc_load_cdx_dedup_file): Make it static.
* warc.c (warc_write_metadata): Make it static.
* warc.h (warc_init): Fix prototype.
* warc.h (warc_close): Fix prototype.
* warc.h (warc_tempfile): Fix prototype.
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de>
* url.c: Use empty query in local filenames.
2012-04-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* main.c (main): Dynamically allocate `opt.progress_type'.
2012-04-21 Tim Ruehsen <tim.ruehsen@gmx.de>
* ftp-basic.c (ftp_pasv): Fix memory leak.
* http.c (gethttp): Fix memory leak.
* ftp.c (getftp): Silence compiler warning.
2009-06-14 Phil Pennock <mutt-dev@spodhuis.org> (tiny change)
* host.h: Declare `is_valid_ip_address'.
* host.c (is_valid_ip_address): New function.
* http.c (gethttp): Specify the hostname to ssl_connect_wget.
* gnutls.c (ssl_connect_wget): Specify the server name.
* openssl.c (ssl_connect_wget): Likewise.
* ssl.h: Change method signature for ssl_connect_wget.
2012-04-13 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
* warc.c (warc_load_cdx_dedup_file): Fix a memory leak by freeing
`lineptr'.
2012-04-07 Daniel Kahn Gillmor <dkg@fifthhorseman.net> (tiny change)
* gnutls.c (key_type_to_gnutls_type): New function.
(ssl_init): Use correctly the specified gnutls certificate.
2012-04-01 Gijs van Tulder <gvtulder@gmail.com>
* html-url.c: Prevent crash on incomplete STYLE tag.
2012-04-01 Giuseppe Scrivano <gscrivano@gnu.org>
* gnutls.c (wgnutls_read_timeout): Ensure timer is freed.
* gnutls.c (wgnutls_read_timeout): Do not use timer if it is not
allocated.
Reported by: Xu Zhongxing <xu_zhong_xing@163.com>
2012-03-30 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
* warc.c: make warc_uuid_str() implementation depend on HAVE_LIBUUID.
2012-03-29 Tim Ruehsen <tim.ruehsen@gmx.de> (tiny change)
* utils.c (library): Include <sys/time.h>.
2012-03-25 Giuseppe Scrivano <gscrivano@gnu.org>
* utils.c: Include <sys/ioctl.h>.
* ptimer.c: Include <sys/time.h>.
* connect.c: Include <sys/socket.h>, <sys/select.h>, <sys/time.h>.
Reported by: Ray Satiro <raysatiro@yahoo.com>.
2012-03-25 Ray Satiro <raysatiro@yahoo.com>
* build_info.c.in: Check that HAVE_LIBSSL32 is defined when OpenSSL
is used.
2012-03-07 Steven Schubiger <stsc@member.fsf.org>
* init.c (wgetrc_user_file_name): Correct typo.
2012-03-06 Sasikantha Babu <sasikanth.v19@gmail.com>
* utils.c (convert_to_bits): Added new function convert_to_bits to
convert bytes to bits.
* retr.c (calc_rate): Modified the function to handle --bits
option and download rate calculated as bits per sec (SI-prefix)
for --bits otherwise bytes (IEC-prefix).
(retr_rate): Rates will display in bits per sec for --bits.
* options.h (struct opt): Added --bits option bool variable bits_fmt.
* main.c (print_help) : Added help for --bits.
* init.c: Defined command for --bits option.
* retr.h: Added function prototype.
2012-02-26 Giuseppe Scrivano <gscrivano@gnu.org>
* main.c: Include "closeout.h"
(main): Register close_stdout at exit.
2012-02-01 Gijs van Tulder <gvtulder@gmail.com>
* warc.c: Fix large file support with ftello, fseeko.
* warc.h: Fix large file support.
* http.c: Fix large file support.
2012-02-23 Giuseppe Scrivano <giuseppe@southpole.se>
* main.c (main): Write diagnostic messages to `stderr' not to `stdout'.
* main.c (main): Fail gracefully if `malloc' fails.
* gnutls.c (wgnutls_read): Remove unused variables `timer' and `flags'.
2012-02-17 Steven Schubiger <stsc@member.fsf.org>
* warc.c: Add license header.
2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
* retr.c (fd_read_body): If the response is chunked, the chunk
headers are now written to the WARC file, making the WARC file
an exact copy of the HTTP response.
2012-01-27 Gijs van Tulder <gvtulder@gmail.com>
* retr.c (fd_read_body): Fix a memory leak with chunked responses.
* http.c (skip_short_body): Fix the same memory leak.
2012-01-09 Gijs van Tulder <gvtulder@gmail.com>
* init.c: Disable WARC compression if zlib is disabled.
* main.c: Do not show the 'no-warc-compression' option if zlib is
disabled.
* warc.c: Do not compress WARC files if zlib is disabled.
2012-01-09 Sasikantha Babu <sasikanth.v19@gmail.com> (tiny change)
* connect.c (connect_to_ip): properly formatted ipv6 address display.
(socket_family): New function - returns socket family type.
* http.c (gethttp): properly formatted ipv6 address display.
2011-11-09 Gijs van Tulder <address@hidden>
* warc.c: Call gzdopen() with wb9 instead of wb+9, which fails on
zlib version >= 1.2.4.
2011-11-04 Steven Schweda <address@hidden>
* warc.c [! WINDOWS]: Include <libgen.h>.
(warc_write_warcinfo_record): Assign a new allocated buffer and
free it on errors.
2011-11-01 Steven Schweda <address@hidden>
* gnutls.c (ssl_init): Ensure GNU TLS is loaded only once.
2011-10-07 Steven Schweda <address@hidden>
* connect.c: Add HAVE_SYS_SELECT_H and HAVE_SYS_SOCKET_H conditions
on includes of <sys/select.h> and <sys/socket.h>, respectively.
* ftp.c (getftp): Move BIN_TYPE_TRANSFER macro into VMS-specific
section. On VMS, use Stream_LF attributes for listing files. Pass
BIN_TYPE_FILE to fopen_excl() instead of constant-everywhere "true".
* ftp.c (ftp_retrieve_list): Restore lost test of opt.preserve_perm
(--preserve-permissions) on the chmod() operation.
* init.c, main.c: Remove "deprecated" from opt.preserve_perm
(--preserve-permissions).
* init.c (initialize): Use distinct messages for errors in C macro
SYSTEM_WGETRC and environment-variable SYSTEM_WGETRC. Avoid use of
C macro SYSTEM_WGETRC when it's not defined.
* log.c (log_close): Avoid closing logfp when it's stderr.
* main.c (print_help): Restore --preserve-permissions.
* main.c (main): Avoid using a negative value of longindex as a
subscript (for long_options[]) when searching for "--config".
* main.c (main): Exit the program using exit() instead of "return".
(VMS handles these differently, and exit() is better.)
* openssl.c (ssl_init): Add type cast (SSL_METHOD *) to newly "const"
"meth" argument to accommodate OpenSSL version 0.9.8, where that
argument is not "const" in the OpenSSL function (SSL_CTX_new).
* test.c: Declare "program_argstring".
* utils.c (fopen_excl): Comment typography.
* warc.h: New file.
* warc.c: New file.
2011-10-02 Henrik Holst <henrik.holst@millistream.com> (tiny change)
* http.c (gethttp): If 'contentonerror' is used then do not
skip the http body on 4xx and 5xx errors.

View File

@ -36,9 +36,11 @@ IRI_OBJ = iri.c
endif
if METALINK_IS_ENABLED
METALINK_OBJ = metalink.c
METALINK_HEADER = metalink.h
endif
if THREADS_ARE_ENABLED
THREAD_OBJ = multi.c
MULTI_HEADER = multi.h
endif
# The following line is losing on some versions of make!
@ -52,17 +54,18 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \
css_.c css-url.c \
ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
recur.c res.c retr.c spider.c url.c \
utils.c exits.c build_info.c $(IRI_OBJ) \
recur.c res.c retr.c spider.c url.c warc.c \
utils.c exits.c build_info.c $(IRI_OBJ) \
$(THREAD_OBJ) $(METALINK_OBJ) \
css-url.h css-tokens.h connect.h convert.h cookies.h \
ftp.h hash.h host.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h \
exits.h gettext.h metalink.h multi.h
spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \
exits.h gettext.h $(THREAD_HEADER) $(METALINK_HEADER)
nodist_wget_SOURCES = version.c
EXTRA_wget_SOURCES = iri.c metalink.c
EXTRA_wget_SOURCES = iri.c metalink.c multi.c
LDADD = $(LIBOBJS) ../lib/libgnu.a
AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib

View File

@ -11,5 +11,5 @@ threads defined ENABLE_THREADS
metalink defined ENABLE_METALINK
ssl choice:
openssl defined HAVE_LIBSSL
openssl defined HAVE_LIBSSL || defined HAVE_LIBSSL32
gnutls defined HAVE_LIBGNUTLS

View File

@ -53,9 +53,7 @@ as that of the covered work. */
#include <errno.h>
#include <string.h>
#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#endif
#include <sys/time.h>
#include "utils.h"
#include "host.h"
#include "connect.h"
@ -293,7 +291,12 @@ connect_to_ip (const ip_address *ip, int port, const char *print)
xfree (str);
}
else
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
{
if (ip->family == AF_INET)
logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
else if (ip->family == AF_INET6)
logprintf (LOG_VERBOSE, _("Connecting to [%s]:%d... "), txt_addr, port);
}
}
/* Store the sockaddr info to SA. */
@ -581,6 +584,36 @@ socket_ip_address (int sock, ip_address *ip, int endpoint)
}
}
/* Report the address family of the socket SOCK.

   ENDPOINT selects which side of the connection is queried:
   ENDPOINT_LOCAL asks getsockname for the local address, and
   ENDPOINT_PEER asks getpeername for the remote address.  Any other
   ENDPOINT value aborts the program.

   Returns the sa_family field of the queried address on success, or
   -1 if the underlying getsockname/getpeername call fails.  */

int
socket_family (int sock, int endpoint)
{
  struct sockaddr_storage ss;
  struct sockaddr *sa = (struct sockaddr *) &ss;
  socklen_t len = sizeof (ss);
  int status;

  /* Start from a zeroed buffer, as the address is read back below.  */
  memset (&ss, 0, sizeof (ss));

  switch (endpoint)
    {
    case ENDPOINT_LOCAL:
      status = getsockname (sock, sa, &len);
      break;
    case ENDPOINT_PEER:
      status = getpeername (sock, sa, &len);
      break;
    default:
      abort ();
    }

  return status < 0 ? -1 : sa->sa_family;
}
/* Return true if the error from the connect code can be considered
retryable. Wget normally retries after errors, but the exception
are the "unsupported protocol" type errors (possible on IPv4/IPv6

View File

@ -51,6 +51,7 @@ enum {
ENDPOINT_PEER
};
bool socket_ip_address (int, ip_address *, int);
int socket_family (int sock, int endpoint);
bool retryable_socket_connect_error (int);

View File

@ -87,7 +87,7 @@ static pthread_mutex_t convert_mutex = PTHREAD_MUTEX_INITIALIZER;
static void convert_links (const char *, struct urlpos *);
void
static void
convert_links_in_hashtable (struct hash_table *downloaded_set,
int is_css,
int *file_count)
@ -153,6 +153,9 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
set_uri_encoding (pi, opt.locale, true);
u = url_parse (cur_url->url->url, NULL, pi, true);
if (!u)
continue;
local_name = hash_table_get (dl_url_file_map, u->url);
/* Decide on the conversion type. */

View File

@ -101,8 +101,8 @@ downloaded_file_t downloaded_file (downloaded_file_t, const char *);
void register_download (const char *, const char *);
void register_redirection (const char *, const char *);
void register_html (const char *, const char *);
void register_css (const char *, const char *);
void register_html (const char *);
void register_css (const char *);
void register_delete_file (const char *);
void convert_all_links (void);
void convert_cleanup (void);

View File

@ -391,6 +391,9 @@ parse_set_cookie (const char *set_cookie, bool silent)
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
/* Check if expiration spec is valid.
If not, assume default (cookie doesn't expire, but valid only for
this session.) */
expires = http_atotm (value_copy);
if (expires != (time_t) -1)
{
@ -402,10 +405,6 @@ parse_set_cookie (const char *set_cookie, bool silent)
if (cookie->expiry_time < cookies_now)
cookie->discard_requested = 1;
}
else
/* Error in expiration spec. Assume default (cookie doesn't
expire, but valid only for this session.) */
;
}
else if (TOKEN_IS (name, "max-age"))
{
@ -433,9 +432,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
/* ignore value completely */
cookie->secure = 1;
}
else
/* Ignore unrecognized attribute. */
;
/* else: Ignore unrecognized attribute. */
}
if (*ptr)
/* extract_param has encountered a syntax error */

View File

@ -55,6 +55,7 @@ as that of the covered work. */
#include "convert.h"
#include "html-url.h"
#include "css-tokens.h"
#include "css-url.h"
/* from lex.yy.c */
extern char *yytext;
@ -107,7 +108,7 @@ const char *token_names[] = {
whitespace after the opening parenthesis and before the closing
parenthesis.
*/
char *
static char *
get_uri_string (const char *at, int *pos, int *length)
{
char *uri;

View File

@ -30,6 +30,7 @@ as that of the covered work. */
#ifndef CSS_URL_H
#define CSS_URL_H
void get_urls_css (struct map_context *, int, int);
void get_urls_css (struct map_context *, int, int);
struct urlpos *get_urls_css_file (const char *, const char *);

View File

@ -1,7 +1,5 @@
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
Inc.
/* Exit status handling.
Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
@ -60,7 +58,7 @@ get_status_for_err (uerr_t err)
case RETROK:
return WGET_EXIT_SUCCESS;
case FOPENERR: case FOPEN_EXCL_ERR: case FWRITEERR: case WRITEFAILED:
case UNLINKERR:
case UNLINKERR: case CLOSEFAILED:
return WGET_EXIT_IO_FAIL;
case NOCONERROR: case HOSTERR: case CONSOCKERR: case CONERROR:
case CONSSLERR: case CONIMPOSSIBLE: case FTPRERR: case FTPINVPASV:

View File

@ -1,5 +1,5 @@
/* Internationalization related declarations.
Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
/* Exit status related declarations.
Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.

View File

@ -524,7 +524,10 @@ ftp_pasv (int csock, ip_address *addr, int *port)
for (s += 4; *s && !c_isdigit (*s); s++)
;
if (!*s)
return FTPINVPASV;
{
xfree (respline);
return FTPINVPASV;
}
for (i = 0; i < 6; i++)
{
tmp[i] = 0;
@ -593,7 +596,10 @@ ftp_lpsv (int csock, ip_address *addr, int *port)
for (s += 4; *s && !c_isdigit (*s); s++)
;
if (!*s)
return FTPINVPASV;
{
xfree (respline);
return FTPINVPASV;
}
/* First, get the address family */
af = 0;

125
src/ftp.c
View File

@ -49,6 +49,7 @@ as that of the covered work. */
#include "netrc.h"
#include "convert.h" /* for downloaded_file */
#include "recur.h" /* for INFINITE_RECURSION */
#include "warc.h"
#ifdef __VMS
# include "vms.h"
@ -237,17 +238,17 @@ static uerr_t ftp_get_listing (struct url *, ccon *, struct fileinfo **);
/* Retrieves a file with denoted parameters through opening an FTP
connection to the server. It always closes the data connection,
and closes the control connection in case of error. */
and closes the control connection in case of error. If warc_tmp
is non-NULL, the downloaded data will be written there as well. */
static uerr_t
getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
wgint restval, ccon *con, int count)
wgint restval, ccon *con, int count, FILE *warc_tmp)
{
int csock, dtsock, local_sock, res;
uerr_t err = RETROK; /* appease the compiler */
FILE *fp;
char *user, *passwd, *respline;
char *tms;
const char *tmrate;
char *respline, *tms;
const char *user, *passwd, *tmrate;
int cmd = con->cmd;
bool pasv_mode_open = false;
wgint expected_bytes = 0;
@ -287,13 +288,6 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
{
char *host = con->proxy ? con->proxy->host : u->host;
int port = con->proxy ? con->proxy->port : u->port;
char *logname = user;
if (con->proxy)
{
/* If proxy is in use, log in as username@target-site. */
logname = concat_strings (user, "@", u->host, (char *) 0);
}
/* Login to the server: */
@ -301,20 +295,10 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
csock = connect_to_host (host, port);
if (csock == E_HOST)
{
if (con->proxy)
xfree (logname);
return HOSTERR;
}
else if (csock < 0)
{
if (con->proxy)
xfree (logname);
return (retryable_socket_connect_error (errno)
? CONERROR : CONIMPOSSIBLE);
}
if (cmd & LEAVE_PENDING)
con->csock = csock;
@ -326,10 +310,15 @@ getftp (struct url *u, wgint passed_expected_bytes, wgint *qtyread,
quotearg_style (escape_quoting_style, user));
if (opt.server_response)
logputs (LOG_ALWAYS, "\n");
err = ftp_login (csock, logname, passwd);
if (con->proxy)
xfree (logname);
{
/* If proxy is in use, log in as username@target-site. */
char *logname = concat_strings (user, "@", u->host, (char *) 0);
err = ftp_login (csock, logname, passwd);
xfree (logname);
}
else
err = ftp_login (csock, user, passwd);
/* FTPRERR, FTPSRVERR, WRITEFAILED, FTPLOGREFUSED, FTPLOGINC */
switch (err)
@ -512,7 +501,7 @@ Error in server response, closing control connection.\n"));
logputs (LOG_VERBOSE, _("==> CWD not needed.\n"));
else
{
char *targ = NULL;
const char *targ = NULL;
int cwd_count;
int cwd_end;
int cwd_start;
@ -1152,13 +1141,25 @@ Error in server response, closing control connection.\n"));
Elsewhere, define a constant "binary" flag.
Isn't it nice to have distinct text and binary file types?
*/
# define BIN_TYPE_TRANSFER (type_char != 'A')
/* 2011-09-30 SMS.
Added listing files to the set of non-"binary" (text, Stream_LF)
files. (Wget works either way, but other programs, like, say, text
editors, work better on listing files which have text attributes.)
Now we use "binary" attributes for a binary ("IMAGE") transfer,
unless "--ftp-stmlf" was specified, and we always use non-"binary"
(text, Stream_LF) attributes for a listing file, or for an ASCII
transfer.
Tidied the VMS-specific BIN_TYPE_xxx macros, and changed the call to
fopen_excl() (restored?) to use BIN_TYPE_FILE instead of "true".
*/
#ifdef __VMS
# define BIN_TYPE_TRANSFER (type_char != 'A')
# define BIN_TYPE_FILE \
((!(cmd & DO_LIST)) && BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
# define FOPEN_OPT_ARGS_BIN "ctx=bin,stm", "rfm=fix", "mrs=512" FOPEN_OPT_ARGS
# define BIN_TYPE_FILE (BIN_TYPE_TRANSFER && (opt.ftp_stmlf == 0))
#else /* def __VMS */
# define BIN_TYPE_FILE 1
# define BIN_TYPE_FILE true
#endif /* def __VMS [else] */
if (restval && !(con->cmd & DO_LIST))
@ -1182,7 +1183,7 @@ Error in server response, closing control connection.\n"));
}
else if (opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct
|| opt.output_document || count > 0)
{
{
if (opt.unlink && file_exists_p (con->target))
{
int res = unlink (con->target);
@ -1217,7 +1218,7 @@ Error in server response, closing control connection.\n"));
}
else
{
fp = fopen_excl (con->target, true);
fp = fopen_excl (con->target, BIN_TYPE_FILE);
if (!fp && errno == EEXIST)
{
/* We cannot just invent a new name and use it (which is
@ -1262,7 +1263,7 @@ Error in server response, closing control connection.\n"));
rd_size = 0;
res = fd_read_body (dtsock, fp,
expected_bytes ? expected_bytes - restval : 0,
restval, &rd_size, qtyread, &con->dltime, flags);
restval, &rd_size, qtyread, &con->dltime, flags, warc_tmp);
tms = datetime_str (time (NULL));
tmrate = retr_rate (rd_size, con->dltime);
@ -1273,15 +1274,18 @@ Error in server response, closing control connection.\n"));
if (!output_stream || con->cmd & DO_LIST)
fclose (fp);
/* If fd_read_body couldn't write to fp, bail out. */
if (res == -2)
/* If fd_read_body couldn't write to fp or warc_tmp, bail out. */
if (res == -2 || (warc_tmp != NULL && res == -3))
{
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
con->target, strerror (errno));
fd_close (csock);
con->csock = -1;
fd_close (dtsock);
return FWRITEERR;
if (res == -2)
return FWRITEERR;
else if (res == -3)
return WARC_TMP_FWRITEERR;
}
else if (res == -1)
{
@ -1397,6 +1401,11 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
uerr_t err;
struct_stat st;
/* Declare WARC variables. */
bool warc_enabled = (opt.warc_filename != NULL);
FILE *warc_tmp = NULL;
ip_address *warc_ip = NULL;
/* Get the target, and set the name for the message accordingly. */
if ((f == NULL) && (con->target))
{
@ -1433,6 +1442,21 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
orig_lp = con->cmd & LEAVE_PENDING ? 1 : 0;
/* For file RETR requests, we can write a WARC record.
We record the file contents to a temporary file. */
if (warc_enabled && (con->cmd & DO_RETR))
{
warc_tmp = warc_tempfile ();
if (warc_tmp == NULL)
return WARC_TMP_FOPENERR;
if (!con->proxy && con->csock != -1)
{
warc_ip = (ip_address *) alloca (sizeof (ip_address));
socket_ip_address (con->csock, warc_ip, ENDPOINT_PEER);
}
}
/* THE loop. */
do
{
@ -1507,7 +1531,9 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
len = range->last_byte - restval + 1;
}
err = getftp (u, len, &qtyread, restval, con, count);
/* If we are working on a WARC record, getftp should also write
to the warc_tmp file. */
err = getftp (u, len, &qtyread, restval, con, count, warc_tmp);
if (range)
range->bytes_covered = qtyread;
@ -1521,8 +1547,10 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
{
case HOSTERR: case CONIMPOSSIBLE: case FWRITEERR: case FOPENERR:
case FTPNSFOD: case FTPLOGINC: case FTPNOPASV: case CONTNOTSUPPORTED:
case UNLINKERR:
case UNLINKERR: case WARC_TMP_FWRITEERR:
/* Fatal errors, give up. */
if (warc_tmp != NULL)
fclose (warc_tmp);
return err;
case CONSOCKERR: case CONERROR: case FTPSRVERR: case FTPRERR:
case WRITEFAILED: case FTPUNKNOWNTYPE: case FTPSYSERR:
@ -1590,6 +1618,19 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi
xfree (hurl);
}
if (warc_enabled && (con->cmd & DO_RETR))
{
/* Create and store a WARC resource record for the retrieved file. */
bool warc_res;
warc_res = warc_write_resource_record (NULL, u->url, NULL, NULL,
warc_ip, NULL, warc_tmp, -1);
if (! warc_res)
return WARC_ERR;
/* warc_write_resource_record has also closed warc_tmp. */
}
if ((con->cmd & DO_LIST))
/* This is a directory listing file. */
{
@ -1893,8 +1934,10 @@ Already have correct symlink %s -> %s\n\n"),
set_local_file (&actual_target, con->target);
/* If downloading a plain file, set valid (non-zero) permissions. */
if (dlthis && (actual_target != NULL) && (f->type == FT_PLAINFILE))
/* If downloading a plain file, and the user requested it, then
set valid (non-zero) permissions. */
if (dlthis && (actual_target != NULL) &&
(f->type == FT_PLAINFILE) && opt.preserve_perm)
{
if (f->perms)
chmod (actual_target, f->perms);
@ -1927,7 +1970,9 @@ Already have correct symlink %s -> %s\n\n"),
xfree (ofile);
/* Break on fatals. */
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR
|| err == WARC_ERR || err == WARC_TMP_FOPENERR
|| err == WARC_TMP_FWRITEERR)
break;
con->cmd &= ~ (DO_CWD | DO_LOGIN);
f = f->next;

View File

@ -1,5 +1,5 @@
/* SSL support via GnuTLS library.
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
Foundation, Inc.
This file is part of GNU Wget.
@ -54,15 +54,38 @@ as that of the covered work. */
# include "w32sock.h"
#endif
#include "host.h"
/* Translate Wget's key/certificate file type TYPE into the matching
   GnuTLS X.509 encoding constant: keyfile_pem maps to
   GNUTLS_X509_FMT_PEM and keyfile_asn1 maps to GNUTLS_X509_FMT_DER.
   Any other value aborts the program.  */

static int
key_type_to_gnutls_type (enum keyfile_type type)
{
  if (type == keyfile_pem)
    return GNUTLS_X509_FMT_PEM;

  if (type == keyfile_asn1)
    return GNUTLS_X509_FMT_DER;

  /* Neither of the two handled types: treat as a programming error.  */
  abort ();
}
/* Note: some of the functions private to this file have names that
begin with "wgnutls_" (e.g. wgnutls_read) so that they wouldn't be
confused with actual gnutls functions -- such as the gnutls_read
preprocessor macro. */
static gnutls_certificate_credentials credentials;
static gnutls_certificate_credentials_t credentials;
bool
ssl_init ()
ssl_init (void)
{
/* Becomes true if GnuTLS is initialized. */
static bool ssl_initialized = false;
/* GnuTLS should be initialized only once. */
if (ssl_initialized)
return true;
const char *ca_directory;
DIR *dir;
@ -101,15 +124,48 @@ ssl_init ()
closedir (dir);
}
/* Use the private key from the cert file unless otherwise specified. */
if (opt.cert_file && !opt.private_key)
{
opt.private_key = opt.cert_file;
opt.private_key_type = opt.cert_type;
}
/* Use the cert from the private key file unless otherwise specified. */
if (!opt.cert_file && opt.private_key)
{
opt.cert_file = opt.private_key;
opt.cert_type = opt.private_key_type;
}
if (opt.cert_file && opt.private_key)
{
int type;
if (opt.private_key_type != opt.cert_type)
{
/* GnuTLS can't handle this */
logprintf (LOG_NOTQUIET, _("ERROR: GnuTLS requires the key and the \
cert to be of the same type.\n"));
}
type = key_type_to_gnutls_type (opt.private_key_type);
gnutls_certificate_set_x509_key_file (credentials, opt.cert_file,
opt.private_key,
type);
}
if (opt.ca_cert)
gnutls_certificate_set_x509_trust_file (credentials, opt.ca_cert,
GNUTLS_X509_FMT_PEM);
ssl_initialized = true;
return true;
}
struct wgnutls_transport_context
{
gnutls_session session; /* GnuTLS session handle */
gnutls_session_t session; /* GnuTLS session handle */
int last_error; /* last error returned by read/write/... */
/* Since GnuTLS doesn't support the equivalent to recv(...,
@ -132,7 +188,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
int flags = 0;
#endif
int ret = 0;
struct ptimer *timer;
struct ptimer *timer = NULL;
struct wgnutls_transport_context *ctx = arg;
int timed_out = 0;
@ -142,64 +198,56 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
flags = fcntl (fd, F_GETFL, 0);
if (flags < 0)
return flags;
if (fcntl (fd, F_SETFL, flags | O_NONBLOCK))
return -1;
#else
/* XXX: Assume it was blocking before. */
const int one = 1;
if (ioctl (fd, FIONBIO, &one) < 0)
return -1;
#endif
timer = ptimer_new ();
if (timer == 0)
if (timer == NULL)
return -1;
}
do
{
double next_timeout = timeout - ptimer_measure (timer);
if (timeout && next_timeout < 0)
break;
double next_timeout = 0;
if (timeout)
{
next_timeout = timeout - ptimer_measure (timer);
if (next_timeout < 0)
break;
}
ret = GNUTLS_E_AGAIN;
if (timeout == 0 || gnutls_record_check_pending (ctx->session)
|| select_fd (fd, next_timeout, WAIT_FOR_READ))
{
if (timeout)
{
#ifdef F_GETFL
ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
if (ret < 0)
return ret;
#else
/* XXX: Assume it was blocking before. */
const int one = 1;
ret = ioctl (fd, FIONBIO, &one);
if (ret < 0)
return ret;
#endif
}
ret = gnutls_record_recv (ctx->session, buf, bufsize);
if (timeout)
{
int status;
#ifdef F_GETFL
status = fcntl (fd, F_SETFL, flags);
if (status < 0)
return status;
#else
const int zero = 0;
status = ioctl (fd, FIONBIO, &zero);
if (status < 0)
return status;
#endif
}
timed_out = timeout && ptimer_measure (timer) >= timeout;
}
timed_out = timeout && ptimer_measure (timer) >= timeout;
}
while (ret == GNUTLS_E_INTERRUPTED || (ret == GNUTLS_E_AGAIN && !timed_out));
if (timeout)
ptimer_destroy (timer);
{
ptimer_destroy (timer);
if (timeout && timed_out && ret == GNUTLS_E_AGAIN)
errno = ETIMEDOUT;
#ifdef F_GETFL
if (fcntl (fd, F_SETFL, flags) < 0)
return -1;
#else
const int zero = 0;
if (ioctl (fd, FIONBIO, &zero) < 0)
return -1;
#endif
if (timed_out && ret == GNUTLS_E_AGAIN)
errno = ETIMEDOUT;
}
return ret;
}
@ -207,11 +255,7 @@ wgnutls_read_timeout (int fd, char *buf, int bufsize, void *arg, double timeout)
static int
wgnutls_read (int fd, char *buf, int bufsize, void *arg)
{
#ifdef F_GETFL
int flags = 0;
#endif
int ret = 0;
struct ptimer *timer;
struct wgnutls_transport_context *ctx = arg;
if (ctx->peeklen)
@ -250,8 +294,12 @@ static int
wgnutls_poll (int fd, double timeout, int wait_for, void *arg)
{
struct wgnutls_transport_context *ctx = arg;
return ctx->peeklen || gnutls_record_check_pending (ctx->session)
|| select_fd (fd, timeout, wait_for);
if (timeout)
return ctx->peeklen || gnutls_record_check_pending (ctx->session)
|| select_fd (fd, timeout, wait_for);
else
return ctx->peeklen || gnutls_record_check_pending (ctx->session);
}
static int
@ -260,15 +308,19 @@ wgnutls_peek (int fd, char *buf, int bufsize, void *arg)
int read = 0;
struct wgnutls_transport_context *ctx = arg;
int offset = MIN (bufsize, ctx->peeklen);
if (ctx->peeklen)
{
memcpy (buf, ctx->peekbuf, offset);
return offset;
}
if (bufsize > sizeof ctx->peekbuf)
bufsize = sizeof ctx->peekbuf;
if (ctx->peeklen)
memcpy (buf, ctx->peekbuf, offset);
if (bufsize > offset)
{
if (gnutls_record_check_pending (ctx->session) <= 0
if (opt.read_timeout && gnutls_record_check_pending (ctx->session) == 0
&& select_fd (fd, 0.0, WAIT_FOR_READ) <= 0)
read = 0;
else
@ -320,18 +372,26 @@ static struct transport_implementation wgnutls_transport =
};
bool
ssl_connect_wget (int fd)
ssl_connect_wget (int fd, const char *hostname)
{
struct wgnutls_transport_context *ctx;
gnutls_session session;
gnutls_session_t session;
int err;
gnutls_init (&session, GNUTLS_CLIENT);
/* We set the server name but only if it's not an IP address. */
if (! is_valid_ip_address (hostname))
{
gnutls_server_name_set (session, GNUTLS_NAME_DNS, hostname,
strlen (hostname));
}
gnutls_set_default_priority (session);
gnutls_credentials_set (session, GNUTLS_CRD_CERTIFICATE, credentials);
#ifndef FD_TO_SOCKET
# define FD_TO_SOCKET(X) (X)
#endif
gnutls_transport_set_ptr (session, (gnutls_transport_ptr) FD_TO_SOCKET (fd));
gnutls_transport_set_ptr (session, (gnutls_transport_ptr_t) FD_TO_SOCKET (fd));
err = 0;
#if HAVE_GNUTLS_PRIORITY_SET_DIRECT
@ -438,8 +498,8 @@ ssl_check_certificate (int fd, const char *host)
if (gnutls_certificate_type_get (ctx->session) == GNUTLS_CRT_X509)
{
time_t now = time (NULL);
gnutls_x509_crt cert;
const gnutls_datum *cert_list;
gnutls_x509_crt_t cert;
const gnutls_datum_t *cert_list;
unsigned int cert_list_size;
if ((err = gnutls_x509_crt_init (&cert)) < 0)

View File

@ -423,14 +423,14 @@ grow_hash_table (struct hash_table *ht)
table if necessary. */
void
hash_table_put (struct hash_table *ht, const void *key, void *value)
hash_table_put (struct hash_table *ht, const void *key, const void *value)
{
struct cell *c = find_cell (ht, key);
if (CELL_OCCUPIED (c))
{
/* update existing item */
c->key = (void *)key; /* const? */
c->value = value;
c->value = (void *)value;
return;
}
@ -445,7 +445,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
/* add new item */
++ht->count;
c->key = (void *)key; /* const? */
c->value = value;
c->value = (void *)value;
}
/* Remove KEY->value mapping from HT. Return 0 if there was no such

View File

@ -42,7 +42,7 @@ int hash_table_get_pair (const struct hash_table *, const void *,
void *, void *);
int hash_table_contains (const struct hash_table *, const void *);
void hash_table_put (struct hash_table *, const void *, void *);
void hash_table_put (struct hash_table *, const void *, const void *);
int hash_table_remove (struct hash_table *, const void *);
void hash_table_clear (struct hash_table *);

View File

@ -1,6 +1,6 @@
/* Host name resolution and matching.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@ -914,3 +914,18 @@ host_cleanup (void)
host_name_addresses_map = NULL;
}
}
/* Return true if the NUL-terminated string NAME is accepted by
   is_valid_ipv4_address or, when ENABLE_IPV6 is defined, by
   is_valid_ipv6_address; return false otherwise.  The whole string,
   up to its terminating NUL, is passed to those checkers.  */

bool
is_valid_ip_address (const char *name)
{
  const char *name_end = name + strlen (name);
  bool is_ip = is_valid_ipv4_address (name, name_end);

#ifdef ENABLE_IPV6
  /* Only consult the IPv6 checker when the IPv4 one said no.  */
  if (!is_ip)
    is_ip = is_valid_ipv6_address (name, name_end);
#endif

  return is_ip;
}

View File

@ -1,6 +1,6 @@
/* Declarations for host.c
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@ -98,6 +98,8 @@ const char *print_address (const ip_address *);
bool is_valid_ipv6_address (const char *, const char *);
#endif
bool is_valid_ip_address (const char *name);
bool accept_domain (struct url *);
bool sufmatch (const char **, const char *);

View File

@ -280,7 +280,7 @@ struct tagstack_item {
struct tagstack_item *next;
};
struct tagstack_item *
static struct tagstack_item *
tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
{
struct tagstack_item *ts = xmalloc(sizeof(struct tagstack_item));
@ -301,7 +301,7 @@ tagstack_push (struct tagstack_item **head, struct tagstack_item **tail)
}
/* remove ts and everything after it from the stack */
void
static void
tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
struct tagstack_item *ts)
{
@ -343,7 +343,7 @@ tagstack_pop (struct tagstack_item **head, struct tagstack_item **tail,
}
}
struct tagstack_item *
static struct tagstack_item *
tagstack_find (struct tagstack_item *tail, const char *tagname_begin,
const char *tagname_end)
{

View File

@ -1,6 +1,6 @@
/* Collect URLs from HTML source.
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GNU Wget.
@ -675,8 +675,9 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
check_style_attr (tag, ctx);
if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style")) &&
tag->contents_begin && tag->contents_end)
if (tag->end_tag_p && (0 == strcasecmp (tag->name, "style"))
&& tag->contents_begin && tag->contents_end
&& tag->contents_begin <= tag->contents_end)
{
/* parse contents */
get_urls_css (ctx, tag->contents_begin - ctx->text,
@ -829,7 +830,7 @@ get_urls_file (const char *file)
return head;
}
void
static void
cleanup_html_url (void)
{
/* Destroy the hash tables. The hash table keys and values are not

View File

@ -1,6 +1,6 @@
/* HTTP support.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@ -62,6 +62,7 @@ as that of the covered work. */
#include "md5.h"
#include "convert.h"
#include "spider.h"
#include "warc.h"
#ifdef TESTING
#include "test.h"
@ -234,7 +235,7 @@ release_header (struct request_header *hdr)
*/
static void
request_set_header (struct request *req, char *name, char *value,
request_set_header (struct request *req, const char *name, const char *value,
enum rp release_policy)
{
struct request_header *hdr;
@ -245,7 +246,7 @@ request_set_header (struct request *req, char *name, char *value,
/* A NULL value is a no-op; if freeing the name is requested,
free it now to avoid leaks. */
if (release_policy == rel_name || release_policy == rel_both)
xfree (name);
xfree ((void *)name);
return;
}
@ -256,8 +257,8 @@ request_set_header (struct request *req, char *name, char *value,
{
/* Replace existing header. */
release_header (hdr);
hdr->name = name;
hdr->value = value;
hdr->name = (void *)name;
hdr->value = (void *)value;
hdr->release_policy = release_policy;
return;
}
@ -271,8 +272,8 @@ request_set_header (struct request *req, char *name, char *value,
req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
}
hdr = &req->headers[req->hcount++];
hdr->name = name;
hdr->value = value;
hdr->name = (void *)name;
hdr->value = (void *)value;
hdr->release_policy = release_policy;
}
@ -299,7 +300,7 @@ request_set_user_header (struct request *req, const char *header)
the header was actually removed, false otherwise. */
static bool
request_remove_header (struct request *req, char *name)
request_remove_header (struct request *req, const char *name)
{
int i;
for (i = 0; i < req->hcount; i++)
@ -324,10 +325,12 @@ request_remove_header (struct request *req, char *name)
p += A_len; \
} while (0)
/* Construct the request and write it to FD using fd_write. */
/* Construct the request and write it to FD using fd_write.
If warc_tmp is set to a file pointer, the request string will
also be written to that file. */
static int
request_send (const struct request *req, int fd)
request_send (const struct request *req, int fd, FILE *warc_tmp)
{
char *request_string, *p;
int i, size, write_error;
@ -378,6 +381,13 @@ request_send (const struct request *req, int fd)
if (write_error < 0)
logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
fd_errstr (fd));
else if (warc_tmp != NULL)
{
/* Write a copy of the data to the WARC record. */
int warc_tmp_written = fwrite (request_string, 1, size - 1, warc_tmp);
if (warc_tmp_written != size - 1)
return -2;
}
return write_error;
}
@ -448,10 +458,12 @@ register_basic_auth_host (const char *hostname)
/* Send the contents of FILE_NAME to SOCK. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is
longer, read only that much; if the file is shorter, report an error. */
longer, read only that much; if the file is shorter, report an error.
If warc_tmp is set to a file pointer, the post data will
also be written to that file. */
static int
post_file (int sock, const char *file_name, wgint promised_size)
post_file (int sock, const char *file_name, wgint promised_size, FILE *warc_tmp)
{
static char chunk[8192];
wgint written = 0;
@ -476,6 +488,16 @@ post_file (int sock, const char *file_name, wgint promised_size)
fclose (fp);
return -1;
}
if (warc_tmp != NULL)
{
/* Write a copy of the data to the WARC record. */
int warc_tmp_written = fwrite (chunk, 1, towrite, warc_tmp);
if (warc_tmp_written != towrite)
{
fclose (fp);
return -2;
}
}
written += towrite;
}
fclose (fp);
@ -933,9 +955,12 @@ skip_short_body (int fd, wgint contlen, bool chunked)
break;
remaining_chunk_size = strtol (line, &endl, 16);
xfree (line);
if (remaining_chunk_size == 0)
{
fd_read_line (fd);
line = fd_read_line (fd);
xfree_null (line);
break;
}
}
@ -960,8 +985,13 @@ skip_short_body (int fd, wgint contlen, bool chunked)
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
if (fd_read_line (fd) == NULL)
return false;
{
char *line = fd_read_line (fd);
if (line == NULL)
return false;
else
xfree (line);
}
}
/* Safe even if %.*s bogusly expects terminating \0 because
@ -1651,6 +1681,135 @@ File %s already there; not retrieving.\n\n"), quote (filename));
*dt |= TEXTHTML;
}
/* Download the response body from the socket and write it to
an output file. The headers have already been read from the
socket. If WARC is enabled, the response body will also be
written to a WARC response record.
hs, contlen, contrange, chunked_transfer_encoding and url are
parameters from the gethttp method. fp is a pointer to the
output file.
url, warc_timestamp_str, warc_request_uuid, warc_ip, type
and statcode will be saved in the headers of the WARC record.
The head parameter contains the HTTP headers of the response.
If fp is NULL and WARC is enabled, the response body will be
written only to the WARC file. If WARC is disabled and fp
is a file pointer, the data will be written to the file.
If fp is a file pointer and WARC is enabled, the body will
be written to both destinations.
Returns the error code. */
static int
read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
wgint contrange, bool chunked_transfer_encoding,
char *url, char *warc_timestamp_str, char *warc_request_uuid,
ip_address *warc_ip, char *type, int statcode, char *head)
{
int warc_payload_offset = 0;
FILE *warc_tmp = NULL;
int warcerr = 0;
if (opt.warc_filename != NULL)
{
/* Open a temporary file where we can write the response before we
add it to the WARC record. */
warc_tmp = warc_tempfile ();
if (warc_tmp == NULL)
warcerr = WARC_TMP_FOPENERR;
if (warcerr == 0)
{
/* We should keep the response headers for the WARC record. */
int head_len = strlen (head);
int warc_tmp_written = fwrite (head, 1, head_len, warc_tmp);
if (warc_tmp_written != head_len)
warcerr = WARC_TMP_FWRITEERR;
warc_payload_offset = head_len;
}
if (warcerr != 0)
{
if (warc_tmp != NULL)
fclose (warc_tmp);
return warcerr;
}
}
if (fp != NULL)
{
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
fwrite (head, 1, strlen (head), fp);
}
/* Read the response body. */
int flags = 0;
if (contlen != -1)
/* If content-length is present, read that much; otherwise, read
until EOF. The HTTP spec doesn't require the server to
actually close the connection when it's done sending data. */
flags |= rb_read_exactly;
if (fp != NULL && hs->restval > 0 && contrange == 0)
/* If the server ignored our range request, instruct fd_read_body
to skip the first RESTVAL bytes of body. */
flags |= rb_skip_startpos;
if (chunked_transfer_encoding)
flags |= rb_chunked_transfer_encoding;
hs->len = hs->restval;
hs->rd_size = 0;
/* Download the response body and write it to fp.
If we are working on a WARC file, we simultaneously write the
response body to warc_tmp. */
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
flags, warc_tmp);
if (hs->res >= 0)
{
if (warc_tmp != NULL)
{
/* Create a response record and write it to the WARC file.
Note: per the WARC standard, the request and response should share
the same date header. We re-use the timestamp of the request.
The response record should also refer to the uuid of the request. */
bool r = warc_write_response_record (url, warc_timestamp_str,
warc_request_uuid, warc_ip,
warc_tmp, warc_payload_offset,
type, statcode, hs->newloc);
/* warc_write_response_record has closed warc_tmp. */
if (! r)
return WARC_ERR;
}
return RETRFINISHED;
}
if (warc_tmp != NULL)
fclose (warc_tmp);
if (hs->res == -2)
{
/* Error while writing to fd. */
return FWRITEERR;
}
else if (hs->res == -3)
{
/* Error while writing to warc_tmp. */
return WARC_TMP_FWRITEERR;
}
else
{
/* A read error! */
hs->rderrmsg = xstrdup (fd_errstr (sock));
return RETRFINISHED;
}
}
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
&& (c_isspace (line[sizeof (string_constant) - 1]) \
@ -1708,9 +1867,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
wgint contlen, contrange;
struct url *conn;
FILE *fp;
int err;
int sock = -1;
int flags;
#ifdef ENABLE_THREADS
struct s_pconn *pconn = NULL;
@ -1740,6 +1899,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
char hdrval[256];
char *message;
/* Declare WARC variables. */
bool warc_enabled = (opt.warc_filename != NULL);
FILE *warc_tmp = NULL;
char warc_timestamp_str [21];
char warc_request_uuid [48];
ip_address *warc_ip = NULL;
off_t warc_payload_offset = -1;
/* Whether this connection will be kept alive after the HTTP request
is done. */
bool keep_alive;
@ -1995,12 +2162,18 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
0,
#endif
&host_lookup_failed))
{
{
int family = socket_family (pconn.socket, ENDPOINT_PEER);
sock = pconn.socket;
using_ssl = pconn.ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
if (family == AF_INET6)
logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
else
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn.authorized)
#else
@ -2015,11 +2188,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
if (pconn)
{
int family = socket_family (pconn->socket, ENDPOINT_PEER);
sock = pconn->socket;
using_ssl = pconn->ssl;
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
quotearg_style (escape_quoting_style, pconn->host),
pconn->port);
if (family == AF_INET6)
logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"),
quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
else
logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
quotearg_style (escape_quoting_style, pconn.host),
pconn.port);
DEBUGP (("Reusing fd %d.\n", sock));
if (pconn->authorized)
#endif
@ -2076,11 +2255,12 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
that the contents of Host would be exactly the same as
the contents of CONNECT. */
write_error = request_send (connreq, sock);
write_error = request_send (connreq, sock, 0);
request_free (connreq);
if (write_error < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
return WRITEFAILED;
}
@ -2090,6 +2270,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
fd_errstr (sock));
CLOSE_INVALIDATE (sock);
request_free (req);
return HERR;
}
message = NULL;
@ -2110,6 +2291,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
quotearg_style (escape_quoting_style,
_("Malformed status line")));
xfree (head);
request_free (req);
return HERR;
}
hs->message = xstrdup (message);
@ -2121,6 +2303,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
message ? quotearg_style (escape_quoting_style, message) : "?");
xfree_null (message);
request_free (req);
return CONSSLERR;
}
xfree_null (message);
@ -2133,14 +2316,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
if (conn->scheme == SCHEME_HTTPS)
{
if (!ssl_connect_wget (sock))
if (!ssl_connect_wget (sock, u->host))
{
fd_close (sock);
request_free (req);
return CONSSLERR;
}
else if (!ssl_check_certificate (sock, u->host))
{
fd_close (sock);
request_free (req);
return VERIFCERTERR;
}
using_ssl = true;
@ -2148,8 +2333,26 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
#endif /* HAVE_SSL */
}
/* Open the temporary file where we will write the request. */
if (warc_enabled)
{
warc_tmp = warc_tempfile ();
if (warc_tmp == NULL)
{
CLOSE_INVALIDATE (sock);
request_free (req);
return WARC_TMP_FOPENERR;
}
if (! proxy)
{
warc_ip = (ip_address *) alloca (sizeof (ip_address));
socket_ip_address (sock, warc_ip, ENDPOINT_PEER);
}
}
/* Send the request to server. */
write_error = request_send (req, sock);
write_error = request_send (req, sock, warc_tmp);
if (write_error >= 0)
{
@ -2157,16 +2360,39 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
{
DEBUGP (("[POST data: %s]\n", opt.post_data));
write_error = fd_write (sock, opt.post_data, post_data_size, -1);
if (write_error >= 0 && warc_tmp != NULL)
{
/* Remember end of headers / start of payload. */
warc_payload_offset = ftello (warc_tmp);
/* Write a copy of the data to the WARC record. */
int warc_tmp_written = fwrite (opt.post_data, 1, post_data_size, warc_tmp);
if (warc_tmp_written != post_data_size)
write_error = -2;
}
}
else if (opt.post_file_name && post_data_size != 0)
write_error = post_file (sock, opt.post_file_name, post_data_size);
{
if (warc_tmp != NULL)
/* Remember end of headers / start of payload. */
warc_payload_offset = ftello (warc_tmp);
write_error = post_file (sock, opt.post_file_name, post_data_size, warc_tmp);
}
}
if (write_error < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
return WRITEFAILED;
if (warc_tmp != NULL)
fclose (warc_tmp);
if (write_error == -2)
return WARC_TMP_FWRITEERR;
else
return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
proxy ? "Proxy" : "HTTP");
@ -2174,6 +2400,29 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
contrange = 0;
*dt &= ~RETROKF;
if (warc_enabled)
{
bool warc_result;
/* Generate a timestamp and uuid for this request. */
warc_timestamp (warc_timestamp_str);
warc_uuid_str (warc_request_uuid);
/* Create a request record and store it in the WARC file. */
warc_result = warc_write_request_record (u->url, warc_timestamp_str,
warc_request_uuid, warc_ip,
warc_tmp, warc_payload_offset);
if (! warc_result)
{
CLOSE_INVALIDATE (sock);
request_free (req);
return WARC_ERR;
}
/* warc_write_request_record has also closed warc_tmp. */
}
read_header:
head = read_http_response_head (sock);
if (!head)
@ -2209,6 +2458,7 @@ read_header:
quotearg_style (escape_quoting_style,
_("Malformed status line")));
CLOSE_INVALIDATE (sock);
resp_free (resp);
request_free (req);
xfree (head);
return HERR;
@ -2217,6 +2467,7 @@ read_header:
if (H_10X (statcode))
{
DEBUGP (("Ignoring response\n"));
resp_free (resp);
xfree (head);
goto read_header;
}
@ -2297,11 +2548,42 @@ read_header:
if (statcode == HTTP_STATUS_UNAUTHORIZED)
{
/* Authorization is required. */
if (keep_alive && !head_only
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
/* Normally we are not interested in the response body.
But if we are writing a WARC file we are: we like to keep everything. */
if (warc_enabled)
{
int err;
type = resp_header_strdup (resp, "Content-Type");
err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
xfree_null (type);
if (err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
request_free (req);
xfree_null (message);
resp_free (resp);
xfree (head);
return err;
}
else
CLOSE_FINISH (sock);
}
else
CLOSE_INVALIDATE (sock);
{
/* Since WARC is disabled, we are not interested in the response body. */
if (keep_alive && !head_only
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
}
#ifndef ENABLE_THREADS
pconn.authorized = false;
#else
@ -2416,6 +2698,8 @@ read_header:
retrieve the file. But if the output_document was given, then this
test was already done and the file didn't exist. Hence the !opt.output_document */
get_file_flags (hs->local_file, dt);
request_free (req);
resp_free (resp);
xfree (head);
xfree_null (message);
return RETRUNNEEDED;
@ -2559,11 +2843,42 @@ read_header:
_("Location: %s%s\n"),
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
hs->newloc ? _(" [following]") : "");
if (keep_alive && !head_only
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
/* In case the caller cares to look... */
hs->len = 0;
hs->res = 0;
hs->restval = 0;
/* Normally we are not interested in the response body of a redirect.
But if we are writing a WARC file we are: we like to keep everything. */
if (warc_enabled)
{
int err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
if (err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
xfree_null (type);
xfree (head);
return err;
}
else
CLOSE_FINISH (sock);
}
else
CLOSE_INVALIDATE (sock);
{
/* Since WARC is disabled, we are not interested in the response body. */
if (keep_alive && !head_only
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
}
xfree_null (type);
xfree (head);
/* From RFC2616: The status codes 303 and 307 have
@ -2680,8 +2995,6 @@ read_header:
logputs (LOG_VERBOSE, "\n");
}
}
xfree_null (type);
type = NULL; /* We don't need it any more. */
/* Return if we have no intention of further downloading. */
if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
@ -2689,21 +3002,48 @@ read_header:
/* In case the caller cares to look... */
hs->len = 0;
hs->res = 0;
xfree_null (type);
if (head_only)
/* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
servers not to send body in response to a HEAD request, and
those that do will likely be caught by test_socket_open.
If not, they can be worked around using
`--no-http-keep-alive'. */
CLOSE_FINISH (sock);
else if (keep_alive
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
/* Successfully skipped the body; also keep using the socket. */
CLOSE_FINISH (sock);
hs->restval = 0;
/* Normally we are not interested in the response body of an error response.
But if we are writing a WARC file we are: we like to keep everything. */
if (warc_enabled)
{
int err = read_response_body (hs, sock, NULL, contlen, 0,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
if (err != RETRFINISHED || hs->res < 0)
{
CLOSE_INVALIDATE (sock);
xfree (head);
xfree_null (type);
return err;
}
else
CLOSE_FINISH (sock);
}
else
CLOSE_INVALIDATE (sock);
{
/* Since WARC is disabled, we are not interested in the response body. */
if (head_only)
/* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
servers not to send body in response to a HEAD request, and
those that do will likely be caught by test_socket_open.
If not, they can be worked around using
`--no-http-keep-alive'. */
CLOSE_FINISH (sock);
else if (keep_alive
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
/* Successfully skipped the body; also keep using the socket. */
CLOSE_FINISH (sock);
else
CLOSE_INVALIDATE (sock);
}
xfree (head);
xfree_null (type);
return RETRFINISHED;
}
@ -2745,6 +3085,7 @@ read_header:
strerror (errno));
CLOSE_INVALIDATE (sock);
xfree (head);
xfree_null (type);
return UNLINKERR;
}
}
@ -2772,6 +3113,7 @@ read_header:
hs->local_file);
CLOSE_INVALIDATE (sock);
xfree (head);
xfree_null (type);
return FOPEN_EXCL_ERR;
}
}
@ -2780,6 +3122,7 @@ read_header:
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock);
xfree (head);
xfree_null (type);
return FOPENERR;
}
}
@ -2793,49 +3136,26 @@ read_header:
HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
}
/* This confuses the timestamping code that checks for file size.
#### The timestamping code should be smarter about file size. */
if (opt.save_headers && hs->restval == 0)
fwrite (head, 1, strlen (head), fp);
err = read_response_body (hs, sock, fp, contlen, contrange,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
warc_request_uuid, warc_ip, type,
statcode, head);
/* Now we no longer need to store the response header. */
xfree (head);
/* Download the request body. */
flags = 0;
if (contlen != -1)
/* If content-length is present, read that much; otherwise, read
until EOF. The HTTP spec doesn't require the server to
actually close the connection when it's done sending data. */
flags |= rb_read_exactly;
if (hs->restval > 0 && contrange == 0)
/* If the server ignored our range request, instruct fd_read_body
to skip the first RESTVAL bytes of body. */
flags |= rb_skip_startpos;
if (chunked_transfer_encoding)
flags |= rb_chunked_transfer_encoding;
hs->len = hs->restval;
hs->rd_size = 0;
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
flags);
xfree_null (type);
if (hs->res >= 0)
CLOSE_FINISH (sock);
else
{
if (hs->res < 0)
hs->rderrmsg = xstrdup (fd_errstr (sock));
CLOSE_INVALIDATE (sock);
}
CLOSE_INVALIDATE (sock);
if (!output_stream)
fclose (fp);
if (hs->res == -2)
return FWRITEERR;
return RETRFINISHED;
return err;
}
/* The genuine HTTP loop! This is the part where the retrieval is
@ -2859,6 +3179,12 @@ http_loop (struct url *u, struct url *original_url, char **newloc,
char *file_name;
bool force_full_retrieve = false;
/* If we are writing to a WARC file: always retrieve the whole file. */
if (opt.warc_filename != NULL)
force_full_retrieve = true;
#ifndef ENABLE_METALINK
/* Assert that no value for *LOCAL_FILE was passed. */
assert (local_file == NULL || *local_file == NULL);
@ -3047,6 +3373,18 @@ Spider mode enabled. Check if remote file exists.\n"));
/* Fatal errors just return from the function. */
ret = err;
goto exit;
case WARC_ERR:
/* A fatal WARC error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to WARC file.\n"));
ret = err;
goto exit;
case WARC_TMP_FOPENERR: case WARC_TMP_FWRITEERR:
/* A fatal WARC error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to temporary WARC file.\n"));
ret = err;
goto exit;
case CONSSLERR:
/* Another fatal error. */
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
@ -3575,19 +3913,23 @@ digest_authentication_encode (const char *au, const char *user,
const char *passwd, const char *method,
const char *path)
{
static char *realm, *opaque, *nonce;
static char *realm, *opaque, *nonce, *qop;
static struct {
const char *name;
char **variable;
} options[] = {
{ "realm", &realm },
{ "opaque", &opaque },
{ "nonce", &nonce }
{ "nonce", &nonce },
{ "qop", &qop }
};
char cnonce[16] = "";
char *res;
size_t res_size;
param_token name, value;
realm = opaque = nonce = NULL;
realm = opaque = nonce = qop = NULL;
au += 6; /* skip over `Digest' */
while (extract_param (&au, &name, &value, ','))
@ -3603,11 +3945,19 @@ digest_authentication_encode (const char *au, const char *user,
break;
}
}
if (qop != NULL && strcmp(qop,"auth"))
{
logprintf (LOG_NOTQUIET, _("Unsupported quality of protection '%s'.\n"), qop);
user = NULL; /* force freeing mem and return */
}
if (!realm || !nonce || !user || !passwd || !path || !method)
{
xfree_null (realm);
xfree_null (opaque);
xfree_null (nonce);
xfree_null (qop);
return NULL;
}
@ -3636,27 +3986,69 @@ digest_authentication_encode (const char *au, const char *user,
md5_finish_ctx (&ctx, hash);
dump_hash (a2buf, hash);
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
md5_init_ctx (&ctx);
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_finish_ctx (&ctx, hash);
if (!strcmp(qop,"auth"))
{
/* RFC 2617 Digest Access Authentication */
/* generate random hex string */
snprintf(cnonce, sizeof(cnonce), "%08x", random_number(INT_MAX));
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" noncecount ":" clientnonce ":" qop ": " A2BUF) */
md5_init_ctx (&ctx);
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)"00000001", 8, &ctx); /* TODO: keep track of server nonce values */
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)cnonce, strlen(cnonce), &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)qop, strlen(qop), &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_finish_ctx (&ctx, hash);
}
else
{
/* RFC 2069 Digest Access Authentication */
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
md5_init_ctx (&ctx);
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
md5_process_bytes ((unsigned char *)":", 1, &ctx);
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
md5_finish_ctx (&ctx, hash);
}
dump_hash (response_digest, hash);
res = xmalloc (strlen (user)
+ strlen (user)
+ strlen (realm)
+ strlen (nonce)
+ strlen (path)
+ 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ (opaque ? strlen (opaque) : 0)
+ 128);
sprintf (res, "Digest \
username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
user, realm, nonce, path, response_digest);
res_size = strlen (user)
+ strlen (user)
+ strlen (realm)
+ strlen (nonce)
+ strlen (path)
+ 2 * MD5_DIGEST_SIZE /*strlen (response_digest)*/
+ (opaque ? strlen (opaque) : 0)
+ (qop ? 128: 0)
+ 128;
res = xmalloc (res_size);
if (!strcmp(qop,"auth"))
{
snprintf (res, res_size, "Digest "\
"username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\""\
", qop=auth, nc=00000001, cnonce=\"%s\"",
user, realm, nonce, path, response_digest, cnonce);
}
else
{
snprintf (res, res_size, "Digest "\
"username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
user, realm, nonce, path, response_digest);
}
if (opaque)
{
char *p = res + strlen (res);

View File

@ -1,6 +1,6 @@
/* Reading/parsing the initialization file.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@ -30,6 +30,7 @@ shall include the source code for the parts of OpenSSL used as well
as that of the covered work. */
#include "wget.h"
#include "exits.h"
#include <stdio.h>
#include <stdlib.h>
@ -46,6 +47,10 @@ as that of the covered work. */
# endif
#endif
#include <regex.h>
#ifdef HAVE_LIBPCRE
# include <pcre.h>
#endif
#ifdef HAVE_PWD_H
# include <pwd.h>
@ -62,6 +67,7 @@ as that of the covered work. */
#include "res.h" /* for res_cleanup */
#include "http.h" /* for http_cleanup */
#include "retr.h" /* for output_stream */
#include "warc.h" /* for warc_close */
#ifdef TESTING
#include "test.h"
@ -88,12 +94,15 @@ CMD_DECLARE (cmd_vector);
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_warc_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
CMD_DECLARE (cmd_spec_prefer_family);
CMD_DECLARE (cmd_spec_progress);
CMD_DECLARE (cmd_spec_recursive);
CMD_DECLARE (cmd_spec_regex_type);
CMD_DECLARE (cmd_spec_restrict_file_names);
CMD_DECLARE (cmd_spec_report_speed);
#ifdef HAVE_SSL
CMD_DECLARE (cmd_spec_secure_protocol);
#endif
@ -115,6 +124,7 @@ static const struct {
} commands[] = {
/* KEEP THIS LIST ALPHABETICALLY SORTED */
{ "accept", &opt.accepts, cmd_vector },
{ "acceptregex", &opt.acceptregex_s, cmd_string },
{ "addhostdir", &opt.add_hostdir, cmd_boolean },
{ "adjustextension", &opt.adjust_extension, cmd_boolean },
{ "alwaysrest", &opt.always_rest, cmd_boolean }, /* deprecated */
@ -220,7 +230,7 @@ static const struct {
{ "postdata", &opt.post_data, cmd_string },
{ "postfile", &opt.post_file_name, cmd_file },
{ "preferfamily", NULL, cmd_spec_prefer_family },
{ "preservepermissions", &opt.preserve_perm, cmd_boolean },/* deprecated */
{ "preservepermissions", &opt.preserve_perm, cmd_boolean },
#ifdef HAVE_SSL
{ "privatekey", &opt.private_key, cmd_file },
{ "privatekeytype", &opt.private_key_type, cmd_cert_type },
@ -240,10 +250,13 @@ static const struct {
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },
{ "regextype", &opt.regex_type, cmd_spec_regex_type },
{ "reject", &opt.rejects, cmd_vector },
{ "rejectregex", &opt.rejectregex_s, cmd_string },
{ "relativeonly", &opt.relative_only, cmd_boolean },
{ "remoteencoding", &opt.encoding_remote, cmd_string },
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "reportspeed", &opt.report_bps, cmd_spec_report_speed},
{ "restrictfilenames", NULL, cmd_spec_restrict_file_names },
#ifdef ENABLE_METALINK
{ "retries", &opt.n_retries, cmd_number_inf },
@ -273,6 +286,17 @@ static const struct {
{ "verbose", NULL, cmd_spec_verbose },
{ "wait", &opt.wait, cmd_time },
{ "waitretry", &opt.waitretry, cmd_time },
{ "warccdx", &opt.warc_cdx_enabled, cmd_boolean },
{ "warccdxdedup", &opt.warc_cdx_dedup_filename, cmd_file },
#ifdef HAVE_LIBZ
{ "warccompression", &opt.warc_compression_enabled, cmd_boolean },
#endif
{ "warcdigests", &opt.warc_digests_enabled, cmd_boolean },
{ "warcfile", &opt.warc_filename, cmd_file },
{ "warcheader", NULL, cmd_spec_warc_header },
{ "warckeeplog", &opt.warc_keep_log, cmd_boolean },
{ "warcmaxsize", &opt.warc_maxsize, cmd_bytes },
{ "warctempdir", &opt.warc_tempdir, cmd_directory },
#ifdef USE_WATT32
{ "wdebug", &opt.wdebug, cmd_boolean },
#endif
@ -360,6 +384,8 @@ defaults (void)
opt.restrict_files_nonascii = false;
opt.restrict_files_case = restrict_no_case_restriction;
opt.regex_type = regex_type_posix;
opt.max_redirect = 20;
opt.waitretry = 10;
@ -374,6 +400,18 @@ defaults (void)
opt.useservertimestamps = true;
opt.show_all_dns_entries = false;
opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
#ifdef HAVE_LIBZ
opt.warc_compression_enabled = true;
#else
opt.warc_compression_enabled = false;
#endif
opt.warc_digests_enabled = true;
opt.warc_cdx_enabled = false;
opt.warc_cdx_dedup_filename = NULL;
opt.warc_tempdir = NULL;
opt.warc_keep_log = true;
}
/* Return the user's home directory (strdup-ed), or NULL if none is
@ -456,7 +494,7 @@ wgetrc_env_file_name (void)
return NULL;
}
/* Check for the existance of '$HOME/.wgetrc' and return it's path
/* Check for the existence of '$HOME/.wgetrc' and return its path
if it exists and is set. */
char *
wgetrc_user_file_name (void)
@ -610,21 +648,34 @@ initialize (void)
variable has been set. For internal testing purposes only! */
env_sysrc = getenv ("SYSTEM_WGETRC");
if (env_sysrc && file_exists_p (env_sysrc))
ok &= run_wgetrc (env_sysrc);
{
ok &= run_wgetrc (env_sysrc);
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
{
fprintf (stderr, _("\
Parsing system wgetrc file (env SYSTEM_WGETRC) failed. Please check\n\
'%s',\n\
or specify a different file using --config.\n"), env_sysrc);
exit (2);
}
}
/* Otherwise, if SYSTEM_WGETRC is defined, use it. */
#ifdef SYSTEM_WGETRC
else if (file_exists_p (SYSTEM_WGETRC))
ok &= run_wgetrc (SYSTEM_WGETRC);
#endif
/* If there are any problems parsing the system wgetrc file, tell
the user and exit */
if (! ok)
{
fprintf (stderr, _("\
Parsing system wgetrc file failed, please check '%s'. \
Or specify a different file using --config\n"), SYSTEM_WGETRC);
Parsing system wgetrc file failed. Please check\n\
'%s',\n\
or specify a different file using --config.\n"), SYSTEM_WGETRC);
exit (2);
}
#endif
/* Override it with your own, if one exists. */
file = wgetrc_file_name ();
if (!file)
@ -1234,6 +1285,27 @@ cmd_spec_header (const char *com, const char *val, void *place_ignored)
return true;
}
/* Handler for the `warcheader' command.  A non-empty VAL that passes
   check_user_specified_header is appended to opt.warc_user_headers.
   An empty VAL resets the list of user-specified WARC headers.
   Returns false, after printing a diagnostic naming COM and VAL, when
   VAL is rejected; true otherwise.  PLACE_IGNORED is unused.  */
static bool
cmd_spec_warc_header (const char *com, const char *val, void *place_ignored)
{
  if (*val != '\0')
    {
      if (check_user_specified_header (val))
        {
          opt.warc_user_headers = vec_append (opt.warc_user_headers, val);
          return true;
        }

      fprintf (stderr, _("%s: %s: Invalid WARC header %s.\n"),
               exec_name, com, quote (val));
      return false;
    }

  /* Empty value: forget every header collected so far.  */
  free_vec (opt.warc_user_headers);
  opt.warc_user_headers = NULL;
  return true;
}
static bool
cmd_spec_htmlify (const char *com, const char *val, void *place_ignored)
{
@ -1321,6 +1393,25 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored)
return true;
}
/* Handler for the `regextype' command (--regex-type).  VAL is looked
   up among the supported regex flavours: "posix" always, "pcre" only
   when built with HAVE_LIBPCRE.  The decoded value is stored in
   opt.regex_type.

   Returns true when VAL was recognized.  Otherwise a diagnostic naming
   COM and VAL is printed and false is returned; opt.regex_type is
   still assigned from the local, which starts out as regex_type_posix
   (presumably left untouched by decode_string on failure -- confirm).
   PLACE_IGNORED is unused.  */
static bool
cmd_spec_regex_type (const char *com, const char *val, void *place_ignored)
{
  static const struct decode_item choices[] = {
    { "posix", regex_type_posix },
#ifdef HAVE_LIBPCRE
    { "pcre",  regex_type_pcre },
#endif
  };
  int selected = regex_type_posix;

  if (!decode_string (val, choices, countof (choices), &selected))
    {
      fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
      opt.regex_type = selected;
      return false;
    }

  opt.regex_type = selected;
  return true;
}
static bool
cmd_spec_restrict_file_names (const char *com, const char *val, void *place_ignored)
{
@ -1375,6 +1466,15 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
return true;
}
/* Handler for the `reportspeed' command.  The only accepted value is
   "bits" (compared case-insensitively), which turns opt.report_bps on
   and returns true.  Any other VAL turns opt.report_bps off, prints a
   diagnostic naming COM and VAL, and returns false.  PLACE_IGNORED is
   unused.  */
static bool
cmd_spec_report_speed (const char *com, const char *val, void *place_ignored)
{
  bool wants_bits = (strcasecmp (val, "bits") == 0);

  opt.report_bps = wants_bits;
  if (wants_bits)
    return true;

  fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
  return false;
}
#ifdef HAVE_SSL
static bool
cmd_spec_secure_protocol (const char *com, const char *val, void *place)
@ -1589,8 +1689,16 @@ cleanup (void)
{
/* Free external resources, close files, etc. */
/* Close WARC file. */
if (opt.warc_filename != 0)
warc_close ();
log_close ();
if (output_stream)
fclose (output_stream);
if (fclose (output_stream) == EOF)
inform_exit_status (CLOSEFAILED);
/* No need to check for error because Wget flushes its output (and
checks for errors) after any data arrives. */
@ -1610,6 +1718,9 @@ cleanup (void)
host_cleanup ();
log_cleanup ();
for (i = 0; i < nurl; i++)
xfree (url[i]);
{
extern acc_t *netrc_list;
free_netrc (netrc_list);
@ -1638,6 +1749,7 @@ cleanup (void)
xfree_null (opt.http_user);
xfree_null (opt.http_passwd);
free_vec (opt.user_headers);
free_vec (opt.warc_user_headers);
# ifdef HAVE_SSL
xfree_null (opt.cert_file);
xfree_null (opt.private_key);

View File

@ -79,6 +79,10 @@ as that of the covered work. */
logging is inhibited, logfp is set back to NULL. */
static FILE *logfp;
/* A second file descriptor pointing to the temporary log file for the
WARC writer. If WARC writing is disabled, this is NULL. */
static FILE *warclogfp;
/* If true, it means logging is inhibited, i.e. nothing is printed or
stored. */
static bool inhibit_logging;
@ -304,6 +308,31 @@ get_log_fp (void)
return logfp;
return stderr;
}
/* Return the stream of the secondary (WARC) log file, i.e. WARCLOGFP.
   Returns NULL when logging is inhibited or when no secondary log
   stream has been set.  Unlike get_log_fp, this never falls back to
   stderr, so callers must check the result before writing to it.  */
static FILE *
get_warc_log_fp (void)
{
  return inhibit_logging ? NULL : warclogfp;
}
/* Set the stream used for the secondary (WARC) log file to FP.
   The pointer is stored as-is in WARCLOGFP; the logging routines in
   this file obtain it through get_warc_log_fp and copy their output
   to it whenever it is non-NULL.  Passing NULL therefore turns the
   secondary log off.  This function does not close any previously
   set stream (NOTE(review): presumably the caller owns and closes
   the stream -- confirm against the WARC writer).  */
void
log_set_warc_log_fp (FILE * fp)
{
warclogfp = fp;
}
/* Log a literal string S. The string is logged as-is, without a
newline appended. */
@ -312,13 +341,17 @@ void
logputs (enum log_options o, const char *s)
{
FILE *fp;
FILE *warcfp;
check_redirect_output ();
if ((fp = get_log_fp ()) == NULL)
return;
warcfp = get_warc_log_fp ();
CHECK_VERBOSE (o);
FPUTS (s, fp);
if (warcfp != NULL)
FPUTS (s, warcfp);
if (save_context_p)
saved_append (s);
if (flush_log_p)
@ -356,8 +389,9 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
int available_size = sizeof (smallmsg);
int numwritten;
FILE *fp = get_log_fp ();
FILE *warcfp = get_warc_log_fp ();
if (!save_context_p)
if (!save_context_p && warcfp == NULL)
{
/* In the simple case just call vfprintf(), to avoid needless
allocation and games with vsnprintf(). */
@ -407,8 +441,11 @@ log_vprintf_internal (struct logvprintf_state *state, const char *fmt,
}
/* Writing succeeded. */
saved_append (write_ptr);
if (save_context_p)
saved_append (write_ptr);
FPUTS (write_ptr, fp);
if (warcfp != NULL)
FPUTS (write_ptr, warcfp);
if (state->bigmsg)
xfree (state->bigmsg);
@ -426,6 +463,7 @@ void
logflush (void)
{
FILE *fp = get_log_fp ();
FILE *warcfp = get_warc_log_fp ();
if (fp)
{
/* 2005-10-25 SMS.
@ -440,6 +478,10 @@ logflush (void)
fflush (fp);
#endif /* def __VMS [else] */
}
if (warcfp != NULL)
fflush (warcfp);
needs_flushing = false;
}
@ -573,14 +615,14 @@ log_init (const char *file, bool appendp)
}
}
/* Close LOGFP, inhibit further logging and free the memory associated
with it. */
/* Close LOGFP (only if we opened it, not if it's stderr), inhibit
further logging and free the memory associated with it. */
void
log_close (void)
{
int i;
if (logfp)
if (logfp && (logfp != stderr))
fclose (logfp);
logfp = NULL;
inhibit_logging = true;
@ -598,6 +640,7 @@ log_dump_context (void)
{
int num = log_line_current;
FILE *fp = get_log_fp ();
FILE *warcfp = get_warc_log_fp ();
if (!fp)
return;
@ -609,14 +652,23 @@ log_dump_context (void)
{
struct log_ln *ln = log_lines + num;
if (ln->content)
FPUTS (ln->content, fp);
{
FPUTS (ln->content, fp);
if (warcfp != NULL)
FPUTS (ln->content, warcfp);
}
ROT_ADVANCE (num);
}
while (num != log_line_current);
if (trailing_line)
if (log_lines[log_line_current].content)
FPUTS (log_lines[log_line_current].content, fp);
{
FPUTS (log_lines[log_line_current].content, fp);
if (warcfp != NULL)
FPUTS (log_lines[log_line_current].content, warcfp);
}
fflush (fp);
fflush (warcfp);
}
/* String escape functions. */

View File

@ -34,8 +34,12 @@ as that of the covered work. */
/* The log file to which Wget writes to after HUP. */
#define DEFAULT_LOGFILE "wget-log"
#include <stdio.h>
enum log_options { LOG_VERBOSE, LOG_NOTQUIET, LOG_NONVERBOSE, LOG_ALWAYS };
void log_set_warc_log_fp (FILE *);
void logprintf (enum log_options, const char *, ...)
GCC_FORMAT_ATTR (2, 3);
void debug_logprintf (const char *, ...) GCC_FORMAT_ATTR (1, 2);

View File

@ -1,6 +1,6 @@
/* Command line parsing.
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of GNU Wget.
@ -55,7 +55,7 @@ as that of the covered work. */
#include "spider.h"
#include "http.h" /* for save_cookies */
#include "ptimer.h"
#include "warc.h"
#include <getopt.h>
#include <getpass.h>
#include <quote.h>
@ -157,6 +157,7 @@ struct cmdline_option {
static struct cmdline_option option_data[] =
{
{ "accept", 'A', OPT_VALUE, "accept", -1 },
{ "accept-regex", 0, OPT_VALUE, "acceptregex", -1 },
{ "adjust-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 },
{ "append-output", 'a', OPT__APPEND_OUTPUT, NULL, required_argument },
{ "ask-password", 0, OPT_BOOLEAN, "askpassword", -1 },
@ -249,7 +250,7 @@ static struct cmdline_option option_data[] =
{ "post-data", 0, OPT_VALUE, "postdata", -1 },
{ "post-file", 0, OPT_VALUE, "postfile", -1 },
{ "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
{ "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 }, /* deprecated */
{ "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
{ IF_SSL ("private-key"), 0, OPT_VALUE, "privatekey", -1 },
{ IF_SSL ("private-key-type"), 0, OPT_VALUE, "privatekeytype", -1 },
{ "progress", 0, OPT_VALUE, "progress", -1 },
@ -266,10 +267,13 @@ static struct cmdline_option option_data[] =
{ "read-timeout", 0, OPT_VALUE, "readtimeout", -1 },
{ "recursive", 'r', OPT_BOOLEAN, "recursive", -1 },
{ "referer", 0, OPT_VALUE, "referer", -1 },
{ "regex-type", 0, OPT_VALUE, "regextype", -1 },
{ "reject", 'R', OPT_VALUE, "reject", -1 },
{ "reject-regex", 0, OPT_VALUE, "rejectregex", -1 },
{ "relative", 'L', OPT_BOOLEAN, "relativeonly", -1 },
{ "remote-encoding", 0, OPT_VALUE, "remoteencoding", -1 },
{ "remove-listing", 0, OPT_BOOLEAN, "removelisting", -1 },
{ "report-speed", 0, OPT_BOOLEAN, "reportspeed", -1 },
{ "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
{ "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
#ifdef ENABLE_METALINK
@ -296,6 +300,17 @@ static struct cmdline_option option_data[] =
{ "version", 'V', OPT_FUNCALL, (void *) print_version, no_argument },
{ "wait", 'w', OPT_VALUE, "wait", -1 },
{ "waitretry", 0, OPT_VALUE, "waitretry", -1 },
{ "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
#ifdef HAVE_LIBZ
{ "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
#endif
{ "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
{ "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
{ "warc-file", 0, OPT_VALUE, "warcfile", -1 },
{ "warc-header", 0, OPT_VALUE, "warcheader", -1 },
{ "warc-keep-log", 0, OPT_BOOLEAN, "warckeeplog", -1 },
{ "warc-max-size", 0, OPT_VALUE, "warcmaxsize", -1 },
{ "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
#ifdef USE_WATT32
{ "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
#endif
@ -453,6 +468,8 @@ Logging and input file:\n"),
-v, --verbose be verbose (this is the default).\n"),
N_("\
-nv, --no-verbose turn off verboseness, without being quiet.\n"),
N_("\
--report-speed=TYPE Output bandwidth as TYPE. TYPE can be bits.\n"),
N_("\
-i, --input-file=FILE download URLs found in local or external FILE.\n"),
N_("\
@ -654,10 +671,37 @@ FTP options:\n"),
--no-glob turn off FTP file name globbing.\n"),
N_("\
--no-passive-ftp disable the \"passive\" transfer mode.\n"),
N_("\
--preserve-permissions preserve remote file permissions.\n"),
N_("\
--retr-symlinks when recursing, get linked-to files (not dir).\n"),
"\n",
N_("\
WARC options:\n"),
N_("\
--warc-file=FILENAME save request/response data to a .warc.gz file.\n"),
N_("\
--warc-header=STRING insert STRING into the warcinfo record.\n"),
N_("\
--warc-max-size=NUMBER set maximum size of WARC files to NUMBER.\n"),
N_("\
--warc-cdx write CDX index files.\n"),
N_("\
--warc-dedup=FILENAME do not store records listed in this CDX file.\n"),
#ifdef HAVE_LIBZ
N_("\
--no-warc-compression do not compress WARC files with GZIP.\n"),
#endif
N_("\
--no-warc-digests do not calculate SHA1 digests.\n"),
N_("\
--no-warc-keep-log do not store the log file in a WARC record.\n"),
N_("\
--warc-tempdir=DIRECTORY location for temporary files created by the\n\
WARC writer.\n"),
"\n",
N_("\
Recursive download:\n"),
N_("\
@ -694,6 +738,17 @@ Recursive accept/reject:\n"),
-A, --accept=LIST comma-separated list of accepted extensions.\n"),
N_("\
-R, --reject=LIST comma-separated list of rejected extensions.\n"),
N_("\
--accept-regex=REGEX regex matching accepted URLs.\n"),
N_("\
--reject-regex=REGEX regex matching rejected URLs.\n"),
#ifdef HAVE_LIBPCRE
N_("\
--regex-type=TYPE regex type (posix|pcre).\n"),
#else
N_("\
--regex-type=TYPE regex type (posix).\n"),
#endif
N_("\
-D, --domains=LIST comma-separated list of accepted domains.\n"),
N_("\
@ -718,7 +773,6 @@ Recursive accept/reject:\n"),
N_("\
-np, --no-parent don't ascend to the parent directory.\n"),
"\n",
N_("Mail bug reports and suggestions to <bug-wget@gnu.org>.\n")
};
@ -920,6 +974,7 @@ There is NO WARRANTY, to the extent permitted by law.\n"), stdout) < 0)
}
char *program_name; /* Needed by lib/error.c. */
char *program_argstring; /* Needed by wget_warc.c. */
int
main (int argc, char **argv)
@ -955,13 +1010,34 @@ main (int argc, char **argv)
windows_main ((char **) &exec_name);
#endif
/* Construct the arguments string. */
int argstring_length = 1;
for (i = 1; i < argc; i++)
argstring_length += strlen (argv[i]) + 2 + 1;
char *p = program_argstring = malloc (argstring_length * sizeof (char));
if (p == NULL)
{
fprintf (stderr, _("Memory allocation problem\n"));
exit (2);
}
for (i = 1; i < argc; i++)
{
*p++ = '"';
int arglen = strlen (argv[i]);
memcpy (p, argv[i], arglen);
p += arglen;
*p++ = '"';
*p++ = ' ';
}
*p = '\0';
/* Load the hard-coded defaults. */
defaults ();
init_switches ();
/* This seperate getopt_long is needed to find the user config
and parse it before the other user options. */
/* This separate getopt_long is needed to find the user config file
option ("--config") and parse it before the other user options. */
longindex = -1;
int retconf;
bool use_userconfig = false;
@ -972,20 +1048,25 @@ main (int argc, char **argv)
int confval;
bool userrc_ret = true;
struct cmdline_option *config_opt;
confval = long_options[longindex].val;
config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
if (strcmp (config_opt->long_name, "config") == 0)
/* There is no short option for "--config". */
if (longindex >= 0)
{
userrc_ret &= run_wgetrc (optarg);
use_userconfig = true;
confval = long_options[longindex].val;
config_opt = &option_data[confval & ~BOOLEAN_NEG_MARKER];
if (strcmp (config_opt->long_name, "config") == 0)
{
userrc_ret &= run_wgetrc (optarg);
use_userconfig = true;
}
if (!userrc_ret)
{
fprintf (stderr, "Exiting due to error in %s\n", optarg);
exit (2);
}
else
break;
}
if (!userrc_ret)
{
printf ("Exiting due to error in %s\n", optarg);
exit (2);
}
else
break;
}
/* If the user did not specify a config, read the system wgetrc and ~/.wgetrc. */
@ -1008,9 +1089,10 @@ main (int argc, char **argv)
{
if (ret == '?')
{
print_usage (0);
printf ("\n");
printf (_("Try `%s --help' for more options.\n"), exec_name);
print_usage (1);
fprintf (stderr, "\n");
fprintf (stderr, _("Try `%s --help' for more options.\n"),
exec_name);
exit (2);
}
/* Find the short option character in the mapping. */
@ -1118,7 +1200,7 @@ main (int argc, char **argv)
{
fprintf (stderr,
_("Both --no-clobber and --convert-links were specified,"
"only --convert-links will be used.\n"));
" only --convert-links will be used.\n"));
opt.noclobber = false;
}
@ -1199,6 +1281,47 @@ for details.\n\n"));
}
}
if (opt.warc_filename != 0)
{
if (opt.noclobber)
{
fprintf (stderr,
_("WARC output does not work with --no-clobber, "
"--no-clobber will be disabled.\n"));
opt.noclobber = false;
}
if (opt.timestamping)
{
fprintf (stderr,
_("WARC output does not work with timestamping, "
"timestamping will be disabled.\n"));
opt.timestamping = false;
}
if (opt.spider)
{
fprintf (stderr,
_("WARC output does not work with --spider.\n"));
exit (1);
}
if (opt.always_rest)
{
fprintf (stderr,
_("WARC output does not work with --continue, "
"--continue will be disabled.\n"));
opt.always_rest = false;
}
if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled)
{
fprintf (stderr,
_("Digests are disabled; WARC deduplication will "
"not find duplicate records.\n"));
}
if (opt.warc_keep_log)
{
opt.progress_type = xstrdup ("dot");
}
}
if (opt.ask_passwd && opt.passwd)
{
fprintf (stderr,
@ -1216,13 +1339,42 @@ for details.\n\n"));
/* No URL specified. */
fprintf (stderr, _("%s: missing URL\n"), exec_name);
print_usage (1);
printf ("\n");
fprintf (stderr, "\n");
/* #### Something nicer should be printed here -- similar to the
pre-1.5 `--help' page. */
fprintf (stderr, _("Try `%s --help' for more options.\n"), exec_name);
exit (1);
}
/* Compile the regular expressions. */
switch (opt.regex_type)
{
#ifdef HAVE_LIBPCRE
case regex_type_pcre:
opt.regex_compile_fun = compile_pcre_regex;
opt.regex_match_fun = match_pcre_regex;
break;
#endif
case regex_type_posix:
default:
opt.regex_compile_fun = compile_posix_regex;
opt.regex_match_fun = match_posix_regex;
break;
}
if (opt.acceptregex_s)
{
opt.acceptregex = opt.regex_compile_fun (opt.acceptregex_s);
if (!opt.acceptregex)
exit (1);
}
if (opt.rejectregex_s)
{
opt.rejectregex = opt.regex_compile_fun (opt.rejectregex_s);
if (!opt.rejectregex)
exit (1);
}
#ifdef ENABLE_IRI
if (opt.enable_iri)
{
@ -1269,6 +1421,11 @@ for details.\n\n"));
/* Fill in the arguments. */
url = alloca_array (char *, nurl + 1);
if (url == NULL)
{
fprintf (stderr, _("Memory allocation problem\n"));
exit (2);
}
for (i = 0; i < nurl; i++, optind++)
{
char *rewritten = rewrite_shorthand_url (argv[optind]);
@ -1282,6 +1439,10 @@ for details.\n\n"));
/* Initialize logging. */
log_init (opt.lfilename, append_to_log);
/* Open WARC file. */
if (opt.warc_filename != 0)
warc_init ();
DEBUGP (("DEBUG output created by Wget %s on %s.\n\n",
version_string, OS_TYPE));
@ -1437,7 +1598,7 @@ outputting to a regular file.\n"));
&dt, opt.recursive, iri, true, NULL);
}
if (opt.delete_after && file_exists_p(filename))
if (opt.delete_after && filename != NULL && file_exists_p (filename))
{
DEBUGP (("Removing file due to --delete-after in main():\n"));
logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
@ -1524,12 +1685,9 @@ outputting to a regular file.\n"));
if (opt.convert_links && !opt.delete_after)
convert_all_links ();
log_close ();
for (i = 0; i < nurl; i++)
xfree (url[i]);
cleanup ();
return get_exit_status ();
exit (get_exit_status ());
}
#endif /* TESTING */

View File

@ -1,6 +1,6 @@
/* SSL support via OpenSSL library.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
2009, 2010, 2011 Free Software Foundation, Inc.
2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Originally contributed by Christian Fraenkel.
This file is part of GNU Wget.
@ -159,7 +159,7 @@ key_type_to_ssl_type (enum keyfile_type type)
Returns true on success, false otherwise. */
bool
ssl_init ()
ssl_init (void)
{
SSL_METHOD const *meth;
@ -201,7 +201,9 @@ ssl_init ()
abort ();
}
ssl_ctx = SSL_CTX_new (meth);
/* The type cast below accommodates older OpenSSL versions (0.9.8)
where SSL_CTX_new() is declared without a "const" argument. */
ssl_ctx = SSL_CTX_new ((SSL_METHOD *)meth);
if (!ssl_ctx)
goto error;
@ -393,7 +395,7 @@ static struct transport_implementation openssl_transport = {
Returns true on success, false on failure. */
bool
ssl_connect_wget (int fd)
ssl_connect_wget (int fd, const char *hostname)
{
SSL *conn;
struct openssl_transport_context *ctx;
@ -404,6 +406,19 @@ ssl_connect_wget (int fd)
conn = SSL_new (ssl_ctx);
if (!conn)
goto error;
#if OPENSSL_VERSION_NUMBER >= 0x0090806fL && !defined(OPENSSL_NO_TLSEXT)
/* If the SSL library was built with support for ServerNameIndication
then use it whenever we have a hostname. If not, don't, ever. */
if (! is_valid_ip_address (hostname))
{
if (! SSL_set_tlsext_host_name (conn, hostname))
{
DEBUGP (("Failed to set TLS server-name indication."));
goto error;
}
}
#endif
#ifndef FD_TO_SOCKET
# define FD_TO_SOCKET(X) (X)
#endif

View File

@ -78,6 +78,19 @@ struct options
bool ignore_case; /* Whether to ignore case when
matching dirs and files */
char *acceptregex_s; /* Patterns to accept (a regex string). */
char *rejectregex_s; /* Patterns to reject (a regex string). */
void *acceptregex; /* Patterns to accept (a regex struct). */
void *rejectregex; /* Patterns to reject (a regex struct). */
enum {
#ifdef HAVE_LIBPCRE
regex_type_pcre,
#endif
regex_type_posix
} regex_type; /* The regex library. */
void *(*regex_compile_fun)(const char *); /* Function to compile a regex. */
bool (*regex_match_fun)(const void *, const char *); /* Function to match a string to a regex. */
char **domains; /* See host.c */
char **exclude_domains;
bool dns_cache; /* whether we cache DNS lookups. */
@ -91,6 +104,15 @@ struct options
FTP. */
char *output_document; /* The output file to which the
documents will be printed. */
char *warc_filename; /* WARC output filename */
char *warc_tempdir; /* WARC temp dir */
char *warc_cdx_dedup_filename; /* CDX file to be used for deduplication. */
wgint warc_maxsize; /* WARC max archive size */
bool warc_compression_enabled; /* For GZIP compression. */
bool warc_digests_enabled; /* For SHA1 digests. */
bool warc_cdx_enabled; /* Create CDX files? */
bool warc_keep_log; /* Store the log file in a WARC record. */
char **warc_user_headers; /* User-defined WARC header(s). */
char *user; /* Generic username */
char *passwd; /* Generic password */
@ -261,6 +283,9 @@ struct options
bool show_all_dns_entries; /* Show all the DNS entries when resolving a
name. */
bool report_bps; /*Output bandwidth in bits format*/
#ifdef ENABLE_THREADS
int jobs; /* How many threads use at the same time. */
#endif

View File

@ -766,7 +766,7 @@ update_speed_ring (struct bar_progress *bp, wgint howmuch, double dltime)
}
#if USE_NLS_PROGRESS_BAR
int
static int
count_cols (const char *mbs)
{
wchar_t wc;
@ -795,7 +795,7 @@ count_cols (const char *mbs)
# define count_cols(mbs) ((int)(strlen(mbs)))
#endif
const char *
static const char *
get_eta (int *bcd)
{
/* TRANSLATORS: "ETA" is English-centric, but this must
@ -861,7 +861,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
struct bar_progress_hist *hist = &bp->hist;
/* The progress bar should look like this:
xx% [=======> ] nn,nnn 12.34K/s eta 36m 51s
xx% [=======> ] nn,nnn 12.34KB/s eta 36m 51s
Calculate the geometry. The idea is to assign as much room as
possible to the progress bar. The other idea is to never let
@ -873,7 +873,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
"xx% " or "100%" - percentage - 4 chars
"[]" - progress bar decorations - 2 chars
" nnn,nnn,nnn" - downloaded bytes - 12 chars or very rarely more
" 12.5K/s" - download rate - 8 chars
" 12.5KB/s" - download rate - 9 chars
" eta 36m 51s" - ETA - 14 chars
"=====>..." - progress bar - the rest
@ -977,10 +977,11 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
*p++ = ' ';
}
/* " 12.52K/s" */
/* " 12.52Kb/s or 12.52KB/s" */
if (hist->total_time > 0 && hist->total_bytes)
{
static const char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
static const char *short_units[] = { "B/s", "KB/s", "MB/s", "GB/s" };
static const char *short_units_bits[] = { "b/s", "Kb/s", "Mb/s", "Gb/s" };
int units = 0;
/* Calculate the download speed using the history ring and
recent data that hasn't made it to the ring yet. */
@ -988,7 +989,7 @@ create_image (struct bar_progress *bp, double dl_total_time, bool done)
double dltime = hist->total_time + (dl_total_time - bp->recent_start);
double dlspeed = calc_rate (dlquant, dltime, &units);
sprintf (p, " %4.*f%s", dlspeed >= 99.95 ? 0 : dlspeed >= 9.995 ? 1 : 2,
dlspeed, short_units[units]);
dlspeed, !opt.report_bps ? short_units[units] : short_units_bits[units]);
move_to_end (p);
}
else

View File

@ -59,9 +59,7 @@ as that of the covered work. */
#include <errno.h>
#include <unistd.h>
#include <time.h>
#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#endif
#include <sys/time.h>
/* Cygwin currently (as of 2005-04-08, Cygwin 1.5.14) lacks clock_getres,
but still defines _POSIX_TIMERS! Because of that we simply use the

View File

@ -763,6 +763,11 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
goto out;
}
}
if (!accept_url (url))
{
DEBUGP (("%s is excluded/not-included through regex.\n", url));
goto out;
}
/* 6. Check for acceptance/rejection rules. We ignore these rules
for directories (no file name to match) and for non-leaf HTMLs,

View File

@ -162,13 +162,16 @@ limit_bandwidth (wgint bytes, struct ptimer *timer)
/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
amount of data and decrease SKIP. Increment *TOTAL by the amount
of data written. */
of data written. If OUT2 is not NULL, also write BUF to OUT2.
In case of error writing to OUT, -1 is returned. In case of error
writing to OUT2, -2 is returned. In case of any other error,
1 is returned. */
static int
write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
wgint *written)
write_data (FILE *out, FILE *out2, const char *buf, int bufsize,
wgint *skip, wgint *written)
{
if (!out)
if (out == NULL && out2 == NULL)
return 1;
if (*skip > bufsize)
{
@ -184,7 +187,10 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
return 1;
}
fwrite (buf, 1, bufsize, out);
if (out != NULL)
fwrite (buf, 1, bufsize, out);
if (out2 != NULL)
fwrite (buf, 1, bufsize, out2);
*written += bufsize;
/* Immediately flush the downloaded data. This should not hinder
@ -201,9 +207,17 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
actual justification. (Also, why 16K? Anyone test other values?)
*/
#ifndef __VMS
fflush (out);
if (out != NULL)
fflush (out);
if (out2 != NULL)
fflush (out2);
#endif /* ndef __VMS */
return !ferror (out);
if (out != NULL && ferror (out))
return -1;
else if (out2 != NULL && ferror (out2))
return -2;
else
return 0;
}
/* Read the contents of file descriptor FD until it the connection
@ -221,13 +235,20 @@ write_data (FILE *out, const char *buf, int bufsize, wgint *skip,
the amount of data written to disk. The time it took to download
the data is stored to ELAPSED.
If OUT2 is non-NULL, the contents is also written to OUT2.
OUT2 will get an exact copy of the response: if this is a chunked
response, everything -- including the chunk headers -- is written
to OUT2. (OUT will only get the unchunked response.)
The function exits and returns the amount of data read. In case of
error while reading data, -1 is returned. In case of error while
writing data, -2 is returned. */
writing data to OUT, -2 is returned. In case of error while writing
data to OUT2, -3 is returned. */
int
fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
wgint *qtyread, wgint *qtywritten, double *elapsed, int flags)
wgint *qtyread, wgint *qtywritten, double *elapsed, int flags,
FILE *out2)
{
int ret = 0;
#undef max
@ -310,13 +331,24 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
ret = -1;
break;
}
else if (out2 != NULL)
fwrite (line, 1, strlen (line), out2);
remaining_chunk_size = strtol (line, &endl, 16);
xfree (line);
if (remaining_chunk_size == 0)
{
ret = 0;
if (fd_read_line (fd) == NULL)
line = fd_read_line (fd);
if (line == NULL)
ret = -1;
else
{
if (out2 != NULL)
fwrite (line, 1, strlen (line), out2);
xfree (line);
}
break;
}
}
@ -366,20 +398,30 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
if (ret > 0)
{
sum_read += ret;
if (!write_data (out, dlbuf, ret, &skip, &sum_written))
int write_res = write_data (out, out2, dlbuf, ret, &skip, &sum_written);
if (write_res != 0)
{
ret = -2;
ret = (write_res == -3) ? -3 : -2;
goto out;
}
if (chunked)
{
remaining_chunk_size -= ret;
if (remaining_chunk_size == 0)
if (fd_read_line (fd) == NULL)
{
ret = -1;
break;
}
{
char *line = fd_read_line (fd);
if (line == NULL)
{
ret = -1;
break;
}
else
{
if (out2 != NULL)
fwrite (line, 1, strlen (line), out2);
xfree (line);
}
}
}
}
@ -601,6 +643,7 @@ retr_rate (wgint bytes, double secs)
{
static char res[20];
static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
static const char *rate_names_bits[] = {"b/s", "Kb/s", "Mb/s", "Gb/s" };
int units;
double dlrate = calc_rate (bytes, secs, &units);
@ -608,7 +651,7 @@ retr_rate (wgint bytes, double secs)
e.g. "1022", "247", "12.5", "2.38". */
sprintf (res, "%.*f %s",
dlrate >= 99.95 ? 0 : dlrate >= 9.995 ? 1 : 2,
dlrate, rate_names[units]);
dlrate, !opt.report_bps ? rate_names[units]: rate_names_bits[units]);
return res;
}
@ -625,6 +668,11 @@ double
calc_rate (wgint bytes, double secs, int *units)
{
double dlrate;
double bibyte = 1000.0;
if (!opt.report_bps)
bibyte = 1024.0;
assert (secs >= 0);
assert (bytes >= 0);
@ -636,16 +684,17 @@ calc_rate (wgint bytes, double secs, int *units)
0 and the timer's resolution, assume half the resolution. */
secs = ptimer_resolution () / 2.0;
dlrate = bytes / secs;
if (dlrate < 1024.0)
dlrate = convert_to_bits (bytes) / secs;
if (dlrate < bibyte)
*units = 0;
else if (dlrate < 1024.0 * 1024.0)
*units = 1, dlrate /= 1024.0;
else if (dlrate < 1024.0 * 1024.0 * 1024.0)
*units = 2, dlrate /= (1024.0 * 1024.0);
else if (dlrate < (bibyte * bibyte))
*units = 1, dlrate /= bibyte;
else if (dlrate < (bibyte * bibyte * bibyte))
*units = 2, dlrate /= (bibyte * bibyte);
else
/* Maybe someone will need this, one day. */
*units = 3, dlrate /= (1024.0 * 1024.0 * 1024.0);
*units = 3, dlrate /= (bibyte * bibyte * bibyte);
return dlrate;
}
@ -911,10 +960,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
register_redirection (origurl, u->url);
if (*dt & TEXTHTML)
register_html (u->url, local_file);
register_html (local_file);
if (*dt & TEXTCSS)
register_css (u->url, local_file);
register_css (local_file);
}
if (file)

View File

@ -50,7 +50,7 @@ enum {
rb_chunked_transfer_encoding = 4
};
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int, FILE *);
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);

View File

@ -45,7 +45,7 @@ static struct hash_table *nonexisting_urls_set;
/* Cleanup the data structures associated with this file. */
void
static void
spider_cleanup (void)
{
if (nonexisting_urls_set)

View File

@ -1,6 +1,6 @@
/* SSL support.
Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
2009, 2010, 2011 Free Software Foundation, Inc.
2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Originally contributed by Christian Fraenkel.
This file is part of GNU Wget.
@ -33,7 +33,7 @@ as that of the covered work. */
#define GEN_SSLFUNC_H
bool ssl_init (void);
bool ssl_connect_wget (int);
bool ssl_connect_wget (int, const char *);
bool ssl_check_certificate (int, const char *);
#endif /* GEN_SSLFUNC_H */

View File

@ -46,6 +46,8 @@ const char *test_append_uri_pathel();
const char *test_are_urls_equal();
const char *test_is_robots_txt_url();
const char *program_argstring = "TEST";
int tests_run;
static const char *

View File

@ -1503,9 +1503,9 @@ url_file_name (const struct url *u, char *replaced_filename)
{
struct growable fnres; /* stands for "file name result" */
const char *u_file, *u_query;
const char *u_file;
char *fname, *unique;
char *index_filename = "index.html"; /* The default index file is index.html */
const char *index_filename = "index.html"; /* The default index file is index.html */
fnres.base = NULL;
fnres.size = 0;
@ -1562,12 +1562,11 @@ url_file_name (const struct url *u, char *replaced_filename)
u_file = *u->file ? u->file : index_filename;
append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
/* Append "?query" to the file name. */
u_query = u->query && *u->query ? u->query : NULL;
if (u_query)
/* Append "?query" to the file name, even if empty */
if (u->query)
{
append_char (FN_QUERY_SEP, &fnres);
append_uri_pathel (u_query, u_query + strlen (u_query),
append_uri_pathel (u->query, u->query + strlen (u->query),
true, &fnres);
}
}

View File

@ -59,12 +59,12 @@ as that of the covered work. */
# endif
#endif
#include <sys/time.h>
#include <sys/stat.h>
/* For TIOCGWINSZ and friends: */
#ifdef HAVE_SYS_IOCTL_H
# include <sys/ioctl.h>
#endif
#include <sys/ioctl.h>
#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
@ -73,6 +73,11 @@ as that of the covered work. */
#include <signal.h>
#include <setjmp.h>
#include <regex.h>
#ifdef HAVE_LIBPCRE
# include <pcre.h>
#endif
#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
@ -769,8 +774,7 @@ fopen_excl (const char *fname, int binary)
open_id = 13;
fd = open( fname, /* File name. */
flags, /* Flags. */
0777, /* Mode for default protection.
*/
0777, /* Mode for default protection. */
"rfm=stmlf", /* Stream_LF. */
OPEN_OPT_ARGS); /* Access callback. */
}
@ -918,6 +922,19 @@ acceptable (const char *s)
return true;
}
/* Decide whether the URL S may be followed, judging by the accept and
   reject regular expressions.  S is refused when an accept pattern is
   set and S does not match it, or when a reject pattern is set and S
   matches it.  With neither pattern set, every URL is accepted.  */
bool
accept_url (const char *s)
{
  bool accepted = !opt.acceptregex
                  || opt.regex_match_fun (opt.acceptregex, s);
  /* The reject pattern is only consulted for URLs that passed the
     accept pattern.  */
  bool rejected = accepted
                  && opt.rejectregex
                  && opt.regex_match_fun (opt.rejectregex, s);

  return accepted && !rejected;
}
/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p()
will return true if and only if D2 begins with `/something/' or is exactly
'/something'. */
@ -1826,6 +1843,17 @@ number_to_static_string (wgint number)
ringpos = (ringpos + 1) % RING_SIZE;
return buf;
}
/* Scale the byte count NUM for rate reporting: it is multiplied by 8
   (bytes to bits) when opt.report_bps is set, and returned unchanged
   otherwise.  */
wgint
convert_to_bits (wgint num)
{
  return opt.report_bps ? num * 8 : num;
}
/* Determine the width of the terminal we're running on. If that's
not possible, return 0. */
@ -2299,6 +2327,89 @@ base64_decode (const char *base64, void *dest)
return q - (char *) dest;
}
#ifdef HAVE_LIBPCRE
/* Compile STR as a PCRE pattern.  Returns the compiled pattern, or
   NULL (after printing a diagnostic to stderr) when STR is not a
   valid regular expression.  */
void *
compile_pcre_regex (const char *str)
{
  const char *errbuf;
  int erroffset;
  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
  if (! regex)
    {
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      /* The return type is a pointer, so failure is reported with
         NULL rather than the boolean `false'.  */
      return NULL;
    }
  return regex;
}
#endif
/* Compile STR as a POSIX extended regular expression (without
   sub-match reporting).  Returns a newly allocated regex_t, or NULL
   (after printing a diagnostic to stderr) when STR cannot be
   compiled.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof (regex_t));
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* The first regerror call only reports the buffer size needed
         for the message, including the terminating NUL.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nothing was compiled, so release the storage instead of
         leaking it.  */
      xfree (regex);
      return NULL;
    }
  return regex;
}
#ifdef HAVE_LIBPCRE
#define OVECCOUNT 30
/* Test STR against the compiled PCRE pattern REGEX.  Returns true on
   a match and false otherwise.  A pcre_exec failure other than
   "no match" is logged and treated as a non-match.  */
bool
match_pcre_regex (const void *regex, const char *str)
{
  int ovector[OVECCOUNT];
  int rc = pcre_exec ((pcre *) regex, 0, str, strlen (str), 0, 0,
                      ovector, OVECCOUNT);

  if (rc >= 0)
    return true;

  if (rc != PCRE_ERROR_NOMATCH)
    logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
               quote (str), rc);
  return false;
}
#undef OVECCOUNT
#endif
/* Matches a POSIX regex. */
bool
match_posix_regex (const void *regex, const char *str)
{
int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
if (rc == REG_NOMATCH)
return false;
else if (rc == 0)
return true;
else
{
int errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
char *errbuf = xmalloc (errbuf_size);
regerror (rc, opt.acceptregex, errbuf, errbuf_size);
logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
quote (str), rc);
xfree (errbuf);
return false;
}
}
#undef IS_ASCII
#undef NEXT_CHAR

View File

@ -90,6 +90,7 @@ char *file_merge (const char *, const char *);
int fnmatch_nocase (const char *, const char *, int);
bool acceptable (const char *);
bool accept_url (const char *);
bool accdir (const char *s);
char *suffix (const char *s);
bool match_tail (const char *, const char *, bool);
@ -127,6 +128,7 @@ char *human_readable (HR_NUMTYPE);
int numdigit (wgint);
char *number_to_string (char *, wgint);
char *number_to_static_string (wgint);
wgint convert_to_bits (wgint);
int determine_screen_width (void);
int random_number (int);
@ -141,6 +143,14 @@ void xsleep (double);
int base64_encode (const void *, int, char *);
int base64_decode (const char *, void *);
#ifdef HAVE_LIBPCRE
void *compile_pcre_regex (const char *);
bool match_pcre_regex (const void *, const char *);
#endif
void *compile_posix_regex (const char *);
bool match_posix_regex (const void *, const char *);
void stable_sort (void *, size_t, size_t, int (*) (const void *, const void *));
const char *print_decimal (double);

1440
src/warc.c Normal file

File diff suppressed because it is too large Load Diff

23
src/warc.h Normal file
View File

@ -0,0 +1,23 @@
/* Declarations of WARC helper methods. */
#ifndef WARC_H
#define WARC_H
/* NOTE(review): host.h is presumably included for the ip_address type
   used below -- confirm.  This header also uses FILE, bool and off_t
   without including their headers itself, so it appears to rely on
   the including file having pulled them in already.  */
#include "host.h"
/* Set-up and tear-down of the WARC writer.  main() calls warc_init
   after log_init when a WARC file name was given; presumably
   warc_close is its counterpart -- verify in warc.c.  */
void warc_init (void);
void warc_close (void);
/* Helpers that fill a caller-supplied buffer.  NOTE(review): the
   required buffer sizes are not visible from this header -- check
   warc.c before calling.  */
void warc_timestamp (char *timestamp);
void warc_uuid_str (char *id_str);
/* Returns a stream; presumably a temporary file, and presumably NULL
   on failure -- TODO confirm.  */
FILE * warc_tempfile (void);
/* Record writers.  Each takes the record body as a stream plus an
   offset into it and returns a bool, presumably true on success --
   TODO confirm.  */
bool warc_write_request_record (char *url, char *timestamp_str,
char *concurrent_to_uuid, ip_address *ip, FILE *body, off_t payload_offset);
bool warc_write_response_record (char *url, char *timestamp_str,
char *concurrent_to_uuid, ip_address *ip, FILE *body, off_t payload_offset,
char *mime_type, int response_code, char *redirect_location);
bool warc_write_resource_record (char *resource_uuid, const char *url,
const char *timestamp_str, const char *concurrent_to_uuid, ip_address *ip,
const char *content_type, FILE *body, off_t payload_offset);
#endif /* WARC_H */

View File

@ -353,7 +353,9 @@ typedef enum
PROXERR,
/* 50 */
AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR,
UNLINKERR, NEWLOCATION_KEEP_POST
UNLINKERR, NEWLOCATION_KEEP_POST, CLOSEFAILED,
WARC_ERR, WARC_TMP_FOPENERR, WARC_TMP_FWRITEERR
} uerr_t;
struct range {

View File

@ -1,3 +1,9 @@
2012-06-16 Giuseppe Scrivano <gscrivano@gnu.org>
* Makefile.am (EXTRA_DIST): Add Test-stdouterr.px.
* run-px (tests): Likewise.
* Test-stdouterr.px: New file.
2011-06-03 Merinov Nikolay <kim.roader@gmail.com>
* Test-idn-cmd-utf8.px: Added test for idn with utf-8 local encoding.

View File

@ -124,6 +124,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
Test-restrict-ascii.px \
Test-Restrict-Lowercase.px \
Test-Restrict-Uppercase.px \
Test-stdouterr.px \
Test--spider-fail.px \
Test--spider.px \
Test--spider-r-HTTP-Content-Disposition.px \

48
tests/Test-stdouterr.px Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env perl

use strict;
use warnings;

use HTTPTest;

# Test-stdouterr: fetch a small file with "-c ... -O /dev/full" and expect
# wget to exit with status 3.
# NOTE(review): presumably this works because writes to /dev/full fail --
# confirm on the platforms the test suite targets.

# The scenario needs /dev/full; when it is missing, exit status 2 marks the
# test as skipped.
exit(2) unless -e '/dev/full';

###############################################################################

# Pages offered by the test HTTP server: code, msg, headers, content.
my %pages = (
    '/somefile.txt' => {
        code    => '200',
        msg     => 'Dontcare',
        headers => { 'Content-type' => 'text/plain' },
        content => 'blabla',
    },
);

# "{{port}}" is left as a literal placeholder here; presumably the test
# harness substitutes the real port before running the command.
my $wget_command
    = "$WgetTest::WGETPATH -c http://localhost:{{port}}/somefile.txt -O /dev/full";

my $expected_exit_status = 3;

# No files exist before the run and none are expected afterwards.
my %preexisting_files;
my %expected_files;

###############################################################################

my $test = HTTPTest->new(
    name     => 'Test-stdouterr',
    input    => \%pages,
    cmdline  => $wget_command,
    errcode  => $expected_exit_status,
    existing => \%preexisting_files,
    output   => \%expected_files,
);

exit $test->run();

# vim: et ts=4 sw=4

View File

@ -74,6 +74,7 @@ my @tests = (
'Test-restrict-ascii.px',
'Test-Restrict-Lowercase.px',
'Test-Restrict-Uppercase.px',
'Test-stdouterr.px',
'Test--spider-fail.px',
'Test--spider-r-HTTP-Content-Disposition.px',
'Test--spider-r--no-content-disposition.px',

View File

@ -33,11 +33,11 @@ my $tex_content = read_file($tex_file);
my @args = ([
$main_content,
qr/static \s+? struct \s+? cmdline_option \s+? option_data\[\] \s+? = \s+? \{ (.*?) \}\;/sx,
qr/static \s+? struct \s+? cmdline_option \s+? option_data\[\] \s+? = \s+? \{ (.+?) \}\;/sx,
[ qw(long_name short_name type data argtype) ],
], [
$init_content,
qr/commands\[\] \s+? = \s+? \{ (.*?) \}\;/sx,
qr/commands\[\] \s+? = \s+? \{ (.+?) \}\;/sx,
[ qw(name place action) ],
]);
@ -78,18 +78,18 @@ sub extract_entries
my (@entries, %index, $i);
foreach my $chunk (@$chunks) {
my ($args) = $chunk =~ /\{ \s+? (.*?) \s+? \}/sx;
my ($args) = $chunk =~ /\{ \s+? (.+?) \s+? \}/sx;
next unless defined $args;
my @args = map {
tr/'"//d; $_
} map {
/\((.*?)\)/ ? $1 : $_
/\((.+?)\)/ ? $1 : $_
} split /\,\s+/, $args;
my $entry = { map { $_ => shift @args } @$names };
($entry->{line}) = $chunk =~ /^ \s+? (\{.*)/mx;
($entry->{line}) = $chunk =~ /^ \s+? (\{.+)/mx;
if ($chunk =~ /deprecated/i) {
$entries[-1]->{deprecated} = true;
}
@ -103,9 +103,9 @@ sub extract_entries
push @entries, $entry;
}
push @entries, \%index;
push @entries, { %index };
return \@entries;
return [ @entries ];
}
sub output_results
@ -281,7 +281,7 @@ sub emit_undocumented_opts
while ($tex =~ /^\@item\w*? \s+? --([-a-z0-9]+)/gmx) {
$tex_items{$1} = true;
}
my ($help) = $main =~ /\n print_help .*? \{\n (.*) \n\} \n/sx;
my ($help) = $main =~ /\n print_help .*? \{\n (.+) \n\} \n/sx;
while ($help =~ /--([-a-z0-9]+)/g) {
$main_items{$1} = true;
}

View File

@ -128,5 +128,11 @@ main (int argc, char *argv[])
exit (EXIT_FAILURE);
}
if (close (fd) < 0)
{
perror (PROGRAM_NAME ": close");
exit (EXIT_FAILURE);
}
return 0;
}