Keep fetched URLs in POSIX extended attributes

* configure.ac: Check for xattr availability
* src/Makefile.am: Add xattr.c
* src/ftp.c: Include xattr.h.
  (getftp): Set attributes if enabled.
* src/http.c: Include xattr.h.
  (gethttp): Add parameter 'original_url',
  set attributes if enabled.
  (http_loop): Add 'original_url' to call of gethttp().
* src/init.c: Add new option --xattr.
* src/main.c: Add new option --xattr, add description to help text.
* src/options.h: Add new config member 'enable_xattr'.
* src/xattr.c: New file.
* src/xattr.h: New file.

These attributes provide a lightweight method of later determining
where a file was downloaded from.

This patch changes:
*   autoconf detects whether extended attributes are available and
    enables the code if they are.
*   The new flags --xattr and --no-xattr control whether xattr is enabled.
*   The new command "xattr = (on|off)" can be used in ~/.wgetrc or /etc/wgetrc
*   The original and redirected URLs are recorded as shown below.
*   This works for both single fetches and recursive mode.

The attributes that are set are:
user.xdg.origin.url: The URL that the content was fetched from.
user.xdg.referrer.url: The URL that was originally requested.

Here is an example, where http://archive.org redirects to https://archive.org:
$ wget --xattr http://archive.org
...
$ getfattr -d index.html
user.xdg.origin.url="https://archive.org/"
user.xdg.referrer.url="http://archive.org/"

These attributes were chosen based on those stored by Google Chrome
https://bugs.chromium.org/p/chromium/issues/detail?id=45903
and curl https://github.com/curl/curl/blob/master/src/tool_xattr.c
This commit is contained in:
Sean Burford 2016-07-21 14:15:49 +10:00 committed by Tim Rühsen
parent ef372a4f27
commit a933bdd31e
9 changed files with 205 additions and 5 deletions

View File

@ -771,11 +771,37 @@ AS_IF([test "X$with_cares" == "Xyes"],[
RESOLVER_INFO="libc, --bind-dns-address and --dns-servers not available"
])
dnl
dnl Extended Attribute support
dnl
dnl ENABLE_XATTR takes the value passed to --enable-xattr/--disable-xattr
dnl and is "yes" when neither flag is given.
AC_ARG_ENABLE([xattr],
[AS_HELP_STRING([--disable-xattr], [disable support for POSIX Extended Attributes])],
[ENABLE_XATTR=$enableval],
[ENABLE_XATTR=yes])
dnl Pick the attribute-setting function to probe for from the host OS.
dnl Any host OS not matched below has the feature forced off.
case "$host_os" in
*linux* | *darwin*) xattr_syscalls="fsetxattr" ;;
freebsd*) xattr_syscalls="extattr_set_fd" ;;
*) AC_MSG_NOTICE([Disabling Extended Attribute support: your system is not known to support extended attributes.])
ENABLE_XATTR=no
esac
dnl Only probe when the feature is still requested; a missing function
dnl turns it off.
if test "X${ENABLE_XATTR}" = "Xyes"; then
AC_CHECK_FUNCS([$xattr_syscalls], [], [
AC_MSG_NOTICE([Disabling Extended Attribute support: your system does not support $xattr_syscalls])
ENABLE_XATTR=no
])
fi
dnl Publish the final decision to the C sources as the ENABLE_XATTR define.
test "X${ENABLE_XATTR}" = "Xyes" && AC_DEFINE([ENABLE_XATTR], 1,
[Define if you want file meta-data storing into POSIX Extended Attributes compiled in.])
dnl Needed by src/Makefile.am
AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"])
AM_CONDITIONAL([WITH_SSL], [test "X$with_ssl" != "Xno"])
AM_CONDITIONAL([METALINK_IS_ENABLED], [test "X$with_metalink" != "Xno"])
AM_CONDITIONAL([WITH_XATTR], [test "X$ENABLE_XATTR" != "Xno"])
dnl
dnl Create output
@ -801,6 +827,7 @@ AC_MSG_NOTICE([Summary of build options:
Digest: $ENABLE_DIGEST
NTLM: $ENABLE_NTLM
OPIE: $ENABLE_OPIE
POSIX xattr: $ENABLE_XATTR
Debugging: $ENABLE_DEBUG
Assertions: $ENABLE_ASSERTION
Valgrind: $VALGRIND_INFO

View File

@ -39,6 +39,10 @@ if METALINK_IS_ENABLED
METALINK_OBJ = metalink.c
endif
if WITH_XATTR
XATTR_OBJ = xattr.c
endif
# The following line is losing on some versions of make!
DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
@ -49,14 +53,14 @@ wget_SOURCES = connect.c convert.c cookies.c ftp.c \
css_.c css-url.c \
ftp-basic.c ftp-ls.c hash.c host.c hsts.c html-parse.c html-url.c \
http.c init.c log.c main.c netrc.c progress.c ptimer.c \
recur.c res.c retr.c spider.c url.c warc.c \
recur.c res.c retr.c spider.c url.c warc.c $(XATTR_OBJ) \
utils.c exits.c build_info.c $(IRI_OBJ) $(METALINK_OBJ) \
css-url.h css-tokens.h connect.h convert.h cookies.h \
ftp.h hash.h host.h hsts.h html-parse.h html-url.h \
http.h http-ntlm.h init.h log.h mswindows.h netrc.h \
options.h progress.h ptimer.h recur.h res.h retr.h \
spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \
exits.h version.h metalink.h
exits.h version.h metalink.h xattr.h
nodist_wget_SOURCES = version.c
EXTRA_wget_SOURCES = iri.c
LDADD = $(LIBOBJS) ../lib/libgnu.a $(GETADDRINFO_LIB) $(HOSTENT_LIB) $(INET_NTOP_LIB) $(LIBSOCKET)\

View File

@ -52,6 +52,9 @@ as that of the covered work. */
#include "recur.h" /* for INFINITE_RECURSION */
#include "warc.h"
#include "c-strcase.h"
#ifdef ENABLE_XATTR
#include "xattr.h"
#endif
#ifdef __VMS
# include "vms.h"
@ -1546,6 +1549,13 @@ Error in server response, closing control connection.\n"));
tmrate = retr_rate (rd_size, con->dltime);
total_download_time += con->dltime;
#ifdef ENABLE_XATTR
if (opt.enable_xattr)
{
set_file_metadata (u->url, NULL, fp);
}
#endif
fd_close (local_sock);
/* Close the local file. */
if (!output_stream || con->cmd & DO_LIST)

View File

@ -66,6 +66,9 @@ as that of the covered work. */
# include "metalink.h"
# include "xstrndup.h"
#endif
#ifdef ENABLE_XATTR
#include "xattr.h"
#endif
#ifdef TESTING
#include "test.h"
@ -2892,8 +2895,8 @@ fail:
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
struct iri *iri, int count)
gethttp (struct url *u, struct url *original_url, struct http_stat *hs,
int *dt, struct url *proxy, struct iri *iri, int count)
{
struct request *req = NULL;
@ -3754,6 +3757,20 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
goto cleanup;
}
#ifdef ENABLE_XATTR
if (opt.enable_xattr)
{
if (original_url != u)
{
set_file_metadata (u->url, original_url->url, fp);
}
else
{
set_file_metadata (u->url, NULL, fp);
}
}
#endif
err = read_response_body (hs, sock, fp, contlen, contrange,
chunked_transfer_encoding,
u->url, warc_timestamp_str,
@ -3972,7 +3989,7 @@ http_loop (struct url *u, struct url *original_url, char **newloc,
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. */
err = gethttp (u, &hstat, dt, proxy, iri, count);
err = gethttp (u, original_url, &hstat, dt, proxy, iri, count);
/* Time? */
tms = datetime_str (time (NULL));

View File

@ -339,6 +339,9 @@ static const struct {
#ifdef USE_WATT32
{ "wdebug", &opt.wdebug, cmd_boolean },
#endif
#ifdef ENABLE_XATTR
{ "xattr", &opt.enable_xattr, cmd_boolean },
#endif
};
/* Look up CMDNAME in the commands[] and return its position in the
@ -482,6 +485,12 @@ defaults (void)
/* HSTS is enabled by default */
opt.hsts = true;
#endif
#ifdef ENABLE_XATTR
opt.enable_xattr = true;
#else
opt.enable_xattr = false;
#endif
}
/* Return the user's home directory (strdup-ed), or NULL if none is

View File

@ -435,6 +435,9 @@ static struct cmdline_option option_data[] =
{ "warc-tempdir", 0, OPT_VALUE, "warctempdir", -1 },
#ifdef USE_WATT32
{ "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 },
#endif
#ifdef ENABLE_XATTR
{ "xattr", 0, OPT_BOOLEAN, "xattr", -1 },
#endif
};
@ -703,6 +706,10 @@ Download:\n"),
--metalink-over-http use Metalink metadata from HTTP response headers\n"),
N_("\
--preferred-location preferred location for Metalink resources\n"),
#endif
#ifdef ENABLE_XATTR
N_("\
--no-xattr turn off storage of metadata in extended file attributes\n"),
#endif
"\n",

View File

@ -127,6 +127,8 @@ struct options
bool warc_keep_log; /* Store the log file in a WARC record. */
char **warc_user_headers; /* User-defined WARC header(s). */
bool enable_xattr; /* Store metadata in POSIX extended attributes. */
char *user; /* Generic username */
char *passwd; /* Generic password */
bool ask_passwd; /* Ask for password? */

79
src/xattr.c Normal file
View File

@ -0,0 +1,79 @@
/* xattr.c -- POSIX Extended Attribute support.
Copyright (C) 2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>. */
#include "wget.h"
#include <stdio.h>
#include <string.h>
#include "log.h"
#include "xattr.h"
#ifdef USE_XATTR
/* Attach the attribute NAME with the string VALUE to the file open as FP.
   VALUE is stored without its terminating NUL byte.

   Returns 0 on success, -1 when any argument is NULL, and otherwise the
   negative result reported by fsetxattr.  */
static int
write_xattr_metadata (const char *name, const char *value, FILE *fp)
{
  int rc;

  if (!name || !value || !fp)
    return -1;

  rc = fsetxattr (fileno (fp), name, value, strlen (value), 0);

  /* A successful call may report a positive number (FreeBSD's
     extattr_set_fd returns the length of the extended attribute), so
     every non-negative result is collapsed to 0.  */
  if (rc < 0)
    return rc;

  return 0;
}
#else /* USE_XATTR */
/* Fallback for builds where USE_XATTR is not defined: nothing is stored
   and the call always reports success (0).  */
static int
write_xattr_metadata (const char *name, const char *value, FILE *fp)
{
  /* Reference each parameter so unused-parameter warnings stay quiet.  */
  (void) fp;
  (void) value;
  (void) name;

  return 0;
}
#endif /* USE_XATTR */
/* Save metadata about where the file came from (requested and final URLs)
   to user POSIX Extended Attributes of the retrieved file open as FP.

   ORIGIN_URL is written as "user.xdg.origin.url".  REFERRER_URL, when
   non-NULL, is written as "user.xdg.referrer.url", but only after the
   origin attribute was stored successfully.  Both values are passed
   through escnonprint_uri before being written.

   Returns -1 when ORIGIN_URL or FP is NULL; otherwise the result of the
   last attribute write attempted (0 on success).

   For more details about the user namespace see
   [http://freedesktop.org/wiki/CommonExtendedAttributes] and
   [http://0pointer.de/lennart/projects/mod_mime_xattr/].

   NOTE(review): the URL strings are stored as given; if callers can pass
   URLs that embed credentials, those would end up in the attributes --
   verify against the callers.  */
int
set_file_metadata (const char *origin_url, const char *referrer_url, FILE *fp)
{
  int status;

  if (!origin_url || !fp)
    return -1;

  status = write_xattr_metadata ("user.xdg.origin.url",
                                 escnonprint_uri (origin_url), fp);

  /* Stop here if the origin could not be stored or there is no
     referrer to record.  */
  if (status != 0 || referrer_url == NULL)
    return status;

  return write_xattr_metadata ("user.xdg.referrer.url",
                               escnonprint_uri (referrer_url), fp);
}

45
src/xattr.h Normal file
View File

@ -0,0 +1,45 @@
/* xattr.h -- POSIX Extended Attribute function mappings.
Copyright (C) 2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>. */
#include <stdio.h>
#ifndef _XATTR_H
#define _XATTR_H
/* Record where a downloaded file came from as extended attributes on the
   file open as fp: origin_url is stored under "user.xdg.origin.url" and,
   when non-NULL, referrer_url under "user.xdg.referrer.url".
   Returns 0 on success and a negative value on failure. */
int set_file_metadata (const char *origin_url, const char *referrer_url, FILE *fp);
/* Platform mapping.  Each supported branch below makes a five-argument
   fsetxattr (file, name, buffer, size, flags) available and defines
   USE_XATTR.  If no branch matches, USE_XATTR stays undefined. */
#if defined(__linux)
/* libc on Linux has fsetxattr (5 arguments). */
# include <sys/xattr.h>
# define USE_XATTR
#elif defined(__APPLE__)
/* libc on OS X has fsetxattr (6 arguments).  The wrapper macro passes 0
   for the additional fifth argument and forwards flags as the sixth. */
# include <sys/xattr.h>
# define fsetxattr(file, name, buffer, size, flags) \
fsetxattr((file), (name), (buffer), (size), 0, (flags))
# define USE_XATTR
#elif defined(__FreeBSD_version) && (__FreeBSD_version > 500000)
/* FreeBSD: map fsetxattr onto extattr_set_fd in EXTATTR_NAMESPACE_USER.
   The flags argument is accepted by the macro but not forwarded. */
# include <sys/types.h>
# include <sys/extattr.h>
# define fsetxattr(file, name, buffer, size, flags) \
extattr_set_fd((file), EXTATTR_NAMESPACE_USER, (name), (buffer), (size))
# define USE_XATTR
#endif
#endif /* _XATTR_H */