wget/src/host.c

337 lines
8.0 KiB
C
Raw Normal View History

1999-12-02 15:42:23 +08:00
/* Dealing with host names.
   Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <assert.h>
#include <sys/types.h>
#ifdef WINDOWS
# include <winsock.h>
#else
# include <sys/socket.h>
# include <netinet/in.h>
#ifndef __BEOS__
1999-12-02 15:42:23 +08:00
# include <arpa/inet.h>
#endif
1999-12-02 15:42:23 +08:00
# include <netdb.h>
#endif /* WINDOWS */
#ifndef NO_ADDRESS
#define NO_ADDRESS NO_DATA
#endif
1999-12-02 15:42:23 +08:00
#ifdef HAVE_SYS_UTSNAME_H
# include <sys/utsname.h>
#endif
#include <errno.h>
#include "wget.h"
#include "utils.h"
#include "host.h"
#include "url.h"
#include "hash.h"
1999-12-02 15:42:23 +08:00
#ifndef errno
extern int errno;
#endif
/* Size, in bytes, of one IPv4 address as stored in an address list.  */
#define IP4_ADDRESS_LENGTH 4

/* Mapping between known host names and the lists of their
   addresses.  */
struct hash_table *host_name_addresses_map;

/* Lists of addresses.  This should eventually be extended to handle
   IPv6.  */
struct address_list {
  int count;                    /* number of addresses */
  unsigned char *buffer;        /* buffer which holds all of them,
                                   IP4_ADDRESS_LENGTH bytes each. */
  int refcount;                 /* so we know whether to free it or
                                   not. */
};

/* Pointer to the first byte of address number INDEX inside AL's
   buffer.  INDEX is parenthesized so that compound arguments such as
   `i + 1' are scaled as a whole instead of only their last operand.  */
#define ADDR_LOCATION(al, index) \
  ((al)->buffer + (index) * IP4_ADDRESS_LENGTH)
/* Return the number of addresses in the list. */
int
address_list_count (struct address_list *al)
1999-12-02 15:42:23 +08:00
{
return al->count;
}
1999-12-02 15:42:23 +08:00
/* Copy the address stored at position INDEX of AL into IP_STORE.
   Exactly IP4_ADDRESS_LENGTH bytes are written, so IP_STORE must be
   at least that large.  INDEX is not range-checked here.  */

void
address_list_copy_one (struct address_list *al, int index,
                       unsigned char *ip_store)
{
  const unsigned char *src = ADDR_LOCATION (al, index);

  memcpy (ip_store, src, IP4_ADDRESS_LENGTH);
}
/* Return 1 if AL1 and AL2 hold the same addresses, 0 otherwise.

   The same list object trivially matches itself.  Otherwise the two
   lists must have equal counts and byte-identical buffers; since the
   buffers are compared with memcmp, the addresses must also appear in
   the same order.  */

int
address_list_match_all (struct address_list *al1, struct address_list *al2)
{
  int same = 1;

  if (al1 != al2)
    {
      if (al1->count == al2->count)
        same = (memcmp (al1->buffer, al2->buffer,
                        al1->count * IP4_ADDRESS_LENGTH) == 0);
      else
        same = 0;
    }
  return same;
}
/* Build a fresh address_list from H_ADDR_LIST, a NULL-terminated
   array of address pointers laid out like the h_addr_list member of
   struct hostent.  IP4_ADDRESS_LENGTH bytes are copied from each
   entry.  The list must contain at least one address (asserted).
   The new list starts out with a reference count of 1.  */

static struct address_list *
address_list_new (char **h_addr_list)
{
  struct address_list *al = xmalloc (sizeof (struct address_list));
  unsigned char *dst;
  int count, i;

  /* Find the terminating NULL to learn how many addresses we got.  */
  for (count = 0; h_addr_list[count] != NULL; count++)
    ;
  assert (count > 0);

  al->count = count;
  al->buffer = xmalloc (count * IP4_ADDRESS_LENGTH);
  al->refcount = 1;

  /* Pack the addresses back to back into the buffer.  */
  dst = al->buffer;
  for (i = 0; i < count; i++)
    {
      memcpy (dst, h_addr_list[i], IP4_ADDRESS_LENGTH);
      dst += IP4_ADDRESS_LENGTH;
    }

  return al;
}
static void
address_list_delete (struct address_list *al)
1999-12-02 15:42:23 +08:00
{
xfree (al->buffer);
xfree (al);
}
/* Drop one reference to AL.  When the reference count falls to zero
   (or below), the list and its buffer are freed and AL must not be
   used again by the caller.  */

void
address_list_release (struct address_list *al)
{
  --al->refcount;
  /* `%p' requires a `void *' argument; passing a struct pointer
     through a variadic call without the cast is a type mismatch.  */
  DEBUGP (("Releasing %p (new refcount %d).\n", (void *)al, al->refcount));
  if (al->refcount <= 0)
    {
      DEBUGP (("Deleting unused %p.\n", (void *)al));
      address_list_delete (al);
    }
}
/* The same as inet_ntoa, but without the need for a cast, or for
   #including the netinet stuff.

   ADDR points to the raw bytes of an IPv4 address in network byte
   order.  The bytes are copied into a local struct in_addr rather
   than reinterpreting ADDR in place: ADDR is a plain byte pointer
   with no alignment guarantee, and casting it would also discard its
   const qualifier.

   The result is whatever inet_ntoa returns, i.e. a string that may
   live in static storage and be overwritten by the next call.  */

char *
pretty_print_address (const unsigned char *addr)
{
  struct in_addr in;

  memcpy (&in.s_addr, addr, sizeof (in.s_addr));
  return inet_ntoa (in);
}
/* Add host name HOST with the address ADDR_TEXT to the cache.
ADDR_LIST is a NULL-terminated list of addresses, as in struct
hostent. */
1999-12-02 15:42:23 +08:00
static void
cache_host_lookup (const char *host, struct address_list *al)
1999-12-02 15:42:23 +08:00
{
if (!host_name_addresses_map)
host_name_addresses_map = make_nocase_string_hash_table (0);
++al->refcount;
hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
#ifdef DEBUG
if (opt.debug)
{
int i;
debug_logprintf ("Caching %s =>", host);
for (i = 0; i < al->count; i++)
debug_logprintf (" %s",
pretty_print_address (ADDR_LOCATION (al, i)));
debug_logprintf ("\n");
}
#endif
}
struct address_list *
lookup_host (const char *host, int silent)
{
struct address_list *al = NULL;
1999-12-02 15:42:23 +08:00
unsigned long addr;
struct hostent *hptr;
/* If the address is of the form d.d.d.d, no further lookup is
needed. */
addr = (unsigned long)inet_addr (host);
1999-12-02 15:42:23 +08:00
if ((int)addr != -1)
{
unsigned char tmpstore[IP4_ADDRESS_LENGTH];
char *lst[] = { tmpstore, NULL };
/* ADDR is defined to be in network byte order, which is what
this returns, so we can just copy it to STORE_IP. However,
on big endian 64-bit architectures the value will be stored
in the *last*, not first four bytes. OFFSET makes sure that
we copy the correct four bytes. */
int offset;
#ifdef WORDS_BIGENDIAN
offset = sizeof (unsigned long) - IP4_ADDRESS_LENGTH;
#else
offset = 0;
#endif
memcpy (tmpstore, (char *)&addr + offset, IP4_ADDRESS_LENGTH);
return address_list_new (lst);
1999-12-02 15:42:23 +08:00
}
/* By now we know that the host name we got is not of the form
d.d.d.d. Try to find it in our cache of host names. */
if (host_name_addresses_map)
al = hash_table_get (host_name_addresses_map, host);
if (al)
{
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
++al->refcount;
return al;
}
if (!silent)
logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
/* Look up the host using gethostbyname(). Note that we use
gethostbyname() rather than ngethostbyname(), because we already
know that the address is not numerical. */
hptr = gethostbyname (host);
1999-12-02 15:42:23 +08:00
if (!hptr)
{
if (!silent)
logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
return NULL;
}
if (!silent)
logprintf (LOG_VERBOSE, _("done.\n"));
al = address_list_new (hptr->h_addr_list);
1999-12-02 15:42:23 +08:00
/* Cache the lookup information. */
cache_host_lookup (host, al);
return al;
1999-12-02 15:42:23 +08:00
}
1999-12-02 15:42:23 +08:00
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
accept_domain (struct url *u)
1999-12-02 15:42:23 +08:00
{
assert (u->host != NULL);
if (opt.domains)
{
if (!sufmatch ((const char **)opt.domains, u->host))
return 0;
}
if (opt.exclude_domains)
{
if (sufmatch ((const char **)opt.exclude_domains, u->host))
return 0;
}
return 1;
}
/* Check whether host name WHAT is matched by any pattern in the
   NULL-terminated array LIST.  Returns 1 on the first match, 0 if
   nothing matches.

   Each pattern is compared against the tail of WHAT,
   case-insensitively, working backwards from the terminating NUL of
   both strings.  A pattern matches only when the suffix it covers
   starts on a domain-label boundary:

     - the pattern equals WHAT as a whole, or
     - the character of WHAT just before the matched suffix is a dot
       (pattern `b.com' matches `a.b.com'), or
     - the pattern itself begins with a dot (`.b.com' matches
       `a.b.com').

   Without the boundary requirement a pattern such as `ample.com'
   would also match the unrelated host `example.com'.  Empty patterns
   are skipped; otherwise they would match every host.  */

int
sufmatch (const char **list, const char *what)
{
  int i, j, k, lw;

  lw = strlen (what);

  for (i = 0; list[i]; i++)
    {
      if (list[i][0] == '\0')
        continue;

      /* Start at the NUL terminators and walk towards the beginning
         of both strings for as long as the characters agree.  */
      for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
        if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
          break;

      /* J == -1 means the whole pattern was consumed.  K is then
         either -1 (WHAT was consumed too: exact match) or the index
         of the character of WHAT preceding the matched suffix.  */
      if (j == -1 && (k == -1 || what[k] == '.' || list[i][0] == '.'))
        return 1;
    }

  return 0;
}
/* Translate a resolver error code (as found in h_errno) into a
   localized, human-readable message.  Every recognized code maps to
   the same "Host not found" text; anything else yields "Unknown
   error".  */

char *
herrmsg (int error)
{
  /* A switch is not usable here: some of these constants share a
     value on certain systems, and the compiler would reject the
     duplicate case labels.  */
  const int recognized = (error == HOST_NOT_FOUND
                          || error == NO_RECOVERY
                          || error == NO_DATA
                          || error == NO_ADDRESS
                          || error == TRY_AGAIN);

  if (!recognized)
    return _("Unknown error");
  return _("Host not found");
}
/* Callback handed to hash_table_map by host_cleanup; releases one
   cache entry.  KEY is the host-name string and VALUE the
   address_list cached for it; both are freed.  The list is expected
   to be referenced by the cache alone at this point (asserted).  The
   third argument is unused.  Always returns 0 -- presumably telling
   hash_table_map to keep iterating; confirm against hash.c.  */

static int
host_cleanup_mapper (void *key, void *value, void *arg_ignored)
{
  struct address_list *entry;

  xfree (key);                  /* host */

  entry = (struct address_list *)value;
  assert (entry->refcount == 1);
  address_list_delete (entry);

  return 0;
}
1999-12-02 15:42:23 +08:00
void
host_cleanup (void)
1999-12-02 15:42:23 +08:00
{
if (host_name_addresses_map)
{
hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
hash_table_destroy (host_name_addresses_map);
host_name_addresses_map = NULL;
}
1999-12-02 15:42:23 +08:00
}