wget/src/url.h
hniksic b0b1c815c1 [svn] A bunch of new features:
- use mmap() to read whole files in core instead of allocating memory
  and read'ing it.

- use a new, more general, HTML parser (html-parse.c) and interface to
  it from Wget (html-url.c).

- respect <meta name=robots content=nofollow> (easy with the new HTML
  parser).

- use hash tables instead of linked lists in places where the lists
  were used to facilitate mappings.

- rewrite the code in host.c to be more readable and faster (hash
  tables instead of home-grown lists.)

- make convert_links properly convert partial URLs to complete ones
  for those URLs that have *not* been downloaded.

- use HTTP persistent connections where available.  The implementation is
  very simple-minded: it caches the last connection to the server.

Published in <sxshf533d5r.fsf@florida.arsdigita.de>.
2000-11-19 12:50:10 -08:00

118 lines
3.9 KiB
C

/* Declarations for url.c.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of Wget.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef URL_H
#define URL_H
/* If the string contains unsafe characters, duplicate it with
   encode_string, otherwise just copy it with xstrdup.

   NOTE: the argument X appears twice in the expansion, so it is
   evaluated twice: once by contains_unsafe and once by whichever of
   encode_string / xstrdup is selected.  Do not pass an expression
   that has side effects.

   xstrdup is not declared in this header; it must be visible at the
   point where the macro is used.  */
#define CLEANDUP(x) (contains_unsafe (x) ? encode_string (x) : xstrdup (x))
/* Everything known about a single URL, broken into its components.  */
struct urlinfo
{
  char *url;                    /* The URL, unchanged */
  uerr_t proto;                 /* Protocol of the URL */
  char *host;                   /* Hostname extracted from the URL */
  unsigned short port;          /* Port number */
  char ftp_type;                /* NOTE(review): presumably the FTP
                                   transfer type -- confirm in url.c */

  /* Path, directory, file and query string (properly decoded).  */
  char *path;
  char *dir;
  char *file;
  char *qstring;

  /* Credentials.  */
  char *user;                   /* Username */
  char *passwd;                 /* Password */

  struct urlinfo *proxy;        /* Described as "the exact string to pass
                                   to proxy server"; note that the member
                                   is itself a urlinfo, not a plain
                                   string */
  char *referer;                /* Source from which the request URI was
                                   obtained */
  char *local;                  /* Local filename of the URL document */
};
/* Flags attached to a URL.  Each enumerator is a distinct single bit.  */
enum uflags
{
  URELATIVE = 1 << 0,           /* Is URL relative? */
  UNOPROTO  = 1 << 1,           /* Is URL without a protocol? */
  UABS2REL  = 1 << 2,           /* Convert absolute to relative? */
  UREL2ABS  = 1 << 3            /* Convert relative to absolute? */
};
/* Records where a URL was found (e.g. its position inside an HTML
   document) together with related bookkeeping.  Nodes are chained
   through the `next' member.  */
typedef struct _urlpos
{
  char *url;                    /* The URL itself */
  char *local_name;             /* Local file to which it was saved */
  enum uflags flags;            /* Various flags (see enum uflags) */
  int pos;                      /* Relative position in the buffer */
  int size;                     /* Size that goes with `pos' */
  struct _urlpos *next;         /* Next struct in list */
} urlpos;
/* The type that downloaded_file() both accepts as a parameter and
   hands back as its result.  */
typedef enum
{
  /* Used as a return value: */
  FILE_NOT_ALREADY_DOWNLOADED = 0,

  /* Used both as a return value and as a parameter: */
  FILE_DOWNLOADED_NORMALLY = 1,
  FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED = 2,

  /* Used as a parameter: */
  CHECK_FOR_FILE = 3
} downloaded_file_t;
/* Function declarations.

   Every argument list below is wrapped in PARAMS ((...)).  Neither
   PARAMS nor uerr_t is defined in this header as shown, so both must
   already be visible wherever this header is included.
   NOTE(review): PARAMS is presumably the usual compatibility macro for
   pre-ANSI compilers -- confirm against its definition.  */
int skip_url PARAMS ((const char *));

/* contains_unsafe and encode_string are the two functions the CLEANDUP
   macro above expands to (together with xstrdup).  */
int contains_unsafe PARAMS ((const char *));
char *encode_string PARAMS ((const char *));

/* Operations on struct urlinfo.  */
struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int));

uerr_t urlproto PARAMS ((const char *));
int skip_proto PARAMS ((const char *));
int has_proto PARAMS ((const char *));
int skip_uname PARAMS ((const char *));

uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int));
int url_equal PARAMS ((const char *, const char *));

/* Functions that produce or consume urlpos values.  */
urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
void free_urlpos PARAMS ((urlpos *));

char *url_concat PARAMS ((const char *, const char *));

void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *));
void opt_url PARAMS ((struct urlinfo *));

char *getproxy PARAMS ((uerr_t));
int no_proxy_match PARAMS ((const char *, const char **));

void convert_links PARAMS ((const char *, urlpos *));
urlpos *add_url PARAMS ((urlpos *, const char *, const char *));

/* Takes a downloaded_file_t and returns a downloaded_file_t; see the
   comments on that enum for which enumerators are used in which
   direction.  */
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
#endif /* URL_H */