mirror of
https://github.com/mirror/wget.git
synced 2025-03-14 20:00:15 +08:00
Parse metalink info to an internal set of structures.
This commit is contained in:
parent
6b59fab021
commit
4cd6292b31
291
src/metalink.c
291
src/metalink.c
@ -41,6 +41,7 @@ as that of the covered work. */
|
||||
#include "sha1.h"
|
||||
#include "sha256.h"
|
||||
#include "metalink.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define HASH_TYPES 3
|
||||
/* Between MD5, SHA1 and SHA256, SHA256 has the greatest hash length, which is
|
||||
@ -51,6 +52,283 @@ static char supported_hashes[HASH_TYPES][7] = {"sha256", "sha1", "md5"};
|
||||
static int digest_sizes[HASH_TYPES] = {SHA256_DIGEST_SIZE, SHA1_DIGEST_SIZE, MD5_DIGEST_SIZE};
|
||||
static int (*hash_function[HASH_TYPES]) (FILE *, void *) = {sha256_stream, sha1_stream, md5_stream};
|
||||
|
||||
mlink *
|
||||
parse_metalink(char *input_file)
|
||||
{
|
||||
int err;
|
||||
metalink_t *metalink;
|
||||
metalink_file_t **files;
|
||||
metalink_resource_t **resources;
|
||||
metalink_checksum_t **checksums;
|
||||
metalink_chunk_checksum_t *chunk_checksum;
|
||||
metalink_piece_hash_t **piece_hashes;
|
||||
mlink *mlink;
|
||||
err = metalink_parse_file (input_file, &metalink);
|
||||
if(err != 0 || !metalink)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "Libmetalink could not parse the metalink file.\n");
|
||||
return NULL;
|
||||
}
|
||||
else if(metalink->files == NULL) {
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Metalink doesn't have any file data.\n");
|
||||
metalink_delete(metalink);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mlink = malloc (sizeof(mlink));
|
||||
mlink->identity = (metalink->identity ? xstrdup (metalink->identity) : NULL);
|
||||
mlink->tags = (metalink->tags ? xstrdup (metalink->tags) : NULL);
|
||||
mlink->files = NULL;
|
||||
mlink->num_of_files = 0;
|
||||
|
||||
for (files = metalink->files; *files; ++files)
|
||||
{
|
||||
mlink_file *file;
|
||||
|
||||
if (!(*files)->name)
|
||||
{
|
||||
/* File name is missing */
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping file"
|
||||
" due to missing name/path.\n");
|
||||
continue;
|
||||
}
|
||||
else if (!(*files)->resources)
|
||||
{
|
||||
/* URL is missing */
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping file(%s)"
|
||||
" due to missing resources.\n", (*files)->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
file = malloc(sizeof(mlink_file));
|
||||
++(mlink->num_of_files);
|
||||
file -> next = (mlink->files);
|
||||
(mlink->files) = file;
|
||||
|
||||
file->name = xstrdup ((*files)->name);
|
||||
file->size = (*files)->size;
|
||||
file->maxconnections = (*files)->maxconnections;
|
||||
file->version = ((*files)->version ? xstrdup ((*files)->version) : NULL);
|
||||
file->language = ((*files)->language ? xstrdup ((*files)->language) : NULL);
|
||||
file->os = ((*files)->os ? xstrdup ((*files)->os) : NULL);
|
||||
file->resources = NULL;
|
||||
file->checksums = NULL;
|
||||
file->chunk_checksum = NULL;
|
||||
file->num_of_res = file->num_of_checksums = 0;
|
||||
|
||||
for (resources = (*files)->resources; *resources; ++resources)
|
||||
{
|
||||
mlink_resource *resource;
|
||||
|
||||
if (!(*resources)->url)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping resource"
|
||||
" due to missing URL.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
resource = malloc (sizeof(mlink_resource));
|
||||
++(file->num_of_res);
|
||||
|
||||
resource->url = xstrdup ((*resources)->url);
|
||||
resource->type = ((*resources)->type ? xstrdup ((*resources)->type) : NULL);
|
||||
resource->location = ((*resources)->location ? xstrdup ((*resources)->location) : NULL);
|
||||
resource->preference = (*resources)->preference;
|
||||
resource->maxconnections = (*resources)->maxconnections;
|
||||
|
||||
resource->next = (file->resources);
|
||||
(file->resources) = resource;
|
||||
}
|
||||
|
||||
for (checksums = (*files)->checksums; *checksums; ++checksums)
|
||||
{
|
||||
mlink_checksum *checksum = malloc (sizeof(mlink_checksum));
|
||||
|
||||
if (!(*checksums)->type)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping checksum"
|
||||
" due to missing hash type.\n");
|
||||
continue;
|
||||
}
|
||||
else if (!(*checksums)->hash)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping resource"
|
||||
" due to missing hash value.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
checksum->type = ((*checksums)->type ? xstrdup ((*checksums)->type) : NULL);
|
||||
checksum->hash = ((*checksums)->hash ? xstrdup ((*checksums)->hash) : NULL);
|
||||
|
||||
checksum->next = (file->checksums);
|
||||
(file->checksums) = checksum;
|
||||
}
|
||||
|
||||
if((chunk_checksum = (*files)->chunk_checksum))
|
||||
{
|
||||
mlink_chunk_checksum *chunk_sum;
|
||||
|
||||
if(!chunk_checksum->type)
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping chunk checksum"
|
||||
" due to missing type information.\n");
|
||||
else
|
||||
{
|
||||
chunk_sum = malloc (sizeof(mlink_chunk_checksum));
|
||||
chunk_sum->length = chunk_checksum->length;
|
||||
chunk_sum->type = (chunk_checksum->type ? xstrdup (chunk_checksum->type) : NULL);
|
||||
for (piece_hashes = chunk_checksum->piece_hashes; *piece_hashes; ++piece_hashes)
|
||||
{
|
||||
mlink_piece_hash piece_hash;
|
||||
if(!chunk_checksum->type)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping chunk checksum"
|
||||
" due to missing hash value for piece(%d).\n",
|
||||
(*piece_hashes)->piece);
|
||||
free (chunk_sum);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metalink_delete(metalink);
|
||||
return mlink;
|
||||
}
|
||||
|
||||
void
|
||||
elect_resources (mlink *mlink)
|
||||
{
|
||||
mlink_file *file = mlink -> files;
|
||||
mlink_resource *res, *prev;
|
||||
|
||||
for (; file; file = file->next)
|
||||
{
|
||||
prev = file->resources;
|
||||
res = prev->next;
|
||||
while (res)
|
||||
{
|
||||
if(strcmp(res->type, "ftp") || strcmp(res->type, "http"))
|
||||
{
|
||||
prev->next = res->next;
|
||||
free (res);
|
||||
}
|
||||
else
|
||||
{
|
||||
prev = prev->next;
|
||||
res = prev->next;
|
||||
}
|
||||
}
|
||||
res = file->resources;
|
||||
if(strcmp(res->type, "ftp") || strcmp(res->type, "http"))
|
||||
{
|
||||
file->resources = res->next;
|
||||
free(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
elect_checksums (mlink *mlink)
|
||||
{
|
||||
int i;
|
||||
mlink_file *file = mlink -> files;
|
||||
mlink_checksum *csum, *prev;
|
||||
|
||||
for (; file; file = file->next)
|
||||
{
|
||||
prev = file->checksums;
|
||||
csum = prev->next;
|
||||
while (csum)
|
||||
{
|
||||
for(i=0; i<HASH_TYPES && strcmp(csum->type, supported_hashes[i]); ++i);
|
||||
if (i < HASH_TYPES)
|
||||
{
|
||||
prev->next = csum->next;
|
||||
free (csum);
|
||||
}
|
||||
else
|
||||
{
|
||||
prev = prev->next;
|
||||
csum = prev->next;
|
||||
}
|
||||
}
|
||||
csum = file->checksums;
|
||||
for(i=0; i<HASH_TYPES && strcmp(csum->type, supported_hashes[i]); ++i);
|
||||
if (i < HASH_TYPES)
|
||||
{
|
||||
prev->next = csum->next;
|
||||
free (csum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
delete_mlink(mlink *metalink)
|
||||
{
|
||||
mlink_file *file, *tempfile;
|
||||
mlink_resource *res, *tempres;
|
||||
mlink_checksum *csum, *tempcsum;
|
||||
mlink_piece_hash *phash, *temphash;
|
||||
|
||||
if(!metalink)
|
||||
return;
|
||||
|
||||
xfree_null (metalink->tags);
|
||||
xfree_null (metalink->identity);
|
||||
|
||||
file = metalink->files;
|
||||
while (file)
|
||||
{
|
||||
xfree_null(file->os);
|
||||
xfree_null(file->language);
|
||||
xfree_null(file->version);
|
||||
xfree_null(file->name);
|
||||
|
||||
res = file->resources;
|
||||
while (res)
|
||||
{
|
||||
xfree_null (res->url);
|
||||
xfree_null (res->type);
|
||||
xfree_null (res->location);
|
||||
|
||||
tempres = res;
|
||||
res = res->next;
|
||||
free (tempres);
|
||||
}
|
||||
|
||||
csum = file->checksums;
|
||||
while (csum)
|
||||
{
|
||||
xfree_null (csum->type);
|
||||
xfree_null (csum->hash);
|
||||
|
||||
tempcsum = csum;
|
||||
csum = csum->next;
|
||||
free (tempcsum);
|
||||
}
|
||||
|
||||
if(file->chunk_checksum)
|
||||
{
|
||||
free (file->chunk_checksum->type);
|
||||
phash = file->chunk_checksum->piece_hashes;
|
||||
while (phash)
|
||||
{
|
||||
xfree_null (phash->hash);
|
||||
|
||||
temphash = phash;
|
||||
phash = phash->next;
|
||||
free (temphash);
|
||||
}
|
||||
}
|
||||
|
||||
tempfile = file;
|
||||
file = file->next;
|
||||
free (tempfile);
|
||||
}
|
||||
free (metalink);
|
||||
}
|
||||
|
||||
/* Parses metalink into type metalink_t and returns a pointer to it.
|
||||
Returns NULL if parsing fails. */
|
||||
metalink_t*
|
||||
@ -89,10 +367,10 @@ lower_hex_case (unsigned char *hash, int length)
|
||||
0 if all pairs of hashes compared turned out to be the same.
|
||||
1 if due to some error, comparisons could not be made. */
|
||||
int
|
||||
verify_file_hash (const char *filename, metalink_checksum_t **checksums)
|
||||
verify_file_hash (const char *filename, mlink_checksum *checksums)
|
||||
{
|
||||
int i, j, req_type, res = 0;
|
||||
|
||||
|
||||
unsigned char hash_raw[MAX_DIGEST_LENGTH];
|
||||
/* Points to a hash of supported type from the metalink file. The index dedicated
|
||||
to a type is inversely proportional to its strength. (check supported_types
|
||||
@ -100,6 +378,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
|
||||
unsigned char *metalink_hashes[HASH_TYPES];
|
||||
unsigned char file_hash[2 * MAX_DIGEST_LENGTH + 1];
|
||||
FILE *file;
|
||||
mlink_checksum *checksum;
|
||||
|
||||
if (!checksums)
|
||||
{
|
||||
@ -113,9 +392,9 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
|
||||
metalink_hashes[i] = NULL;
|
||||
|
||||
/* Fill metalink_hashes to contain an instance of supported types of hashes. */
|
||||
for (i = 0; checksums[i] != NULL; ++i)
|
||||
for (checksum = checksums; checksum; checksum = checksum->next)
|
||||
for (j = 0; j < HASH_TYPES; ++j)
|
||||
if (!strcmp(checksums[i]->type, supported_hashes[j]))
|
||||
if (!strcmp(checksum->type, supported_hashes[j]))
|
||||
{
|
||||
if(metalink_hashes[j])
|
||||
{
|
||||
@ -128,7 +407,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
metalink_hashes[j] = checksums[i]->hash;
|
||||
metalink_hashes[j] = checksum->hash;
|
||||
}
|
||||
|
||||
for (i = 0; !metalink_hashes[i]; ++i);
|
||||
@ -166,7 +445,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
|
||||
sprintf(file_hash + 2 * j, "%02x", hash_raw[j]);
|
||||
|
||||
lower_hex_case(metalink_hashes[req_type], 2 * digest_sizes[req_type]);
|
||||
if (strcmp(checksums[req_type]->hash, file_hash))
|
||||
if (strcmp(metalink_hashes[req_type], file_hash))
|
||||
{
|
||||
logprintf (LOG_VERBOSE, "Verifying(%s) failed: %s hashes are different.\n",
|
||||
filename, supported_hashes[i]);
|
||||
|
@ -32,8 +32,71 @@ as that of the covered work. */
|
||||
#ifndef MLINK_H
|
||||
#define MLINK_H
|
||||
|
||||
/* One node in the singly linked list of per-piece hashes that belongs to
   a chunk checksum. */
struct metalink_piece_hash
{
  struct metalink_piece_hash *next;     /* Following node, NULL at the end. */

  int piece;                            /* Number of the piece. */
  char *hash;                           /* Hash value of that piece. */
};

typedef struct metalink_piece_hash mlink_piece_hash;
|
||||
|
||||
/* One node in a file's singly linked list of whole-file checksums. */
struct metalink_checksum
{
  struct metalink_checksum *next;       /* Following node, NULL at the end. */

  char *type;                           /* Name of the hash, e.g. "sha256". */
  char *hash;                           /* Hash value as a string. */
};

typedef struct metalink_checksum mlink_checksum;
|
||||
|
||||
/* One node in a file's singly linked list of download sources. */
struct metalink_resource
{
  struct metalink_resource *next;       /* Following node, NULL at the end. */

  char *url;                            /* Where to fetch the file from. */
  char *type;                           /* Protocol name, e.g. "ftp" or "http";
                                           may be NULL. */
  char *location;                       /* Location string taken from the
                                           metalink; may be NULL. */
  int preference;                       /* Preference value taken from the
                                           metalink. */
  int maxconnections;                   /* Connection limit taken from the
                                           metalink. */
};

typedef struct metalink_resource mlink_resource;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *type;
|
||||
int length;
|
||||
mlink_piece_hash *piece_hashes;
|
||||
} mlink_chunk_checksum;
|
||||
|
||||
typedef struct metalink_file
|
||||
{
|
||||
struct metalink_file *next;
|
||||
|
||||
char *name;
|
||||
long long int size;
|
||||
char *version;
|
||||
char *language;
|
||||
char *os;
|
||||
int maxconnections;
|
||||
mlink_resource *resources;
|
||||
int num_of_res;
|
||||
mlink_checksum *checksums;
|
||||
int num_of_checksums;
|
||||
mlink_chunk_checksum *chunk_checksum;
|
||||
} mlink_file;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *identity;
|
||||
char *tags;
|
||||
mlink_file *files;
|
||||
int num_of_files;
|
||||
} mlink;
|
||||
|
||||
mlink *parse_metalink (char *);
|
||||
|
||||
void delete_mlink (mlink *);
|
||||
|
||||
metalink_t *metalink_context (const char *);
|
||||
|
||||
int verify_file_hash(const char *, metalink_checksum_t **);
|
||||
int verify_file_hash (const char *, mlink_checksum *);
|
||||
|
||||
#endif /* MLINK_H */
|
||||
|
55
src/retr.c
55
src/retr.c
@ -1063,18 +1063,16 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
input_file = (char *) file;
|
||||
|
||||
#ifdef ENABLE_METALINK
|
||||
metalink_t *metalink;
|
||||
mlink *mlink = parse_metalink(input_file);
|
||||
|
||||
if(opt.metalink_file && (metalink = metalink_context(input_file)))
|
||||
if(opt.metalink_file && mlink)
|
||||
{
|
||||
int i, j, r, url_err, retries;
|
||||
int ret, dt = 0;
|
||||
int ranges_covered, chunk_size, num_of_resources;
|
||||
int i, j, r, ranges_covered, chunk_size, url_err, retries, ret, dt=0;
|
||||
pthread_t thread;
|
||||
sem_t retr_sem;
|
||||
uerr_t status;
|
||||
metalink_file_t* file;
|
||||
metalink_resource_t* resource;
|
||||
mlink_file* file;
|
||||
mlink_resource *resource;
|
||||
struct s_thread_ctx *thread_ctx;
|
||||
|
||||
init_temp_files();
|
||||
@ -1082,20 +1080,17 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
thread_ctx = malloc (opt.jobs * (sizeof *thread_ctx));
|
||||
|
||||
retries = 0;
|
||||
i = 0;
|
||||
while ((file = metalink->files[i]) != NULL)
|
||||
file = mlink->files;
|
||||
while (file)
|
||||
{
|
||||
memset(thread_ctx, '\0', opt.jobs * (sizeof *thread_ctx));
|
||||
num_of_resources = 0;
|
||||
while (file->resources[num_of_resources])
|
||||
++num_of_resources;
|
||||
|
||||
/* If chunk_size is too small, set it equal to MIN_CHUNK_SIZE. */
|
||||
chunk_size = (file->size) / opt.jobs;
|
||||
if(chunk_size < MIN_CHUNK_SIZE)
|
||||
chunk_size = MIN_CHUNK_SIZE;
|
||||
|
||||
j = fill_ranges_data(num_of_resources, file->size, chunk_size);
|
||||
j = fill_ranges_data(file->num_of_res, file->size, chunk_size);
|
||||
|
||||
/* If chunk_size was set to MIN_CHUNK_SIZE, opt.jobs should be corrected. */
|
||||
if(j < opt.jobs)
|
||||
@ -1105,16 +1100,16 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
|
||||
sem_init (&retr_sem, 0, 0);
|
||||
j = ranges_covered = 0;
|
||||
|
||||
resource = file->resources;
|
||||
|
||||
/* Assign values to thread_ctx[] elements and spawn threads that will
|
||||
conduct the download. */
|
||||
for (r = 0; r < opt.jobs; ++r)
|
||||
{
|
||||
resource = file->resources[j];
|
||||
if (!resource)
|
||||
{
|
||||
j = 0;
|
||||
resource = file->resources[j];
|
||||
resource = file->resources;
|
||||
}
|
||||
|
||||
thread_ctx[r].referer = NULL;
|
||||
@ -1138,6 +1133,7 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
return URLERROR;
|
||||
}
|
||||
++j;
|
||||
resource = resource->next;
|
||||
}
|
||||
|
||||
/* Until all the ranges are covered, collect threads. */
|
||||
@ -1170,18 +1166,24 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
|
||||
/* Look for resource from which downloading this range is not
|
||||
tried. */
|
||||
for (j = 0; j < num_of_resources; ++j)
|
||||
if (!((thread_ctx[r].range)->resources)[j])
|
||||
break;
|
||||
j = 0;
|
||||
resource = file->resources;
|
||||
while (j < file->num_of_res)
|
||||
{
|
||||
if (!((thread_ctx[r].range)->resources)[j])
|
||||
break;
|
||||
++j;
|
||||
resource = resource -> next;
|
||||
}
|
||||
/* If there is such a resource, then update the range values
|
||||
to try that not-tried resource and spawn thread.
|
||||
If all the resources are exhausted, stop collecting the
|
||||
threads, as the download failed. */
|
||||
if (j < num_of_resources)
|
||||
if (j < file->num_of_res)
|
||||
{
|
||||
thread_ctx[r].url = file->resources[j]->url;
|
||||
if ((thread_ctx[r].range)->bytes_covered)
|
||||
{
|
||||
thread_ctx[r].url = resource->url;
|
||||
(thread_ctx[r].range)->first_byte =
|
||||
(thread_ctx[r].range)->bytes_covered;
|
||||
(thread_ctx[r].range)->bytes_covered = 0;
|
||||
@ -1219,9 +1221,9 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
{
|
||||
if(retries < opt.n_retries)
|
||||
{
|
||||
--i;
|
||||
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n",
|
||||
file->name, ++retries + 1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1251,8 +1253,9 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
logprintf (LOG_VERBOSE, "Verifying(%s) failed.\n", file->name);
|
||||
if(retries < opt.n_retries)
|
||||
{
|
||||
--i;
|
||||
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n", file->name, ++retries + 1);
|
||||
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n",
|
||||
file->name, ++retries + 1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1265,13 +1268,13 @@ retrieve_from_file (const char *file, bool html, int *count)
|
||||
status = QUOTEXC;
|
||||
break;
|
||||
}
|
||||
++i;
|
||||
file = file->next;
|
||||
}
|
||||
|
||||
free(thread_ctx);
|
||||
clean_ranges ();
|
||||
clean_temp_files ();
|
||||
metalink_delete(metalink);
|
||||
delete_mlink(mlink);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user