Parse metalink info into an internal set of structures.

This commit is contained in:
Ilim Ugur 2012-08-20 16:02:27 +03:00
parent 6b59fab021
commit 4cd6292b31
3 changed files with 378 additions and 33 deletions

View File

@ -41,6 +41,7 @@ as that of the covered work. */
#include "sha1.h"
#include "sha256.h"
#include "metalink.h"
#include "utils.h"
#define HASH_TYPES 3
/* Between MD5, SHA1 and SHA256, SHA256 has the greatest hash length, which is
@ -51,6 +52,283 @@ static char supported_hashes[HASH_TYPES][7] = {"sha256", "sha1", "md5"};
static int digest_sizes[HASH_TYPES] = {SHA256_DIGEST_SIZE, SHA1_DIGEST_SIZE, MD5_DIGEST_SIZE};
static int (*hash_function[HASH_TYPES]) (FILE *, void *) = {sha256_stream, sha1_stream, md5_stream};
mlink *
parse_metalink(char *input_file)
{
int err;
metalink_t *metalink;
metalink_file_t **files;
metalink_resource_t **resources;
metalink_checksum_t **checksums;
metalink_chunk_checksum_t *chunk_checksum;
metalink_piece_hash_t **piece_hashes;
mlink *mlink;
err = metalink_parse_file (input_file, &metalink);
if(err != 0 || !metalink)
{
logprintf (LOG_VERBOSE, "Libmetalink could not parse the metalink file.\n");
return NULL;
}
else if(metalink->files == NULL) {
logprintf (LOG_VERBOSE, "PARSE METALINK: Metalink doesn't have any file data.\n");
metalink_delete(metalink);
return NULL;
}
mlink = malloc (sizeof(mlink));
mlink->identity = (metalink->identity ? xstrdup (metalink->identity) : NULL);
mlink->tags = (metalink->tags ? xstrdup (metalink->tags) : NULL);
mlink->files = NULL;
mlink->num_of_files = 0;
for (files = metalink->files; *files; ++files)
{
mlink_file *file;
if (!(*files)->name)
{
/* File name is missing */
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping file"
" due to missing name/path.\n");
continue;
}
else if (!(*files)->resources)
{
/* URL is missing */
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping file(%s)"
" due to missing resources.\n", (*files)->name);
continue;
}
file = malloc(sizeof(mlink_file));
++(mlink->num_of_files);
file -> next = (mlink->files);
(mlink->files) = file;
file->name = xstrdup ((*files)->name);
file->size = (*files)->size;
file->maxconnections = (*files)->maxconnections;
file->version = ((*files)->version ? xstrdup ((*files)->version) : NULL);
file->language = ((*files)->language ? xstrdup ((*files)->language) : NULL);
file->os = ((*files)->os ? xstrdup ((*files)->os) : NULL);
file->resources = NULL;
file->checksums = NULL;
file->chunk_checksum = NULL;
file->num_of_res = file->num_of_checksums = 0;
for (resources = (*files)->resources; *resources; ++resources)
{
mlink_resource *resource;
if (!(*resources)->url)
{
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping resource"
" due to missing URL.\n");
continue;
}
resource = malloc (sizeof(mlink_resource));
++(file->num_of_res);
resource->url = xstrdup ((*resources)->url);
resource->type = ((*resources)->type ? xstrdup ((*resources)->type) : NULL);
resource->location = ((*resources)->location ? xstrdup ((*resources)->location) : NULL);
resource->preference = (*resources)->preference;
resource->maxconnections = (*resources)->maxconnections;
resource->next = (file->resources);
(file->resources) = resource;
}
for (checksums = (*files)->checksums; *checksums; ++checksums)
{
mlink_checksum *checksum = malloc (sizeof(mlink_checksum));
if (!(*checksums)->type)
{
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping checksum"
" due to missing hash type.\n");
continue;
}
else if (!(*checksums)->hash)
{
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping resource"
" due to missing hash value.\n");
continue;
}
checksum->type = ((*checksums)->type ? xstrdup ((*checksums)->type) : NULL);
checksum->hash = ((*checksums)->hash ? xstrdup ((*checksums)->hash) : NULL);
checksum->next = (file->checksums);
(file->checksums) = checksum;
}
if((chunk_checksum = (*files)->chunk_checksum))
{
mlink_chunk_checksum *chunk_sum;
if(!chunk_checksum->type)
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping chunk checksum"
" due to missing type information.\n");
else
{
chunk_sum = malloc (sizeof(mlink_chunk_checksum));
chunk_sum->length = chunk_checksum->length;
chunk_sum->type = (chunk_checksum->type ? xstrdup (chunk_checksum->type) : NULL);
for (piece_hashes = chunk_checksum->piece_hashes; *piece_hashes; ++piece_hashes)
{
mlink_piece_hash piece_hash;
if(!chunk_checksum->type)
{
logprintf (LOG_VERBOSE, "PARSE METALINK: Skipping chunk checksum"
" due to missing hash value for piece(%d).\n",
(*piece_hashes)->piece);
free (chunk_sum);
break;
}
}
}
}
}
metalink_delete(metalink);
return mlink;
}
/* Drop from every file in MLINK the resources whose type is known and is
   neither "ftp" nor "http", releasing the node together with the strings it
   owns.  Resources without a type cannot be judged and are left in place.
   Each file's num_of_res is reset to the number of resources that remain.
   A NULL MLINK or a file with an empty resource list is handled as a
   no-op.  */
void
elect_resources (mlink *mlink)
{
  mlink_file *file;

  if (!mlink)
    return;

  for (file = mlink->files; file; file = file->next)
    {
      /* LINK always addresses the pointer that leads to the node under
         inspection, so the head and interior nodes unlink the same way.  */
      mlink_resource **link = &file->resources;
      int kept = 0;

      while (*link)
        {
          mlink_resource *res = *link;

          if (res->type
              && strcmp (res->type, "ftp") != 0
              && strcmp (res->type, "http") != 0)
            {
              *link = res->next;
              /* free (NULL) is a no-op, so optional fields need no guard.  */
              free (res->url);
              free (res->type);
              free (res->location);
              free (res);
            }
          else
            {
              ++kept;
              link = &res->next;
            }
        }

      file->num_of_res = kept;
    }
}
/* Drop from every file in MLINK the checksums whose type is not listed in
   supported_hashes (or is missing), releasing the node together with the
   strings it owns.  Each file's num_of_checksums is reset to the number of
   checksums that remain.  A NULL MLINK or a file without checksums is
   handled as a no-op.  */
void
elect_checksums (mlink *mlink)
{
  mlink_file *file;

  if (!mlink)
    return;

  for (file = mlink->files; file; file = file->next)
    {
      /* LINK always addresses the pointer that leads to the node under
         inspection, so the head and interior nodes unlink the same way.  */
      mlink_checksum **link = &file->checksums;
      int kept = 0;

      while (*link)
        {
          mlink_checksum *csum = *link;
          int i = HASH_TYPES;

          /* After this scan, i < HASH_TYPES iff the type is supported.  */
          if (csum->type)
            for (i = 0; i < HASH_TYPES; ++i)
              if (strcmp (csum->type, supported_hashes[i]) == 0)
                break;

          if (i < HASH_TYPES)
            {
              ++kept;
              link = &csum->next;
            }
          else
            {
              *link = csum->next;
              /* free (NULL) is a no-op, so no guard is needed.  */
              free (csum->type);
              free (csum->hash);
              free (csum);
            }
        }

      file->num_of_checksums = kept;
    }
}
/* Release METALINK and everything reachable from it: every file node, its
   resource and checksum lists, its chunk checksum (including the piece hash
   list) and all strings owned by those nodes.  Passing NULL is allowed and
   does nothing.  METALINK must not be used after this call.  */
void
delete_mlink(mlink *metalink)
{
  mlink_file *file, *tempfile;
  mlink_resource *res, *tempres;
  mlink_checksum *csum, *tempcsum;
  mlink_piece_hash *phash, *temphash;

  if (!metalink)
    return;

  xfree_null (metalink->tags);
  xfree_null (metalink->identity);

  file = metalink->files;
  while (file)
    {
      xfree_null (file->os);
      xfree_null (file->language);
      xfree_null (file->version);
      xfree_null (file->name);

      res = file->resources;
      while (res)
        {
          xfree_null (res->url);
          xfree_null (res->type);
          xfree_null (res->location);

          /* Step forward before freeing the node we are standing on.  */
          tempres = res;
          res = res->next;
          free (tempres);
        }

      csum = file->checksums;
      while (csum)
        {
          xfree_null (csum->type);
          xfree_null (csum->hash);

          tempcsum = csum;
          csum = csum->next;
          free (tempcsum);
        }

      if (file->chunk_checksum)
        {
          free (file->chunk_checksum->type);

          phash = file->chunk_checksum->piece_hashes;
          while (phash)
            {
              xfree_null (phash->hash);

              temphash = phash;
              phash = phash->next;
              free (temphash);
            }

          /* The container itself is heap-allocated too; without this the
             struct leaks once per file.  */
          free (file->chunk_checksum);
        }

      tempfile = file;
      file = file->next;
      free (tempfile);
    }

  free (metalink);
}
/* Parses the metalink file into a metalink_t and returns a pointer to it.
Returns NULL if parsing fails. */
metalink_t*
@ -89,10 +367,10 @@ lower_hex_case (unsigned char *hash, int length)
0 if all pairs of hashes compared turned out to be the same.
1 if due to some error, comparisons could not be made. */
int
verify_file_hash (const char *filename, metalink_checksum_t **checksums)
verify_file_hash (const char *filename, mlink_checksum *checksums)
{
int i, j, req_type, res = 0;
unsigned char hash_raw[MAX_DIGEST_LENGTH];
/* Points to a hash of supported type from the metalink file. The index dedicated
to a type is inversely proportional to its strength. (check supported_types
@ -100,6 +378,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
unsigned char *metalink_hashes[HASH_TYPES];
unsigned char file_hash[2 * MAX_DIGEST_LENGTH + 1];
FILE *file;
mlink_checksum *checksum;
if (!checksums)
{
@ -113,9 +392,9 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
metalink_hashes[i] = NULL;
/* Fill metalink_hashes to contain an instance of supported types of hashes. */
for (i = 0; checksums[i] != NULL; ++i)
for (checksum = checksums; checksum; checksum = checksum->next)
for (j = 0; j < HASH_TYPES; ++j)
if (!strcmp(checksums[i]->type, supported_hashes[j]))
if (!strcmp(checksum->type, supported_hashes[j]))
{
if(metalink_hashes[j])
{
@ -128,7 +407,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
return 1;
}
else
metalink_hashes[j] = checksums[i]->hash;
metalink_hashes[j] = checksum->hash;
}
for (i = 0; !metalink_hashes[i]; ++i);
@ -166,7 +445,7 @@ verify_file_hash (const char *filename, metalink_checksum_t **checksums)
sprintf(file_hash + 2 * j, "%02x", hash_raw[j]);
lower_hex_case(metalink_hashes[req_type], 2 * digest_sizes[req_type]);
if (strcmp(checksums[req_type]->hash, file_hash))
if (strcmp(metalink_hashes[req_type], file_hash))
{
logprintf (LOG_VERBOSE, "Verifying(%s) failed: %s hashes are different.\n",
filename, supported_hashes[i]);

View File

@ -32,8 +32,71 @@ as that of the covered work. */
#ifndef MLINK_H
#define MLINK_H
/* One node in a singly linked list of per-piece hashes; the list head is
   the piece_hashes member of mlink_chunk_checksum. */
typedef struct metalink_piece_hash
{
/* Next node in the list, or NULL at the end. */
struct metalink_piece_hash *next;
/* Piece number this hash belongs to (presumably the position of the piece
   within the file -- TODO confirm against libmetalink). */
int piece;
/* Hash value string for the piece; released by delete_mlink. */
char *hash;
} mlink_piece_hash;
/* One node in a singly linked list of whole-file checksums; the list head
   is the checksums member of mlink_file. */
typedef struct metalink_checksum
{
/* Next node in the list, or NULL at the end. */
struct metalink_checksum *next;
/* Hash algorithm name; verify_file_hash matches it against the entries of
   supported_hashes ("sha256", "sha1", "md5"). */
char *type;
/* Hash value string; verify_file_hash compares it with the hex digest it
   computes for the downloaded file. */
char *hash;
} mlink_checksum;
/* One node in a singly linked list of download sources for a file; the
   list head is the resources member of mlink_file. */
typedef struct metalink_resource
{
/* Next node in the list, or NULL at the end. */
struct metalink_resource *next;
/* URL to download from; parse_metalink skips resources that lack one. */
char *url;
/* Resource type string, or NULL when the metalink gives none;
   elect_resources compares it with "ftp" and "http". */
char *type;
/* Location string copied from libmetalink, or NULL when absent. */
char *location;
/* Preference value copied unchanged from the libmetalink resource. */
int preference;
/* Connection limit copied unchanged from the libmetalink resource. */
int maxconnections;
} mlink_resource;
/* Per-piece checksum information of a file: one hash type shared by a
   list of piece hashes. */
typedef struct
{
/* Hash algorithm name shared by all pieces; released by delete_mlink. */
char *type;
/* Length value copied from libmetalink's chunk checksum (presumably the
   size of one piece in bytes -- TODO confirm). */
int length;
/* Head of the piece hash list; delete_mlink walks and frees it. */
mlink_piece_hash *piece_hashes;
} mlink_chunk_checksum;
/* One file described by a metalink, as a node in the singly linked list
   headed by the files member of mlink. */
typedef struct metalink_file
{
/* Next file in the list, or NULL at the end. */
struct metalink_file *next;
/* File name/path; parse_metalink skips files that lack one. */
char *name;
/* File size copied from libmetalink; retrieve_from_file divides it by the
   number of jobs to pick a chunk size. */
long long int size;
/* Optional descriptive strings; each is NULL when the metalink omits it. */
char *version;
char *language;
char *os;
/* Connection limit copied unchanged from the libmetalink file entry. */
int maxconnections;
/* Head of the list of download sources for this file. */
mlink_resource *resources;
/* Number of nodes parse_metalink added to resources. */
int num_of_res;
/* Head of the list of whole-file checksums, or NULL when there are none. */
mlink_checksum *checksums;
/* Intended to hold the number of nodes in checksums -- TODO confirm that
   every writer of the list keeps it up to date. */
int num_of_checksums;
/* Per-piece checksum data, or NULL when absent. */
mlink_chunk_checksum *chunk_checksum;
} mlink_file;
/* Root of the parsed metalink: document-level strings plus the list of
   files.  Created by parse_metalink and released by delete_mlink. */
typedef struct
{
/* Identity string of the metalink, or NULL when absent. */
char *identity;
/* Tags string of the metalink, or NULL when absent. */
char *tags;
/* Head of the list of files, or NULL when no usable file was found. */
mlink_file *files;
/* Number of nodes parse_metalink added to files. */
int num_of_files;
} mlink;
mlink *parse_metalink (char *);
void delete_mlink (mlink *);
metalink_t *metalink_context (const char *);
int verify_file_hash(const char *, metalink_checksum_t **);
int verify_file_hash (const char *, mlink_checksum *);
#endif /* MLINK_H */

View File

@ -1063,18 +1063,16 @@ retrieve_from_file (const char *file, bool html, int *count)
input_file = (char *) file;
#ifdef ENABLE_METALINK
metalink_t *metalink;
mlink *mlink = parse_metalink(input_file);
if(opt.metalink_file && (metalink = metalink_context(input_file)))
if(opt.metalink_file && mlink)
{
int i, j, r, url_err, retries;
int ret, dt = 0;
int ranges_covered, chunk_size, num_of_resources;
int i, j, r, ranges_covered, chunk_size, url_err, retries, ret, dt=0;
pthread_t thread;
sem_t retr_sem;
uerr_t status;
metalink_file_t* file;
metalink_resource_t* resource;
mlink_file* file;
mlink_resource *resource;
struct s_thread_ctx *thread_ctx;
init_temp_files();
@ -1082,20 +1080,17 @@ retrieve_from_file (const char *file, bool html, int *count)
thread_ctx = malloc (opt.jobs * (sizeof *thread_ctx));
retries = 0;
i = 0;
while ((file = metalink->files[i]) != NULL)
file = mlink->files;
while (file)
{
memset(thread_ctx, '\0', opt.jobs * (sizeof *thread_ctx));
num_of_resources = 0;
while (file->resources[num_of_resources])
++num_of_resources;
/* If chunk_size is too small, set it equal to MIN_CHUNK_SIZE. */
chunk_size = (file->size) / opt.jobs;
if(chunk_size < MIN_CHUNK_SIZE)
chunk_size = MIN_CHUNK_SIZE;
j = fill_ranges_data(num_of_resources, file->size, chunk_size);
j = fill_ranges_data(file->num_of_res, file->size, chunk_size);
/* If chunk_size was set to MIN_CHUNK_SIZE, opt.jobs should be corrected. */
if(j < opt.jobs)
@ -1105,16 +1100,16 @@ retrieve_from_file (const char *file, bool html, int *count)
sem_init (&retr_sem, 0, 0);
j = ranges_covered = 0;
resource = file->resources;
/* Assign values to thread_ctx[] elements and spawn threads that will
conduct the download. */
for (r = 0; r < opt.jobs; ++r)
{
resource = file->resources[j];
if (!resource)
{
j = 0;
resource = file->resources[j];
resource = file->resources;
}
thread_ctx[r].referer = NULL;
@ -1138,6 +1133,7 @@ retrieve_from_file (const char *file, bool html, int *count)
return URLERROR;
}
++j;
resource = resource->next;
}
/* Until all the ranges are covered, collect threads. */
@ -1170,18 +1166,24 @@ retrieve_from_file (const char *file, bool html, int *count)
/* Look for resource from which downloading this range is not
tried. */
for (j = 0; j < num_of_resources; ++j)
if (!((thread_ctx[r].range)->resources)[j])
break;
j = 0;
resource = file->resources;
while (j < file->num_of_res)
{
if (!((thread_ctx[r].range)->resources)[j])
break;
++j;
resource = resource -> next;
}
/* If there is such a resource, then update the range values
to try that not-tried resource and spawn thread.
If all the resources are exhausted, stop collecting the
threads, as the download failed. */
if (j < num_of_resources)
if (j < file->num_of_res)
{
thread_ctx[r].url = file->resources[j]->url;
if ((thread_ctx[r].range)->bytes_covered)
{
thread_ctx[r].url = resource->url;
(thread_ctx[r].range)->first_byte =
(thread_ctx[r].range)->bytes_covered;
(thread_ctx[r].range)->bytes_covered = 0;
@ -1219,9 +1221,9 @@ retrieve_from_file (const char *file, bool html, int *count)
{
if(retries < opt.n_retries)
{
--i;
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n",
file->name, ++retries + 1);
continue;
}
}
}
@ -1251,8 +1253,9 @@ retrieve_from_file (const char *file, bool html, int *count)
logprintf (LOG_VERBOSE, "Verifying(%s) failed.\n", file->name);
if(retries < opt.n_retries)
{
--i;
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n", file->name, ++retries + 1);
logprintf (LOG_VERBOSE, "Retrying to download(%s). (TRY #%d)\n",
file->name, ++retries + 1);
continue;
}
}
}
@ -1265,13 +1268,13 @@ retrieve_from_file (const char *file, bool html, int *count)
status = QUOTEXC;
break;
}
++i;
file = file->next;
}
free(thread_ctx);
clean_ranges ();
clean_temp_files ();
metalink_delete(metalink);
delete_mlink(mlink);
}
else
{