mirror of
https://github.com/mirror/wget.git
synced 2025-02-23 10:10:41 +08:00
Separate segmented download and fallback handling.
This commit is contained in:
parent
f4c42b94d8
commit
44f75a1955
265
src/retr.c
265
src/retr.c
@ -1009,29 +1009,168 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
else if(metalink = metalink_context(url))
|
else if(metalink = metalink_context(url))
|
||||||
{
|
{
|
||||||
/*GSoC wget*/
|
/*GSoC wget*/
|
||||||
int i,j,k,index,dt,url_err,error_severity;
|
int i, j, k, index, dt, url_err, error_severity;
|
||||||
metalink_file_t* file;
|
metalink_file_t* file;
|
||||||
metalink_resource_t* resource;
|
metalink_resource_t* resource;
|
||||||
struct url *url_parsed;
|
struct url *url_parsed;
|
||||||
uerr_t status, status_least_severe;
|
uerr_t status, status_least_severe;
|
||||||
sem_t retr_sem;
|
int ret, N_THREADS = opt.jobs > 0 ? opt.jobs : 1;
|
||||||
pthread_t thread;
|
int free_threads, range_start, chunk_size, file_extension;
|
||||||
int N_THREADS = 3;
|
struct s_thread_ctx *thread_ctx;
|
||||||
int free_threads = N_THREADS, range_start, chunk_size,file_extension;
|
|
||||||
struct s_thread_ctx *thread_ctx = NULL;
|
|
||||||
char *temp_name;
|
|
||||||
|
|
||||||
|
if(N_THREADS>1)
|
||||||
|
{
|
||||||
|
pthread_t thread;
|
||||||
|
sem_t retr_sem;
|
||||||
|
char *command;
|
||||||
|
|
||||||
|
thread_ctx = calloc (N_THREADS, sizeof *thread_ctx);
|
||||||
i = 0;
|
i = 0;
|
||||||
while((file = metalink->files[i]) != NULL)
|
while((file = metalink->files[i]) != NULL)
|
||||||
{
|
{
|
||||||
if(1)
|
memset(thread_ctx, '\0', N_THREADS * (sizeof *thread_ctx));
|
||||||
|
for(k = 0; k < N_THREADS; ++k)
|
||||||
{
|
{
|
||||||
thread_ctx = calloc (N_THREADS, sizeof *thread_ctx);
|
thread_ctx[k].file = malloc(7 + (N_THREADS/10 + 1) +
|
||||||
|
strlen(file->name));
|
||||||
|
thread_ctx[k].range = malloc(sizeof(struct range));
|
||||||
|
}
|
||||||
sem_init (&retr_sem, 0, 0);
|
sem_init (&retr_sem, 0, 0);
|
||||||
range_start = 0;
|
range_start = 0;
|
||||||
chunk_size = (file->size) / N_THREADS;
|
chunk_size = (file->size) / N_THREADS;
|
||||||
file_extension = 0;
|
file_extension = 0;
|
||||||
|
free_threads = N_THREADS;
|
||||||
|
|
||||||
|
j = 0;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
resource = file->resources[j];
|
||||||
|
|
||||||
|
if (range_start < file->size)
|
||||||
|
{
|
||||||
|
if (free_threads)
|
||||||
|
{
|
||||||
|
if (!resource)
|
||||||
|
j = 0;
|
||||||
|
if (url = resource->url)
|
||||||
|
{
|
||||||
|
for (k = 0; k < N_THREADS; k++)
|
||||||
|
if (! thread_ctx[k].used)
|
||||||
|
{
|
||||||
|
index = k;
|
||||||
|
free_threads--;
|
||||||
|
thread_ctx[k].used = 1;
|
||||||
|
thread_ctx[k].terminated = 0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
url_parsed = url_parse (url, &url_err, iri, true);
|
||||||
|
if (!url_parsed)
|
||||||
|
{
|
||||||
|
char *error = url_error (url, url_err);
|
||||||
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", url,
|
||||||
|
error);
|
||||||
|
xfree (error);
|
||||||
|
return URLERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!opt.base_href)
|
||||||
|
opt.base_href = xstrdup (url);
|
||||||
|
|
||||||
|
sprintf(thread_ctx[index].file, "temp_%s.%d",
|
||||||
|
file->name, file_extension++);
|
||||||
|
thread_ctx[index].referer = NULL;
|
||||||
|
thread_ctx[index].dt = dt;
|
||||||
|
thread_ctx[index].i = iri;
|
||||||
|
thread_ctx[index].redirected = NULL;
|
||||||
|
thread_ctx[index].url = url;
|
||||||
|
thread_ctx[index].retr_sem = &retr_sem;
|
||||||
|
thread_ctx[index].url_parsed = url_parsed;
|
||||||
|
(thread_ctx[index].range)->first_byte = range_start;
|
||||||
|
range_start += chunk_size;
|
||||||
|
(thread_ctx[index].range)->last_byte = range_start - 1;
|
||||||
|
|
||||||
|
pthread_create (&thread, NULL, segmented_retrieve_url,
|
||||||
|
&thread_ctx[index]);
|
||||||
|
|
||||||
|
++j;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* This part seems redundant FOR NOW, as the file is
|
||||||
|
initially divided into N_THREADS segments of size/N_THREADS bytes,
|
||||||
|
which means the file size will already be reached
|
||||||
|
once the initial assignment to threads is made. */
|
||||||
|
do
|
||||||
|
ret = sem_wait (&retr_sem);
|
||||||
|
while (ret < 0 && errno == EINTR);
|
||||||
|
|
||||||
|
for (k = 0; k < N_THREADS; k++)
|
||||||
|
if (thread_ctx[k].used && thread_ctx[k].terminated)
|
||||||
|
{
|
||||||
|
thread_ctx[k].used = 0;
|
||||||
|
url_free (thread_ctx[k].url_parsed);
|
||||||
|
free_threads++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while(free_threads < N_THREADS)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
ret = sem_wait (&retr_sem);
|
||||||
|
while (ret < 0 && errno == EINTR);
|
||||||
|
|
||||||
|
for (k = 0; k < N_THREADS; k++)
|
||||||
|
if (thread_ctx[k].used && thread_ctx[k].terminated)
|
||||||
|
{
|
||||||
|
thread_ctx[k].used = 0;
|
||||||
|
url_free (thread_ctx[k].url_parsed);
|
||||||
|
free_threads++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*TODO: Find a way to check the success of downloads.*/
|
||||||
|
status = RETROK;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(k = 0; k < N_THREADS; ++k)
|
||||||
|
{
|
||||||
|
free(thread_ctx[k].file);
|
||||||
|
free(thread_ctx[k].range);
|
||||||
|
}
|
||||||
|
sem_destroy(&retr_sem);
|
||||||
|
|
||||||
|
/* Either all resources are exhausted and the least severe error
|
||||||
|
* among all is returned, or an error unrelated to the server is returned.
|
||||||
|
* MUST be reconsidered for multiple files!!!
|
||||||
|
* Can't just exit after 1 failure, if metalink has multiple files.*/
|
||||||
|
if (status != RETROK)
|
||||||
|
return status;
|
||||||
|
|
||||||
|
command = malloc(15 + (N_THREADS) * (strlen(file->name) +
|
||||||
|
(N_THREADS/10 + 1) + 2) + strlen(file->name));
|
||||||
|
sprintf(command, "cat temp_%s.* > %s",file->name , file->name);
|
||||||
|
system(command);
|
||||||
|
sprintf(command, "rm -f temp_%s.*", file->name);
|
||||||
|
system(command);
|
||||||
|
free(command);
|
||||||
|
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
free (thread_ctx);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
i = 0;
|
||||||
|
while((file = metalink->files[i]) != NULL)
|
||||||
|
{
|
||||||
j = 0;
|
j = 0;
|
||||||
while((resource = file->resources[j]) != NULL)
|
while((resource = file->resources[j]) != NULL)
|
||||||
{
|
{
|
||||||
@ -1048,87 +1187,6 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
if (!opt.base_href)
|
if (!opt.base_href)
|
||||||
opt.base_href = xstrdup (url);
|
opt.base_href = xstrdup (url);
|
||||||
|
|
||||||
if(1)
|
|
||||||
{
|
|
||||||
if (url && free_threads && range_start < (file->size - 1))
|
|
||||||
{
|
|
||||||
for (k = 0; k < N_THREADS; k++)
|
|
||||||
if (! thread_ctx[k].used)
|
|
||||||
{
|
|
||||||
index = k;
|
|
||||||
free_threads--;
|
|
||||||
thread_ctx[k].used = 1;
|
|
||||||
thread_ctx[k].terminated = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
temp_name = malloc(7 + (N_THREADS/10 + 1) + strlen(file->name));
|
|
||||||
sprintf(temp_name, "temp_%s.%d", file->name, file_extension++);
|
|
||||||
thread_ctx[index].file = temp_name;
|
|
||||||
thread_ctx[index].referer = NULL;
|
|
||||||
thread_ctx[index].dt = dt;
|
|
||||||
thread_ctx[index].i = iri;
|
|
||||||
thread_ctx[index].redirected = NULL;
|
|
||||||
thread_ctx[index].url = url;
|
|
||||||
thread_ctx[index].retr_sem = &retr_sem;
|
|
||||||
thread_ctx[index].url_parsed = url_parsed;
|
|
||||||
if(!thread_ctx[index].range)
|
|
||||||
thread_ctx[index].range = malloc(sizeof(struct range));
|
|
||||||
(thread_ctx[index].range)->first_byte = range_start;
|
|
||||||
range_start += chunk_size;
|
|
||||||
(thread_ctx[index].range)->last_byte = range_start -1;
|
|
||||||
|
|
||||||
pthread_create (&thread, NULL, segmented_retrieve_url,
|
|
||||||
&thread_ctx[index]);
|
|
||||||
++j;
|
|
||||||
/* GSoC TODO: Replace this with something better. */
|
|
||||||
/* When the resources are traversed once, return to
|
|
||||||
the first resource to start re-traversing URLs
|
|
||||||
(and assigning them to the threads).*/
|
|
||||||
if(!(file->resources[j]))
|
|
||||||
j=0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If file range is not covered yet, wait until a thread is
|
|
||||||
available. If the file is downloaded, then destroy all
|
|
||||||
the threads. */
|
|
||||||
if(range_start < file->size)
|
|
||||||
{
|
|
||||||
if(!free_threads)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
do
|
|
||||||
ret = sem_wait (&retr_sem);
|
|
||||||
while (ret < 0 && errno == EINTR);
|
|
||||||
|
|
||||||
index = -1;
|
|
||||||
for (k = 0; k < N_THREADS; k++)
|
|
||||||
if (thread_ctx[k].used && thread_ctx[k].terminated)
|
|
||||||
{
|
|
||||||
index = k;
|
|
||||||
thread_ctx[k].used = 0;
|
|
||||||
free_threads++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
while(free_threads < N_THREADS)
|
|
||||||
{
|
|
||||||
ret = sem_wait (&retr_sem);
|
|
||||||
if(ret >= 0 || errno != EINTR)
|
|
||||||
free_threads++;
|
|
||||||
}
|
|
||||||
/*TODO: Find a way to check the success of downloads.*/
|
|
||||||
status = RETROK;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
status = retrieve_url (url_parsed, url, &(file->name), NULL,
|
status = retrieve_url (url_parsed, url, &(file->name), NULL,
|
||||||
NULL, &dt, false, iri, true, NULL);
|
NULL, &dt, false, iri, true, NULL);
|
||||||
url_free (url_parsed);
|
url_free (url_parsed);
|
||||||
@ -1147,35 +1205,14 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
|
|
||||||
++j;
|
++j;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
/* Either all resources are exhausted and the least severe error
|
|
||||||
* among all is returned, or an error irrelevant to the server is.
|
|
||||||
* MUST be reconsidered for multiple files!!!
|
|
||||||
* Can't just exit after 1 failure, if metalink has multiple files.*/
|
|
||||||
if (status != RETROK)
|
if (status != RETROK)
|
||||||
return status;
|
return status;
|
||||||
|
|
||||||
if(thread_ctx)
|
|
||||||
{
|
|
||||||
char *command;
|
|
||||||
*(strrchr(temp_name, '.')) = '\0';
|
|
||||||
command = malloc(9 + (N_THREADS) * (strlen(temp_name) + (N_THREADS/10 + 1) + 2) + strlen(file->name));
|
|
||||||
sprintf(command, "cat %s* > %s",temp_name , file->name);
|
|
||||||
system(command);
|
|
||||||
sprintf(command, "rm -f %s.*", temp_name);
|
|
||||||
system(command);
|
|
||||||
free(command);
|
|
||||||
}
|
|
||||||
for(k = 0; k < N_THREADS; ++k)
|
|
||||||
{
|
|
||||||
xfree (thread_ctx[k].range);
|
|
||||||
free(thread_ctx[k].file);
|
|
||||||
url_free (thread_ctx[k].url_parsed);
|
|
||||||
}
|
|
||||||
|
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
xfree (thread_ctx);
|
}
|
||||||
|
|
||||||
iri_free (iri);
|
iri_free (iri);
|
||||||
/* delete metalink_t */
|
/* delete metalink_t */
|
||||||
metalink_delete(metalink);
|
metalink_delete(metalink);
|
||||||
|
Loading…
Reference in New Issue
Block a user