From b9eeb0c54c390aa267355deeaa7c6e2520813e1c Mon Sep 17 00:00:00 2001
From: hniksic <devnull@localhost>
Date: Wed, 1 Nov 2000 10:31:53 -0800
Subject: [PATCH] [svn] Fix "optimization" of query-strings in URLs. Published
 in <sxs3dhbwnmw.fsf@florida.arsdigita.de>.

---
 src/ChangeLog |  8 ++++++
 src/url.c     | 69 +++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index f90e5ecf..7bb0d9e4 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,11 @@
+2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+	* url.c (path_simplify_with_kludge): New function.
+	(path_simplify_with_kludge): Disable it.  Instead...
+	(parse_dir): ...make sure that at this point the right thing is
+	done, i.e. that "query" part of the URL (?...) is always assigned
+	to the file, never to the directory portion of the path.
+
 2000-11-01  Hrvoje Niksic  <hniksic@arsdigita.com>
 
 	* retr.c (retrieve_url): Detect redirection cycles.
diff --git a/src/url.c b/src/url.c
index 0a9fa4da..5447c104 100644
--- a/src/url.c
+++ b/src/url.c
@@ -5,8 +5,8 @@ This file is part of Wget.
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -82,6 +82,11 @@ extern int errno;
 /* Is a directory ".."?  */
 #define DDOTP(x) ((*(x) == '.') && (*(x + 1) == '.') && (!*(x + 2)))
 
+#if 0
+static void path_simplify_with_kludge PARAMS ((char *));
+#endif
+static int urlpath_length PARAMS ((const char *));
+
 /* NULL-terminated list of strings to be recognized as prototypes (URL
    schemes).  Note that recognized doesn't mean supported -- only HTTP
    and FTP are currently supported.
@@ -503,11 +508,18 @@ parseurl (const char *url, struct urlinfo *u, int strict)
   strcat (u->path, *u->dir ? "/" : "");
   strcat (u->path, u->file);
   URL_CLEANSE (u->path);
+  DEBUGP (("newpath: %s\n", u->path));
   /* Create the clean URL.  */
   u->url = str_url (u, 0);
   return URLOK;
 }
 
+/* Special versions of DOTP and DDOTP for parse_dir(). */
+
+#define PD_DOTP(x)  ((*(x) == '.') && (!*((x) + 1) || *((x) + 1) == '?'))
+#define PD_DDOTP(x) ((*(x) == '.') && (*(x) == '.')		\
+		     && (!*((x) + 2) || *((x) + 2) == '?'))
+
 /* Build the directory and filename components of the path.  Both
    components are *separately* malloc-ed strings!  It does not change
    the contents of path.
@@ -519,13 +531,16 @@ parse_dir (const char *path, char **dir, char **file)
 {
   int i, l;
 
-  for (i = l = strlen (path); i && path[i] != '/'; i--);
+  l = urlpath_length (path);
+  for (i = l; i && path[i] != '/'; i--);
+
   if (!i && *path != '/')   /* Just filename */
     {
-      if (DOTP (path) || DDOTP (path))
+      if (PD_DOTP (path) || PD_DDOTP (path))
 	{
-	  *dir = xstrdup (path);
-	  *file = xstrdup ("");
+	  *dir = strdupdelim (path, path + l);
+	  *file = xstrdup (path + l); /* normally empty, but could
+                                         contain ?... */
 	}
       else
 	{
@@ -535,10 +550,11 @@ parse_dir (const char *path, char **dir, char **file)
     }
   else if (!i)                 /* /filename */
     {
-      if (DOTP (path + 1) || DDOTP (path + 1))
+      if (PD_DOTP (path + 1) || PD_DDOTP (path + 1))
 	{
-	  *dir = xstrdup (path);
-	  *file = xstrdup ("");
+	  *dir = strdupdelim (path, path + l);
+	  *file = xstrdup (path + l); /* normally empty, but could
+                                         contain ?... */
 	}
       else
 	{
@@ -548,15 +564,16 @@ parse_dir (const char *path, char **dir, char **file)
     }
   else /* Nonempty directory with or without a filename */
     {
-      if (DOTP (path + i + 1) || DDOTP (path + i + 1))
+      if (PD_DOTP (path + i + 1) || PD_DDOTP (path + i + 1))
 	{
-	  *dir = xstrdup (path);
-	  *file = xstrdup ("");
+	  *dir = strdupdelim (path, path + l);
+	  *file = xstrdup (path + l); /* normally empty, but could
+                                         contain ?... */
 	}
       else
 	{
 	  *dir = strdupdelim (path, path + i);
-	  *file = strdupdelim (path + i + 1, path + l + 1);
+	  *file = xstrdup (path + i + 1);
 	}
     }
 }
@@ -1448,6 +1465,32 @@ opt_url (struct urlinfo *u)
   free (u->url);
   u->url = str_url (u, 0);
 }
+
+/* This beautiful kludge is fortunately not needed, as I've made
+   parse_dir do the (almost) right thing, so that a query can never
+   become a part of directory.  */
+#if 0
+/* Call path_simplify, but make sure that the part after the
+   question-mark, if any, is not destroyed by path_simplify's
+   "optimizations".  */
+void
+path_simplify_with_kludge (char *path)
+{
+  char *query = strchr (path, '?');
+  if (query)
+    /* path_simplify also works destructively, so we also have the
+       license to write. */
+    *query = '\0';
+  path_simplify (path);
+  if (query)
+    {
+      char *newend = path + strlen (path);
+      *query = '?';
+      if (newend != query)
+	memmove (newend, query, strlen (query) + 1);
+    }
+}
+#endif
 
 /* Returns proxy host address, in accordance with PROTO.  */
 char *