1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-06-29 01:45:34 +00:00

Bug 744: More tests and fix failures.

This commit is contained in:
Kalle Olavi Niemitalo 2007-07-19 14:28:20 +03:00 committed by Witold Filipczyk
parent fa289ee12a
commit fd9468f0b6
2 changed files with 31 additions and 14 deletions

View File

@ -79,6 +79,10 @@ test_normalize_uri(void)
"http://example.org/baz?a=1&b=2#frag" },
{ "http://example.org/foo/bar/..",
"http://example.org/foo/" },
{ "http://example.org/foo/bar;a=1/..",
"http://example.org/foo/" },
{ "http://example.org/foo/bar..",
"http://example.org/foo/bar.." },
/* Bug 744 - ELinks changes "//" to "/" in path
* component of URI */
@ -116,6 +120,12 @@ test_normalize_uri(void)
"http://example.org///" },
{ "http://example.org/foo/..//bar/baz",
"http://example.org//bar/baz" },
{ "http://example.org//.//foo",
"http://example.org///foo" },
{ "http://example.org//./../foo",
"http://example.org/foo" },
{ "http://example.org/gag///./../..",
"http://example.org/gag/" },
};
size_t i;

View File

@ -43,7 +43,11 @@
static inline int
end_of_dir(unsigned char c)
{
return c == POST_CHAR || c == '#' || c == ';' || c == '?';
/* This used to check for c == ';' as well. But section 3.3
* of RFC 2396 explicitly says that parameters in a path
* segment "are not significant to the parsing of relative
* references." */
return c == POST_CHAR || c == '#' || c == '?';
}
static inline int
@ -700,16 +704,14 @@ normalize_uri(struct uri *uri, unsigned char *uristring)
if (uri->protocol != PROTOCOL_UNKNOWN)
need_slash = get_protocol_need_slash_after_host(uri->protocol);
/* We want to start at the first slash to also reduce URIs like
* http://host//index.html to http://host/index.html */
path = uri->data - need_slash;
dest = src = path;
/* This loop mangles the URI string by removing directory elevators and
* other cruft. Example: /.././etc////..//usr/ -> /usr/ */
/* This loop mangles the URI string by removing ".." and "." segments.
* However it must not alter "//" without reason; see bug 744. */
while (*dest) {
/* If the following pieces are the LAST parts of URL, we remove
* them as well. See RFC 1808 for details. */
* them as well. See RFC 2396 section 5.2 for details. */
if (end_of_dir(src[0])) {
/* URL data contains no more path. */
@ -734,20 +736,25 @@ normalize_uri(struct uri *uri, unsigned char *uristring)
} else if (src[2] == '.'
&& (is_uri_dir_sep(uri, src[3]) || !src[3])) {
/* /../ or /.. - skip it and preceding element. */
/* /../ or /.. - skip it and preceding element.
*
* <path> "/foo/bar" <dest> ...
* <src> ("/../" or "/..\0") ...
*
* Remove "bar" and the directory
* separator that precedes it. The
* separator will be added back in the
* next iteration unless another ".."
* follows, in which case it will be
* added later. "bar" may be empty. */
/* First back out the last incrementation of
* @dest (dest++) to get the position that was
* last asigned to. */
if (dest > path) dest--;
/* @dest might be pointing to a dir separator
* so we decrement before any testing. */
while (dest > path) {
dest--;
if (is_uri_dir_sep(uri, *dest)) break;
}
/* <path> "/foo" <dest> "/bar" ...
* <src> ("/../" or "/..\0") ... */
if (!src[3]) {
/* /.. - add ending slash and stop */
*dest++ = *src;