diff --git a/src/document/html/parser/parse.c b/src/document/html/parser/parse.c
index fcf343213..6cf2f8e32 100644
--- a/src/document/html/parser/parse.c
+++ b/src/document/html/parser/parse.c
@@ -151,7 +151,7 @@ next_attr:
n = name;
name_start = e;
- while (atchr(*n) && atchr(*e) && toupper(*e) == toupper(*n)) e++, n++;
+ while (atchr(*n) && atchr(*e) && c_toupper(*e) == c_toupper(*n)) e++, n++;
found = !*n && !atchr(*e);
if (found && (flags & HTML_ATTR_TEST)) return name_start;
@@ -504,8 +504,8 @@ static struct element_info elements[] = {
+/* Comparator with the qsort()/bsearch() callback signature: orders two
+ * struct element_info entries by name, ignoring the case of ASCII letters
+ * independently of the current locale. */
static int
compar(const void *a, const void *b)
{
- return strcasecmp(((struct element_info *) a)->name,
- ((struct element_info *) b)->name);
+	const struct element_info *left = a;
+	const struct element_info *right = b;
+
+	return c_strcasecmp(left->name, right->name);
}
#else
@@ -548,7 +548,7 @@ void
init_tags_lookup(void)
{
#ifdef USE_FASTFIND
- fastfind_index(&ff_tags_index, FF_COMPRESS);
+ /* FF_LOCALE_INDEP makes the index fold case with c_toupper() instead of
+  * toupper(), so the tag lookup does not depend on the current locale. */
+ fastfind_index(&ff_tags_index, FF_COMPRESS | FF_LOCALE_INDEP);
#endif
}
diff --git a/src/util/conv.c b/src/util/conv.c
index e35ecdce4..13e9f87b7 100644
--- a/src/util/conv.c
+++ b/src/util/conv.c
@@ -534,3 +534,99 @@ sanitize_url(unsigned char *url)
return 1;
}
+
+/** Convert an uppercase letter 'A'..'Z' to lowercase, as tolower() would in
+ * the C locale.  Any other value is returned unchanged, whatever the current
+ * locale is. */
+int
+c_tolower(int c)
+{
+	switch (c) {
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	case 'Y': case 'Z':
+		/* In ASCII each lowercase letter lies at the same distance
+		 * from its uppercase counterpart. */
+		return c + ('a' - 'A');
+	default:
+		return c;
+	}
+}
+
+/** Convert a lowercase letter 'a'..'z' to uppercase, as toupper() would in
+ * the C locale.  Any other value is returned unchanged, whatever the current
+ * locale is. */
+int
+c_toupper(int c)
+{
+	switch (c) {
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	case 'y': case 'z':
+		/* In ASCII each uppercase letter lies at the same distance
+		 * from its lowercase counterpart. */
+		return c - ('a' - 'A');
+	default:
+		return c;
+	}
+}
+
+/** Tell whether @a c is one of the uppercase letters 'A'..'Z', as isupper()
+ * would in the C locale.  Returns 1 if it is and 0 for any other value. */
+int
+c_isupper(int c)
+{
+	/* The alphabet is checked in the runs A-I, J-R and S-Z.  In ASCII they
+	 * join into 'A'..'Z'; splitting there keeps the answer identical to a
+	 * letter-by-letter test even in EBCDIC, where the runs have gaps
+	 * between them. */
+	return (c >= 'A' && c <= 'I')
+	    || (c >= 'J' && c <= 'R')
+	    || (c >= 'S' && c <= 'Z');
+}
+
+/** Tell whether @a c is one of the lowercase letters 'a'..'z', as islower()
+ * would in the C locale.  Returns 1 if it is and 0 for any other value. */
+int
+c_islower(int c)
+{
+	/* The alphabet is checked in the runs a-i, j-r and s-z.  In ASCII they
+	 * join into 'a'..'z'; splitting there keeps the answer identical to a
+	 * letter-by-letter test even in EBCDIC, where the runs have gaps
+	 * between them. */
+	return (c >= 'a' && c <= 'i')
+	    || (c >= 'j' && c <= 'r')
+	    || (c >= 's' && c <= 'z');
+}
+
diff --git a/src/util/conv.h b/src/util/conv.h
index 605388bb4..b364f1987 100644
--- a/src/util/conv.h
+++ b/src/util/conv.h
@@ -182,6 +182,16 @@ trim_chars(unsigned char *s, unsigned char c, int *len)
return s;
}
+/* Convert a character to {lower|upper}case using the
+ * ASCII character set (as if in the C locale) */
+int c_tolower(int c);
+int c_toupper(int c);
+
+/* Check whether a character is {lower|upper}case using the
+ * ASCII character set (as if in the C locale) */
+int c_islower(int c);
+int c_isupper(int c);
+
/** Convert uppercase letters in @a string with the given @a length to
* lowercase. */
static inline void
@@ -192,6 +202,16 @@ convert_to_lowercase(unsigned char *string, int length)
string[length] = tolower(string[length]);
}
+/** Convert uppercase letters in @a string with the given @a length to
+ * lowercase.  Only the ASCII letters 'A'..'Z' count as uppercase (as if in
+ * the C locale); every other byte is left untouched. */
+static inline void
+convert_to_lowercase_locale_indep(unsigned char *string, int length)
+{
+	int pos = length;
+
+	/* Walk from the last byte down to the first. */
+	while (pos-- > 0) {
+		unsigned char *ch = &string[pos];
+
+		if (c_isupper(*ch))
+			*ch = c_tolower(*ch);
+	}
+}
+
/** This function drops control chars, nbsp char and limit the number
* of consecutive space chars to one. It modifies its argument. */
void clr_spaces(unsigned char *str);
diff --git a/src/util/fastfind.c b/src/util/fastfind.c
index 17ac97ed8..a55b6d912 100644
--- a/src/util/fastfind.c
+++ b/src/util/fastfind.c
@@ -169,6 +169,7 @@ struct fastfind_info {
int leafsets_count;
unsigned int case_aware:1;
+ unsigned int locale_indep:1;
unsigned int compress:1;
int idxtab[FF_MAX_CHARS];
@@ -233,6 +234,7 @@ FF_DBG_dump_stats(struct fastfind_info *info)
fprintf(stderr, "------ FastFind Statistics ------\n");
fprintf(stderr, "Comment : %s\n", info->debug.comment);
fprintf(stderr, "Case-aware : %s\n", info->case_aware ? "yes" : "no");
+ fprintf(stderr, "Locale-indep: %s\n", info->locale_indep ? "yes" : "no");
fprintf(stderr, "Compress : %s\n", info->compress ? "yes" : "no");
fprintf(stderr, "Uniq_chars : %s\n", info->uniq_chars);
fprintf(stderr, "Uniq_chars #: %d/%d max.\n", info->uniq_chars_count, FF_MAX_CHARS);
@@ -292,6 +294,7 @@ init_fastfind(struct fastfind_index *index, enum fastfind_flags flags)
info->min_key_len = FF_MAX_KEYLEN;
info->case_aware = !!(flags & FF_CASE_AWARE);
+ info->locale_indep = !!(flags & FF_LOCALE_INDEP);
info->compress = !!(flags & FF_COMPRESS);
FF_DBG_mem(info, sizeof(*info) - sizeof(info->debug));
@@ -434,7 +437,7 @@ compress_tree(struct ff_node *leafset, struct fastfind_info *info)
}
}
-#define ifcase(c) (info->case_aware ? (c) : toupper(c))
+/* Fold @c to uppercase unless the index is case-aware.  A locale-independent
+ * index folds with c_toupper(), any other one with the locale's toupper().
+ * Expects a variable named info to be in scope where the macro is used. */
+#define ifcase(c) \
+	(info->case_aware ? (c) \
+	 : info->locale_indep ? c_toupper(c) : toupper(c))
struct fastfind_index *
fastfind_index(struct fastfind_index *index, enum fastfind_flags flags)
@@ -622,7 +625,10 @@ fastfind_search(struct fastfind_index *index,
if (info->case_aware)
FF_SEARCH(key[i]);
else
- FF_SEARCH(toupper(key[i]));
+ if (info->locale_indep)
+ FF_SEARCH(c_toupper(key[i]));
+ else
+ FF_SEARCH(toupper(key[i]));
return NULL;
}
diff --git a/src/util/fastfind.h b/src/util/fastfind.h
index a6b855854..b01427828 100644
--- a/src/util/fastfind.h
+++ b/src/util/fastfind.h
@@ -19,6 +19,8 @@ enum fastfind_flags {
FF_NONE = 0,
FF_CASE_AWARE = 1, /**< honour case when comparing */
FF_COMPRESS = 2, /**< compress nodes if possible */
+ FF_LOCALE_INDEP = 4 /**< whether the case conversion is
+ * locale independent or not */
};
struct fastfind_index {
diff --git a/src/util/string.c b/src/util/string.c
index 076c491b8..d4ef1c63f 100644
--- a/src/util/string.c
+++ b/src/util/string.c
@@ -14,6 +14,7 @@
#include
#include
#include
+#include
#include "elinks.h"
@@ -233,11 +234,91 @@ elinks_strlcmp(const unsigned char *s1, size_t n1,
+/* Case-insensitive counterpart of elinks_strlcmp() for s1/n1 and s2/n2.
+ * A non-zero @locale_indep folds case with c_toupper(), which ignores the
+ * current locale; zero folds with the locale-dependent toupper().
+ * The comparison itself is carried out by strlcmp_device, defined elsewhere. */
int
elinks_strlcasecmp(const unsigned char *s1, size_t n1,
- const unsigned char *s2, size_t n2)
+ const unsigned char *s2, size_t n2,
+ const int locale_indep)
{
- strlcmp_device("strlcasecmp", s1, n1, s2, n2, toupper(s1[p]), toupper(s2[p]));
+ /* Both branches are the same comparison; only the folding function
+  * applied to each character differs. */
+ if (locale_indep) {
+ strlcmp_device("strlcasecmp", s1, n1, s2, n2, c_toupper(s1[p]), c_toupper(s2[p]));
+ }
+ else {
+ strlcmp_device("strlcasecmp", s1, n1, s2, n2, toupper(s1[p]), toupper(s2[p]));
+ }
}
+/* c_strcasecmp
+ * Taken from GNU coreutils (version 6.9)
+ * File name: lib/c-strcasecmp.c
+ * Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
+ * Licensed under the GPL version 2 or any later version.
+ *
+ * Compare the NUL-terminated strings s1 and s2, ignoring the case of ASCII
+ * letters only: characters are folded with c_tolower(), so the current
+ * locale plays no part.  Returns a value less than, equal to or greater
+ * than zero when s1 sorts before, equal to or after s2.
+ */
+int
+c_strcasecmp(const char *s1, const char *s2)
+{
+	const unsigned char *left = (const unsigned char *) s1;
+	const unsigned char *right = (const unsigned char *) s2;
+	unsigned char a, b;
+
+	/* A string always equals itself; no need to scan it. */
+	if (left == right)
+		return 0;
+
+	for (;; left++, right++) {
+		a = c_tolower(*left);
+		b = c_tolower(*right);
+
+		/* Stop at the end of s1 or at the first difference; reaching
+		 * the end of s2 alone shows up as a difference. */
+		if (a == '\0' || a != b)
+			break;
+	}
+
+	/* Plain subtraction is only safe while every unsigned char value
+	 * fits in an int; otherwise reduce the result to -1, 0 or 1. */
+	if (UCHAR_MAX <= INT_MAX)
+		return a - b;
+
+	return (a > b) - (a < b);
+}
+
+/* c_strncasecmp
+ * Taken from GNU coreutils (version 6.9)
+ * File name: lib/c-strncasecmp.c
+ *                  ^ (note the "n")
+ * Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
+ * Licensed under the GPL version 2 or any later version.
+ *
+ * Compare at most n characters of the strings s1 and s2, ignoring the case
+ * of ASCII letters only: characters are folded with c_tolower(), so the
+ * current locale plays no part.  Returns a value less than, equal to or
+ * greater than zero when s1 sorts before, equal to or after s2; n == 0
+ * always yields 0.
+ */
+int
+c_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+	const unsigned char *left = (const unsigned char *) s1;
+	const unsigned char *right = (const unsigned char *) s2;
+	unsigned char a, b;
+
+	/* Nothing to compare, or a string compared with itself. */
+	if (left == right || n == 0)
+		return 0;
+
+	for (;; left++, right++) {
+		a = c_tolower(*left);
+		b = c_tolower(*right);
+
+		/* Stop once n characters were examined, at the end of s1, or
+		 * at the first difference. */
+		if (--n == 0 || a == '\0' || a != b)
+			break;
+	}
+
+	/* Plain subtraction is only safe while every unsigned char value
+	 * fits in an int; otherwise reduce the result to -1, 0 or 1. */
+	if (UCHAR_MAX <= INT_MAX)
+		return a - b;
+
+	return (a > b) - (a < b);
+}
/* The new string utilities: */
diff --git a/src/util/string.h b/src/util/string.h
index 0008863d5..243a348c2 100644
--- a/src/util/string.h
+++ b/src/util/string.h
@@ -100,9 +100,16 @@ int elinks_strlcmp(const unsigned char *s1, size_t n1,
const unsigned char *s2, size_t n2);
/** Acts identically to strlcmp(), except for being case insensitive. */
-#define strlcasecmp(a,b,c,d) (errfile = __FILE__, errline = __LINE__, elinks_strlcasecmp(a,b,c,d))
+#define strlcasecmp(a,b,c,d) (errfile = __FILE__, errline = __LINE__, elinks_strlcasecmp(a,b,c,d,0))
+/** Same as strlcasecmp(), but passes locale_indep = 1, so case is folded
+ * with c_toupper() rather than the locale-dependent toupper(). */
+#define c_strlcasecmp(a,b,c,d) (errfile = __FILE__, errline = __LINE__, elinks_strlcasecmp(a,b,c,d,1))
int elinks_strlcasecmp(const unsigned char *s1, size_t n1,
- const unsigned char *s2, size_t n2);
+ const unsigned char *s2, size_t n2,
+ const int locale_indep);
+
+/* strcasecmp and strncasecmp which work as if they are
+ * in the C locale - both taken from GNU coreutils */
+int c_strcasecmp(const char *s1, const char *s2);
+int c_strncasecmp(const char *s1, const char *s2, size_t n);
/** @} */