From 22e8a8347254b816ec8addeaab3d3c66aa901d4d Mon Sep 17 00:00:00 2001 From: Witold Filipczyk Date: Wed, 1 Jun 2022 22:24:07 +0200 Subject: [PATCH] [idn] Convert domain name to utf-8 from gettext codepage. --- src/document/document.c | 12 +--------- src/protocol/uri.c | 49 +++++++++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/document/document.c b/src/document/document.c index 2a616b59..8a3d94b6 100644 --- a/src/document/document.c +++ b/src/document/document.c @@ -112,20 +112,10 @@ get_ip(struct document *document) { #ifdef HAVE_INET_NTOP struct uri *uri = document->uri; - char *host = memacpy(uri->host, uri->hostlen); + char *host = get_uri_string(uri, URI_DNS_HOST); if (host) { -#ifdef CONFIG_IDN - char *idname; - int code = idna_to_ascii_lz(host, &idname, 0); - - if (code == IDNA_SUCCESS) { - find_host(idname, &document->querydns, found_dns, &document->ip, 0); - free(idname); - } -#else find_host(host, &document->querydns, found_dns, &document->ip, 0); -#endif mem_free(host); } #endif diff --git a/src/protocol/uri.c b/src/protocol/uri.c index 5c897987..9c81c28c 100644 --- a/src/protocol/uri.c +++ b/src/protocol/uri.c @@ -6,6 +6,9 @@ #include #include +#ifdef HAVE_ICONV +#include +#endif #ifdef HAVE_IDNA_H #include #endif @@ -29,6 +32,7 @@ #include "elinks.h" +#include "intl/libintl.h" #include "main/object.h" #include "protocol/protocol.h" #include "protocol/uri.h" @@ -535,17 +539,46 @@ add_uri_to_string(struct string *string, const struct uri *uri, /* Support for the GNU International Domain Name library. * * http://www.gnu.org/software/libidn/manual/html_node/IDNA-Functions.html - * - * Now it is probably not perfect because idna_to_ascii_lz() - * will be using a ``zero terminated input string encoded in - * the current locale's character set''. Anyway I don't know - * how to convert anything to UTF-8 or Unicode. 
--jonas */ + */ if (wants(URI_IDN)) { - char *host = memacpy(uri->host, uri->hostlen); + char *host = NULL; +#if defined(CONFIG_NLS) || defined(CONFIG_GETTEXT) + if (current_charset != -1 && !is_cp_utf8(current_charset)) { + size_t iconv_res; + size_t ileft = uri->hostlen; + size_t oleft = ileft * 8; + char *inbuf, *outbuf; + char *utf8_data = (char *)mem_calloc(1, oleft); + iconv_t cd; + + if (!utf8_data) { + goto error; + } + cd = iconv_open("utf-8", get_cp_mime_name(current_charset)); + if (cd == (iconv_t)-1) { + mem_free(utf8_data); + goto error; + } + inbuf = uri->host; + outbuf = utf8_data; + iconv_res = iconv(cd, &inbuf, &ileft, &outbuf, &oleft); + iconv_close(cd); /* Single conversion call, so release the descriptor before checking the result; it used to leak when iconv() failed. */ + + if (iconv_res == (size_t)-1) { + mem_free(utf8_data); + goto error; + } + host = utf8_data; + } +error: +#endif + if (!host) { + host = memacpy(uri->host, uri->hostlen); + } if (host) { char *idname; - int code = idna_to_ascii_lz(host, &idname, 0); + int code = idna_to_ascii_8z(host, &idname, 0); /* FIXME: Return NULL if it coughed? --jonas */ if (code == IDNA_SUCCESS) { @@ -553,11 +586,9 @@ add_uri_to_string(struct string *string, const struct uri *uri, free(idname); add_host = 0; } - mem_free(host); } } - #endif if (add_host) add_bytes_to_string(string, uri->host, uri->hostlen);