1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-12-04 14:46:47 -05:00

bug 153, 1066: Convert properties of SMJS bookmark to/from UTF-8.

SpiderMonkey uses UTF-16 and the strings in struct bookmark are in
UTF-8.  Previously, the conversions behaved as if the strings had been
in ISO-8859-1.

SpiderMonkey also supports JS_SetCStringsAreUTF8(), which would make
the existing functions convert between UTF-16 and UTF-8, but that
effect is global so I dare not enable it yet.  Besides, I don't know
if that function works in all the SpiderMonkey versions that ELinks
claims to work with.
This commit is contained in:
Kalle Olavi Niemitalo 2008-10-20 01:09:45 +03:00 committed by Kalle Olavi Niemitalo
parent 03b112796d
commit 97d72d15a0
4 changed files with 241 additions and 34 deletions

View File

@ -525,8 +525,16 @@ update_bookmark(struct bookmark *bm, int codepage,
return 1; return 1;
} }
/* Search for a bookmark with the given title. Search in the given folder /** Search for a bookmark with the given title. The search does not
* or in the root if folder is NULL. */ * recurse into subfolders.
*
* @param folder
* Search in this folder. NULL means search in the root.
*
* @param title
* Search for this title. Must be in UTF-8 and not NULL.
*
* @return The bookmark, or NULL if not found. */
struct bookmark * struct bookmark *
get_bookmark_by_name(struct bookmark *folder, unsigned char *title) get_bookmark_by_name(struct bookmark *folder, unsigned char *title)
{ {
@ -535,7 +543,6 @@ get_bookmark_by_name(struct bookmark *folder, unsigned char *title)
lh = folder ? &folder->child : &bookmarks; lh = folder ? &folder->child : &bookmarks;
/** @todo Bug 153: bookmark->title should be UTF-8 */
foreach (bookmark, *lh) foreach (bookmark, *lh)
if (!strcmp(bookmark->title, title)) return bookmark; if (!strcmp(bookmark->title, title)) return bookmark;

View File

@ -8,6 +8,7 @@
#include "bookmarks/bookmarks.h" #include "bookmarks/bookmarks.h"
#include "ecmascript/spidermonkey-shared.h" #include "ecmascript/spidermonkey-shared.h"
#include "intl/charsets.h"
#include "main/event.h" #include "main/event.h"
#include "scripting/smjs/core.h" #include "scripting/smjs/core.h"
#include "scripting/smjs/elinks_object.h" #include "scripting/smjs/elinks_object.h"
@ -78,6 +79,60 @@ static const JSPropertySpec bookmark_props[] = {
static JSObject *smjs_get_bookmark_folder_object(struct bookmark *bookmark); static JSObject *smjs_get_bookmark_folder_object(struct bookmark *bookmark);
/** Wrap a string taken from struct bookmark into a jsval.
 *
 * The conversion itself is delegated to utf8_to_jsstring(), which is
 * asked to treat @a str as null-terminated.
 *
 * @param ctx
 *   Context handed to utf8_to_jsstring().
 * @param[in] str
 *   The bookmark string to convert.
 * @param[out] vp
 *   Receives the resulting string value.  Left untouched on failure.
 *
 * @return JS_TRUE if successful.  JS_FALSE if utf8_to_jsstring()
 * returned NULL.  */
static JSBool
bookmark_string_to_jsval(JSContext *ctx, const unsigned char *str, jsval *vp)
{
	JSString *converted = utf8_to_jsstring(ctx, str, -1);

	if (converted != NULL) {
		*vp = STRING_TO_JSVAL(converted);
		return JS_TRUE;
	}

	return JS_FALSE;
}
/** Turn a jsval into a UTF-8 string and store it in struct bookmark.
 *
 * @param ctx
 *   Context for memory allocations and error reports.
 * @param val
 *   The @c jsval that should be converted.
 * @param[in,out] result
 *   A string allocated with mem_alloc().
 *   On success, the previous string (if any) is freed and replaced
 *   with the newly converted one.  On failure it is left untouched.
 *
 * @return JS_TRUE if successful.  JS_FALSE if rooting, the
 * conversion to JSString, or the conversion to UTF-8 failed.  */
static JSBool
jsval_to_bookmark_string(JSContext *ctx, jsval val, unsigned char **result)
{
	JSString *rooted = NULL;
	unsigned char *utf8 = NULL;

	/* jsstring_to_utf8() might GC, so the slot that will hold the
	 * string is rooted before the string is even created.  */
	if (!JS_AddNamedRoot(ctx, &rooted, "jsval_to_bookmark_string"))
		return JS_FALSE;

	rooted = JS_ValueToString(ctx, val);
	if (rooted != NULL)
		utf8 = jsstring_to_utf8(ctx, rooted, NULL);

	/* Single unroot point for the success and both failure paths.  */
	JS_RemoveRoot(ctx, &rooted);

	if (utf8 == NULL)
		return JS_FALSE;

	mem_free_set(result, utf8);
	return JS_TRUE;
}
/* @bookmark_class.getProperty */ /* @bookmark_class.getProperty */
static JSBool static JSBool
bookmark_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp) bookmark_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
@ -102,17 +157,9 @@ bookmark_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
switch (JSVAL_TO_INT(id)) { switch (JSVAL_TO_INT(id)) {
case BOOKMARK_TITLE: case BOOKMARK_TITLE:
/** @todo Bug 153: bookmark->title should be UTF-8 */ return bookmark_string_to_jsval(ctx, bookmark->title, vp);
*vp = STRING_TO_JSVAL(JS_NewStringCopyZ(smjs_ctx,
bookmark->title));
return JS_TRUE;
case BOOKMARK_URL: case BOOKMARK_URL:
/** @todo Bug 1066: bookmark->url should be UTF-8 */ return bookmark_string_to_jsval(ctx, bookmark->url, vp);
*vp = STRING_TO_JSVAL(JS_NewStringCopyZ(smjs_ctx,
bookmark->url));
return JS_TRUE;
case BOOKMARK_CHILDREN: case BOOKMARK_CHILDREN:
*vp = OBJECT_TO_JSVAL(smjs_get_bookmark_folder_object(bookmark)); *vp = OBJECT_TO_JSVAL(smjs_get_bookmark_folder_object(bookmark));
@ -149,24 +196,10 @@ bookmark_set_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
return JS_FALSE; return JS_FALSE;
switch (JSVAL_TO_INT(id)) { switch (JSVAL_TO_INT(id)) {
case BOOKMARK_TITLE: { case BOOKMARK_TITLE:
JSString *jsstr = JS_ValueToString(smjs_ctx, *vp); return jsval_to_bookmark_string(ctx, *vp, &bookmark->title);
unsigned char *str = JS_GetStringBytes(jsstr); case BOOKMARK_URL:
return jsval_to_bookmark_string(ctx, *vp, &bookmark->url);
/** @todo Bug 153: bookmark->title should be UTF-8 */
mem_free_set(&bookmark->title, stracpy(str));
return JS_TRUE;
}
case BOOKMARK_URL: {
JSString *jsstr = JS_ValueToString(smjs_ctx, *vp);
unsigned char *str = JS_GetStringBytes(jsstr);
/** @todo Bug 1066: bookmark->url should be UTF-8 */
mem_free_set(&bookmark->url, stracpy(str));
return JS_TRUE;
}
default: default:
/* Unrecognized integer property ID; someone is using /* Unrecognized integer property ID; someone is using
* the object as an array. SMJS builtin classes (e.g. * the object as an array. SMJS builtin classes (e.g.
@ -209,7 +242,7 @@ bookmark_folder_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
{ {
struct bookmark *bookmark; struct bookmark *bookmark;
struct bookmark *folder; struct bookmark *folder;
unsigned char *title; unsigned char *title = NULL;
/* This can be called if @obj is not itself an instance of the /* This can be called if @obj is not itself an instance of the
* appropriate class but has one in its prototype chain. Fail * appropriate class but has one in its prototype chain. Fail
@ -222,14 +255,15 @@ bookmark_folder_get_property(JSContext *ctx, JSObject *obj, jsval id, jsval *vp)
*vp = JSVAL_NULL; *vp = JSVAL_NULL;
title = JS_GetStringBytes(JS_ValueToString(ctx, id)); if (!jsval_to_bookmark_string(ctx, id, &title))
if (!title) return JS_TRUE; return JS_FALSE;
bookmark = get_bookmark_by_name(folder, title); bookmark = get_bookmark_by_name(folder, title);
if (bookmark) { if (bookmark) {
*vp = OBJECT_TO_JSVAL(smjs_get_bookmark_object(bookmark)); *vp = OBJECT_TO_JSVAL(smjs_get_bookmark_object(bookmark));
} }
mem_free(title);
return JS_TRUE; return JS_TRUE;
} }

View File

@ -8,6 +8,7 @@
#include "config/home.h" #include "config/home.h"
#include "ecmascript/spidermonkey-shared.h" #include "ecmascript/spidermonkey-shared.h"
#include "intl/charsets.h"
#include "main/module.h" #include "main/module.h"
#include "osdep/osdep.h" #include "osdep/osdep.h"
#include "scripting/scripting.h" #include "scripting/scripting.h"
@ -163,3 +164,163 @@ cleanup_smjs(struct module *module)
JS_DestroyContext(smjs_ctx); JS_DestroyContext(smjs_ctx);
spidermonkey_runtime_release(); spidermonkey_runtime_release();
} }
/** Convert a UTF-8 string to a JSString.
 *
 * The input is decoded into a temporary UTF-16 buffer, which is then
 * copied into a newly created JSString and freed.
 *
 * @param ctx
 *   Allocate the string in this JSContext.
 * @param[in] str
 *   The input string that should be converted.
 * @param[in] length
 *   Length of @a str in bytes, or -1 if it is null-terminated.
 *
 * @return the new string.  On error, report the error to SpiderMonkey
 * and return NULL.  */
JSString *
utf8_to_jsstring(JSContext *ctx, const unsigned char *str, int length)
{
	size_t in_bytes;
	const unsigned char *in_end;
	size_t utf16_alloc;
	jschar *utf16;
	size_t utf16_used;
	JSString *jsstr;

	if (length == -1)
		in_bytes = strlen((const char *) str);
	else
		in_bytes = length;

	/* Each byte of input can become at most one UTF-16 unit.
	 * Check whether the multiplication could overflow.  */
	assert(!needs_utf16_surrogates(UCS_REPLACEMENT_CHARACTER));
	if (in_bytes > ((size_t) -1) / sizeof(jschar)) {
		JS_ReportAllocationOverflow(ctx);
		return NULL;
	}

	/* Always request at least one unit: a zero-byte allocation may
	 * legitimately come back as NULL, and an empty input string
	 * (e.g. an empty URL) must not be reported as out of memory.  */
	utf16_alloc = in_bytes ? in_bytes : 1;

	/* Don't use fmem_alloc here because long strings could
	 * exhaust the stack.  */
	utf16 = mem_alloc(utf16_alloc * sizeof(jschar));
	if (utf16 == NULL) {
		JS_ReportOutOfMemory(ctx);
		return NULL;
	}

	in_end = str + in_bytes;

	utf16_used = 0;
	for (;;) {
		unicode_val_T unicode;

		unicode = utf8_to_unicode((unsigned char **) &str, in_end);
		if (unicode == UCS_NO_CHAR)
			break;

		if (needs_utf16_surrogates(unicode)) {
			assert(utf16_alloc - utf16_used >= 2);
			if_assert_failed { mem_free(utf16); return NULL; }
			utf16[utf16_used++] = get_utf16_high_surrogate(unicode);
			utf16[utf16_used++] = get_utf16_low_surrogate(unicode);
		} else {
			assert(utf16_alloc - utf16_used >= 1);
			if_assert_failed { mem_free(utf16); return NULL; }
			utf16[utf16_used++] = unicode;
		}
	}

	/* JS_NewUCString() would adopt the buffer, so freeing it below
	 * would leave the JSString pointing at released memory.  The
	 * copying variant leaves ownership of utf16 with this function,
	 * which is what the mem_free() call requires.  */
	jsstr = JS_NewUCStringCopyN(ctx, utf16, utf16_used);
	mem_free(utf16);

	return jsstr;
}
/** Append a jschar (UTF-16) array to a struct string as UTF-8.
 * A surrogate that is not part of a valid high+low pair is replaced
 * with UCS_REPLACEMENT_CHARACTER.
 *
 * @param[in,out] utf8
 *   The function appends characters to this UTF-8 string.
 *
 * @param[in] utf16
 *   Pointer to the first element in an array of jschars.
 *
 * @param[in] len
 *   Number of jschars in the @a utf16 array.
 *
 * @return @a utf8 if successful, or NULL if appending failed.  */
static struct string *
add_jschars_to_utf8_string(struct string *utf8,
			   const jschar *utf16, size_t len)
{
	size_t idx = 0;

	while (idx < len) {
		unicode_val_T ch = utf16[idx];

		idx++;

		if (is_utf16_surrogate(ch)) {
			/* Only a high surrogate immediately followed by
			 * a low surrogate forms a valid pair.  */
			const int paired = is_utf16_high_surrogate(ch)
					   && idx < len
					   && is_utf16_low_surrogate(utf16[idx]);

			if (paired) {
				const jschar low = utf16[idx];

				idx++;
				ch = join_utf16_surrogates(ch, low);
			} else {
				ch = UCS_REPLACEMENT_CHARACTER;
			}
		}

		if (ch != 0) {
			if (!add_to_string(utf8, encode_utf8(ch)))
				return NULL;
			continue;
		}

		/* U+0000 is appended as a single raw null byte rather
		 * than going through encode_utf8().  */
		if (!add_char_to_string(utf8, '\0'))
			return NULL;
	}

	return utf8;
}
/** Produce a UTF-8 copy of a JSString.
 *
 * @param ctx
 *   For reporting errors.
 * @param[in] jsstr
 *   The input string that should be converted.  Must not be NULL.
 * @param[out] length
 *   Optional.  The number of bytes in the returned string,
 *   not counting the terminating null.
 *
 * @return the new string, which the caller must eventually free
 * with mem_free().  On error, report out-of-memory to SpiderMonkey
 * and return NULL; *@a length is then undefined.  */
unsigned char *
jsstring_to_utf8(JSContext *ctx, JSString *jsstr, int *length)
{
	struct string out;
	const size_t unit_count = JS_GetStringLength(jsstr);
	/* The characters stay owned by jsstr.  JS_GetStringChars has no
	 * JSContext * parameter, so it can't report a failure itself
	 * (and can't collect garbage either); a NULL result is reported
	 * below.  */
	const jschar *units = JS_GetStringChars(jsstr);

	if (units != NULL && init_string(&out)) {
		if (add_jschars_to_utf8_string(&out, units, unit_count)) {
			if (length)
				*length = out.length;
			return out.source;
		}

		/* Appending failed part-way; drop the partial result.  */
		done_string(&out);
	}

	/* Every failure above is reported as out of memory.  */
	JS_ReportOutOfMemory(ctx);
	return NULL;
}

View File

@ -16,4 +16,9 @@ void alert_smjs_error(unsigned char *msg);
void init_smjs(struct module *module); void init_smjs(struct module *module);
void cleanup_smjs(struct module *module); void cleanup_smjs(struct module *module);
JSString *utf8_to_jsstring(JSContext *ctx, const unsigned char *str,
int length);
unsigned char *jsstring_to_utf8(JSContext *ctx, JSString *jsstr,
int *length);
#endif #endif