Merged tinygettext branch

git-svn-id: svn+ssh://svn.code.sf.net/p/supertuxkart/code/main/trunk@7958 178a84e3-b1eb-0310-8ba1-8eac791a3b58
This commit is contained in:
auria 2011-03-17 00:02:27 +00:00
commit b3fc0ed2cd
32 changed files with 3922 additions and 19 deletions

View File

@ -362,6 +362,26 @@ supertuxkart_SOURCES = \
states_screens/tracks_screen.hpp \
states_screens/tutorial_screen.cpp \
states_screens/tutorial_screen.hpp \
tinygettext/dictionary.cpp \
tinygettext/dictionary.hpp \
tinygettext/dictionary_manager.cpp \
tinygettext/dictionary_manager.hpp \
tinygettext/file_system.hpp \
tinygettext/iconv.cpp \
tinygettext/iconv.hpp \
tinygettext/language.cpp \
tinygettext/language.hpp \
tinygettext/log.cpp \
tinygettext/log.hpp \
tinygettext/log_stream.hpp \
tinygettext/plural_forms.cpp \
tinygettext/plural_forms.hpp \
tinygettext/po_parser.cpp \
tinygettext/po_parser.hpp \
tinygettext/stk_file_system.cpp \
tinygettext/stk_file_system.hpp \
tinygettext/tinygettext.cpp \
tinygettext/tinygettext.hpp \
tracks/ambient_light_sphere.cpp \
tracks/ambient_light_sphere.hpp \
tracks/bezier_curve.cpp \
@ -419,4 +439,3 @@ supertuxkart_LDADD = \
$(irrlicht_LIBS) $(fribidi_LIBS) $(bullet_LIBS) $(enet_LIBS) \
$(opengl_LIBS) $(openal_LIBS) $(oggvorbis_LIBS) \
$(INTLLIBS) $(LIBCURL_LIBS) $(LIBCURL_CFLAGS)

View File

@ -4,5 +4,5 @@ HEADER_SEARCH_PATHS = /usr/local/include /usr/include /Library/Frameworks/IrrFra
OTHER_CFLAGS = -Wall -DHAVE_OGGVORBIS=1 -DHAS_SOCKLEN_T -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAS_POLL=1 -DHAS_FCNTL=1 -DHAS_INET_PTON=1 -DHAS_INET_NTOP=1 -DHAS_MSGHDR_FLAGS=1 -DENABLE_NLS=1 -DHAVE_GETTEXT=1 -DHAVE_GLUT=1 -DHAVE_IRRLICHT=1 -DPACKAGE="\"supertuxkart\"" -D__MACOSX__=1 -DHAVE_RTT=0 -DENABLE_BIDI=1 -fvisibility=hidden -DVERSION=\"svn\"
OTHER_LDFLAGS = -lintl
OTHER_LDFLAGS =
LIBRARY_SEARCH_PATHS = /usr/local/lib /usr/lib

View File

@ -308,6 +308,15 @@
95833240101243ED00C5137E /* player_info_dialog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 95833239101243ED00C5137E /* player_info_dialog.cpp */; };
95833241101243ED00C5137E /* press_a_key_dialog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9583323B101243ED00C5137E /* press_a_key_dialog.cpp */; };
95833242101243ED00C5137E /* track_info_dialog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9583323D101243ED00C5137E /* track_info_dialog.cpp */; };
9584449E1330F89100CEA60A /* dictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9584448A1330F89100CEA60A /* dictionary.cpp */; };
9584449F1330F89100CEA60A /* dictionary_manager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9584448C1330F89100CEA60A /* dictionary_manager.cpp */; };
958444A01330F89100CEA60A /* iconv.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9584448F1330F89100CEA60A /* iconv.cpp */; };
958444A11330F89100CEA60A /* language.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 958444911330F89100CEA60A /* language.cpp */; };
958444A21330F89100CEA60A /* log.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 958444931330F89100CEA60A /* log.cpp */; };
958444A31330F89100CEA60A /* plural_forms.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 958444961330F89100CEA60A /* plural_forms.cpp */; };
958444A41330F89100CEA60A /* po_parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 958444981330F89100CEA60A /* po_parser.cpp */; };
958444A51330F89100CEA60A /* stk_file_system.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9584449A1330F89100CEA60A /* stk_file_system.cpp */; };
958444A61330F89100CEA60A /* tinygettext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9584449C1330F89100CEA60A /* tinygettext.cpp */; };
9586318411B1EC9F00B8B4AF /* grand_prix_lose.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9586318011B1EC9F00B8B4AF /* grand_prix_lose.cpp */; };
9586318511B1EC9F00B8B4AF /* grand_prix_win.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9586318211B1EC9F00B8B4AF /* grand_prix_win.cpp */; };
958BD770117F6AE90095B483 /* music_manager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 958BD76E117F6AE90095B483 /* music_manager.cpp */; };
@ -1045,6 +1054,26 @@
9583323C101243ED00C5137E /* press_a_key_dialog.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = press_a_key_dialog.hpp; path = ../../states_screens/dialogs/press_a_key_dialog.hpp; sourceTree = SOURCE_ROOT; };
9583323D101243ED00C5137E /* track_info_dialog.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = track_info_dialog.cpp; path = ../../states_screens/dialogs/track_info_dialog.cpp; sourceTree = SOURCE_ROOT; };
9583323E101243ED00C5137E /* track_info_dialog.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = track_info_dialog.hpp; path = ../../states_screens/dialogs/track_info_dialog.hpp; sourceTree = SOURCE_ROOT; };
9584448A1330F89100CEA60A /* dictionary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dictionary.cpp; path = ../../tinygettext/dictionary.cpp; sourceTree = SOURCE_ROOT; };
9584448B1330F89100CEA60A /* dictionary.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = dictionary.hpp; path = ../../tinygettext/dictionary.hpp; sourceTree = SOURCE_ROOT; };
9584448C1330F89100CEA60A /* dictionary_manager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dictionary_manager.cpp; path = ../../tinygettext/dictionary_manager.cpp; sourceTree = SOURCE_ROOT; };
9584448D1330F89100CEA60A /* dictionary_manager.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = dictionary_manager.hpp; path = ../../tinygettext/dictionary_manager.hpp; sourceTree = SOURCE_ROOT; };
9584448E1330F89100CEA60A /* file_system.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = file_system.hpp; path = ../../tinygettext/file_system.hpp; sourceTree = SOURCE_ROOT; };
9584448F1330F89100CEA60A /* iconv.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = iconv.cpp; path = ../../tinygettext/iconv.cpp; sourceTree = SOURCE_ROOT; };
958444901330F89100CEA60A /* iconv.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = iconv.hpp; path = ../../tinygettext/iconv.hpp; sourceTree = SOURCE_ROOT; };
958444911330F89100CEA60A /* language.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = language.cpp; path = ../../tinygettext/language.cpp; sourceTree = SOURCE_ROOT; };
958444921330F89100CEA60A /* language.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = language.hpp; path = ../../tinygettext/language.hpp; sourceTree = SOURCE_ROOT; };
958444931330F89100CEA60A /* log.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = log.cpp; path = ../../tinygettext/log.cpp; sourceTree = SOURCE_ROOT; };
958444941330F89100CEA60A /* log.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = log.hpp; path = ../../tinygettext/log.hpp; sourceTree = SOURCE_ROOT; };
958444951330F89100CEA60A /* log_stream.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = log_stream.hpp; path = ../../tinygettext/log_stream.hpp; sourceTree = SOURCE_ROOT; };
958444961330F89100CEA60A /* plural_forms.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = plural_forms.cpp; path = ../../tinygettext/plural_forms.cpp; sourceTree = SOURCE_ROOT; };
958444971330F89100CEA60A /* plural_forms.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = plural_forms.hpp; path = ../../tinygettext/plural_forms.hpp; sourceTree = SOURCE_ROOT; };
958444981330F89100CEA60A /* po_parser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = po_parser.cpp; path = ../../tinygettext/po_parser.cpp; sourceTree = SOURCE_ROOT; };
958444991330F89100CEA60A /* po_parser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = po_parser.hpp; path = ../../tinygettext/po_parser.hpp; sourceTree = SOURCE_ROOT; };
9584449A1330F89100CEA60A /* stk_file_system.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = stk_file_system.cpp; path = ../../tinygettext/stk_file_system.cpp; sourceTree = SOURCE_ROOT; };
9584449B1330F89100CEA60A /* stk_file_system.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = stk_file_system.hpp; path = ../../tinygettext/stk_file_system.hpp; sourceTree = SOURCE_ROOT; };
9584449C1330F89100CEA60A /* tinygettext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tinygettext.cpp; path = ../../tinygettext/tinygettext.cpp; sourceTree = SOURCE_ROOT; };
9584449D1330F89100CEA60A /* tinygettext.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tinygettext.hpp; path = ../../tinygettext/tinygettext.hpp; sourceTree = SOURCE_ROOT; };
9586318011B1EC9F00B8B4AF /* grand_prix_lose.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = grand_prix_lose.cpp; path = ../../states_screens/grand_prix_lose.cpp; sourceTree = SOURCE_ROOT; };
9586318111B1EC9F00B8B4AF /* grand_prix_lose.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = grand_prix_lose.hpp; path = ../../states_screens/grand_prix_lose.hpp; sourceTree = SOURCE_ROOT; };
9586318211B1EC9F00B8B4AF /* grand_prix_win.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = grand_prix_win.cpp; path = ../../states_screens/grand_prix_win.cpp; sourceTree = SOURCE_ROOT; };
@ -2188,6 +2217,34 @@
path = ../../states_screens/dialogs;
sourceTree = SOURCE_ROOT;
};
958444891330F89100CEA60A /* tinygettext */ = {
isa = PBXGroup;
children = (
9584448A1330F89100CEA60A /* dictionary.cpp */,
9584448B1330F89100CEA60A /* dictionary.hpp */,
9584448C1330F89100CEA60A /* dictionary_manager.cpp */,
9584448D1330F89100CEA60A /* dictionary_manager.hpp */,
9584448E1330F89100CEA60A /* file_system.hpp */,
9584448F1330F89100CEA60A /* iconv.cpp */,
958444901330F89100CEA60A /* iconv.hpp */,
958444911330F89100CEA60A /* language.cpp */,
958444921330F89100CEA60A /* language.hpp */,
958444931330F89100CEA60A /* log.cpp */,
958444941330F89100CEA60A /* log.hpp */,
958444951330F89100CEA60A /* log_stream.hpp */,
958444961330F89100CEA60A /* plural_forms.cpp */,
958444971330F89100CEA60A /* plural_forms.hpp */,
958444981330F89100CEA60A /* po_parser.cpp */,
958444991330F89100CEA60A /* po_parser.hpp */,
9584449A1330F89100CEA60A /* stk_file_system.cpp */,
9584449B1330F89100CEA60A /* stk_file_system.hpp */,
9584449C1330F89100CEA60A /* tinygettext.cpp */,
9584449D1330F89100CEA60A /* tinygettext.hpp */,
);
name = tinygettext;
path = ../../tinygettext;
sourceTree = SOURCE_ROOT;
};
95A118280F77EA3100B18B3D /* input */ = {
isa = PBXGroup;
children = (
@ -2226,6 +2283,7 @@
95263DDF0FD7471900CF5F92 /* race */,
95C2B19C0F296545000D3E5D /* replay */,
958330C110122B4A00C5137E /* states_screens */,
958444891330F89100CEA60A /* tinygettext */,
95C2B1CE0F296545000D3E5D /* tracks */,
9576460212BAD1CF00DB80C7 /* tutorial */,
95C2B1DF0F296546000D3E5D /* utils */,
@ -2986,6 +3044,15 @@
9592DC6D13021B350039DBC8 /* minimal_race_gui.cpp in Sources */,
95E1FCDF130369EB004D83CC /* per_camera_node.cpp in Sources */,
95376CAF1320784100C842A4 /* lod_node.cpp in Sources */,
9584449E1330F89100CEA60A /* dictionary.cpp in Sources */,
9584449F1330F89100CEA60A /* dictionary_manager.cpp in Sources */,
958444A01330F89100CEA60A /* iconv.cpp in Sources */,
958444A11330F89100CEA60A /* language.cpp in Sources */,
958444A21330F89100CEA60A /* log.cpp in Sources */,
958444A31330F89100CEA60A /* plural_forms.cpp in Sources */,
958444A41330F89100CEA60A /* po_parser.cpp in Sources */,
958444A51330F89100CEA60A /* stk_file_system.cpp in Sources */,
958444A61330F89100CEA60A /* tinygettext.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -233,7 +233,7 @@
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="opengl32.lib user32.lib gdi32.lib winmm.lib advapi32.lib OpenAL32.lib libogg.lib libvorbis.lib libvorbisfile.lib intl.lib Irrlicht.lib ws2_32.lib fribidi.lib pthreadVC2.lib libcurld_imp.lib"
AdditionalDependencies="opengl32.lib user32.lib gdi32.lib winmm.lib advapi32.lib OpenAL32.lib libogg.lib libvorbis.lib libvorbisfile.lib Irrlicht.lib ws2_32.lib fribidi.lib pthreadVC2.lib libcurld_imp.lib"
OutputFile="./../../../$(ProjectName)_curl_d.exe"
LinkIncremental="2"
AdditionalLibraryDirectories="../../../dependencies/lib"
@ -1155,6 +1155,46 @@
>
</File>
</Filter>
<Filter
Name="tinygettext"
>
<File
RelativePath="..\..\tinygettext\dictionary.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\dictionary_manager.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\iconv.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\language.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\log.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\plural_forms.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\po_parser.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\stk_file_system.cpp"
>
</File>
<File
RelativePath="..\..\tinygettext\tinygettext.cpp"
>
</File>
</Filter>
</Filter>
<Filter
Name="Headerdateien"
@ -1396,10 +1436,30 @@
RelativePath="..\..\utils\translation.hpp"
>
</File>
<File
RelativePath="..\..\utils\utf8.h"
>
</File>
<File
RelativePath="..\..\utils\vec3.hpp"
>
</File>
<Filter
Name="utf8"
>
<File
RelativePath="..\..\utils\utf8\checked.h"
>
</File>
<File
RelativePath="..\..\utils\utf8\core.h"
>
</File>
<File
RelativePath="..\..\utils\utf8\unchecked.h"
>
</File>
</Filter>
</Filter>
<Filter
Name="audio"
@ -2101,6 +2161,54 @@
>
</File>
</Filter>
<Filter
Name="tinygettext"
>
<File
RelativePath="..\..\tinygettext\dictionary.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\dictionary_manager.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\file_system.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\iconv.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\language.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\log.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\log_stream.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\plural_forms.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\po_parser.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\stk_file_system.hpp"
>
</File>
<File
RelativePath="..\..\tinygettext\tinygettext.hpp"
>
</File>
</Filter>
</Filter>
<Filter
Name="Ressourcendateien"

View File

@ -220,7 +220,8 @@ void MainMenuScreen::eventCallback(Widget* widget, const std::string& name, cons
if (selection == "system")
{
#ifdef WIN32
SetEnvironmentVariableA("LANGUAGE", "");
SetEnvironmentVariable("LANGUAGE", "");
_putenv("LANGUAGE=");
#else
setenv( "LANGUAGE", "", 1);
#endif
@ -228,7 +229,8 @@ void MainMenuScreen::eventCallback(Widget* widget, const std::string& name, cons
else
{
#ifdef WIN32
SetEnvironmentVariableA("LANGUAGE", selection.c_str());
std::string s=std::string("LANGUAGE=")+selection.c_str();
_putenv(s.c_str());
#else
setenv("LANGUAGE", selection.c_str(), 1);
#endif

View File

@ -54,6 +54,9 @@ using namespace irr;
#else
#include <GL/gl.h>
#endif
#else
#include <GL/gl.h>
#endif
/** The constructor is called before anything is attached to the scene node.
* So rendering to a texture can be done here. But world is not yet fully

View File

@ -0,0 +1,208 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include <assert.h>
#include "log_stream.hpp"
#include "dictionary.hpp"
namespace tinygettext {
/** Creates an empty dictionary that stores \a charset_ as its charset. */
Dictionary::Dictionary(const std::string& charset_)
  : entries()
  , ctxt_entries()
  , charset(charset_)
  , plural_forms()
{}
/** Nothing to release explicitly; the members clean up after themselves. */
Dictionary::~Dictionary() {}
std::string
Dictionary::get_charset() const
{
return charset;
}
void
Dictionary::set_plural_forms(const PluralForms& plural_forms_)
{
plural_forms = plural_forms_;
}
/** Returns a copy of the plural-form rule currently stored in this dictionary. */
PluralForms Dictionary::get_plural_forms() const
{
  return this->plural_forms;
}
std::string
Dictionary::translate_plural(const std::string& msgid, const std::string& msgid_plural, int num)
{
return translate_plural(entries, msgid, msgid_plural, num);
}
/** Looks up \a msgid in \a dict and returns the plural form selected by
    plural_forms.get_plural(count).

    Falls back to the untranslated strings using English rules (\a msgid
    when \a count is 1, \a msgid_plural otherwise) when
      - \a msgid is not present in \a dict,
      - the selected translation is empty, or
      - the selected index is outside the stored translations (this still
        trips the assert in debug builds, but no longer reads out of
        bounds when asserts are compiled out). */
std::string
Dictionary::translate_plural(const Entries& dict, const std::string& msgid, const std::string& msgid_plural, int count)
{
  Entries::const_iterator i = dict.find(msgid);

  if (i != dict.end())
  {
    // Only touch i->second once we know the lookup succeeded;
    // dereferencing dict.end() is undefined behaviour.
    const std::vector<std::string>& msgstrs = i->second;

    const unsigned int n = plural_forms.get_plural(count);
    assert(/*n >= 0 &&*/ n < msgstrs.size());

    if (n < msgstrs.size() && !msgstrs[n].empty())
      return msgstrs[n];
  }
  //else
  //{
  //  log_info << "Couldn't translate: " << msgid << std::endl;
  //  log_info << "Candidates: " << std::endl;
  //  for (i = dict.begin(); i != dict.end(); ++i)
  //    log_info << "'" << i->first << "'" << std::endl;
  //}

  // default to english rules
  if (count == 1)
    return msgid;
  else
    return msgid_plural;
}
std::string
Dictionary::translate(const std::string& msgid)
{
return translate(entries, msgid);
}
std::string
Dictionary::translate(const Entries& dict, const std::string& msgid)
{
Entries::const_iterator i = dict.find(msgid);
if (i != dict.end() && !i->second.empty())
{
return i->second[0];
}
else
{
//log_info << "Couldn't translate: " << msgid << std::endl;
return msgid;
}
}
std::string
Dictionary::translate_ctxt(const std::string& msgctxt, const std::string& msgid)
{
CtxtEntries::iterator i = ctxt_entries.find(msgctxt);
if (i != ctxt_entries.end())
{
return translate(i->second, msgid);
}
else
{
//log_info << "Couldn't translate: " << msgid << std::endl;
return msgid;
}
}
std::string
Dictionary::translate_ctxt_plural(const std::string& msgctxt,
const std::string& msgid, const std::string& msgidplural, int num)
{
CtxtEntries::iterator i = ctxt_entries.find(msgctxt);
if (i != ctxt_entries.end())
{
return translate_plural(i->second, msgid, msgidplural, num);
}
else
{
//log_info << "Couldn't translate: " << msgid << std::endl;
if (num != 1) // default to english
return msgidplural;
else
return msgid;
}
}
/** Stores \a msgstrs as the translations of \a msgid (no context),
    replacing whatever was stored before. The plural msgid is not kept:
    callers pass it again to the translate call, so it is discarded here. */
void Dictionary::add_translation(const std::string& msgid,
                                 const std::string& /* msgid_plural */,
                                 const std::vector<std::string>& msgstrs)
{
  std::vector<std::string>& slot = entries[msgid];
  slot = msgstrs;
}
/** Stores \a msgstr as the translation of \a msgid (no context). If a
    translation already exists, a warning is logged and its first string
    is overwritten. */
void Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
{
  std::vector<std::string>& translations = entries[msgid];

  if (!translations.empty())
  {
    log_warning << "collision in add_translation: '"
                << msgid << "' -> '" << msgstr << "' vs '" << translations[0] << "'" << std::endl;
    translations[0] = msgstr;
    return;
  }

  translations.push_back(msgstr);
}
/** Stores \a msgstrs as the translations of \a msgid within the context
    \a msgctxt. An already populated entry is replaced after logging a
    collision warning. */
void Dictionary::add_translation(const std::string& msgctxt,
                                 const std::string& msgid,
                                 const std::string& msgid_plural,
                                 const std::vector<std::string>& msgstrs)
{
  std::vector<std::string>& translations = ctxt_entries[msgctxt][msgid];

  // The new strings win either way; an existing entry only adds a warning.
  if (!translations.empty())
  {
    log_warning << "collision in add_translation(\"" << msgctxt << "\", \"" << msgid << "\", \"" << msgid_plural << "\")" << std::endl;
  }

  translations = msgstrs;
}
/** Stores \a msgstr as the translation of \a msgid within the context
    \a msgctxt. If a translation already exists, a warning is logged and
    its first string is overwritten. */
void Dictionary::add_translation(const std::string& msgctxt, const std::string& msgid, const std::string& msgstr)
{
  std::vector<std::string>& translations = ctxt_entries[msgctxt][msgid];

  if (!translations.empty())
  {
    log_warning << "collision in add_translation(\"" << msgctxt << "\", \"" << msgid << "\")" << std::endl;
    translations[0] = msgstr;
    return;
  }

  translations.push_back(msgstr);
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,123 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_DICTIONARY_HPP
#define HEADER_TINYGETTEXT_DICTIONARY_HPP
#include <map>
#include <vector>
#include <string>
#include "plural_forms.hpp"
namespace tinygettext {
/** A message catalogue for one language that mimics gettext()
    lookups. To handle several languages and .po files at once, use
    the DictionaryManager. */
class Dictionary
{
private:
  /** msgid -> translated strings (one per plural form) */
  typedef std::map<std::string, std::vector<std::string> > Entries;
  /** msgctxt -> (msgid -> translated strings) */
  typedef std::map<std::string, Entries> CtxtEntries;

  Entries     entries;       ///< translations without a context
  CtxtEntries ctxt_entries;  ///< translations grouped by context
  std::string charset;       ///< charset name given to the constructor
  PluralForms plural_forms;  ///< rule mapping a count to a plural index

  /** Lookup helpers working on an explicit entry table */
  std::string translate(const Entries& dict, const std::string& msgid);
  std::string translate_plural(const Entries& dict, const std::string& msgid, const std::string& msgidplural, int num);

public:
  /** Constructs an empty dictionary tagged with the given \a charset
      (default UTF-8) */
  Dictionary(const std::string& charset = "UTF-8");
  ~Dictionary();

  /** Return the charset used for this dictionary */
  std::string get_charset() const;

  /** Set / get the plural rule used by the plural lookups */
  void set_plural_forms(const PluralForms&);
  PluralForms get_plural_forms() const;

  /** Translate the string \a msgid; \a msgid itself is returned when
      no translation is stored. */
  std::string translate(const std::string& msgid);

  /** Translate the string \a msgid to its correct plural form, based
      on the number of items given by \a num. \a msgidplural is
      \a msgid in plural form. */
  std::string translate_plural(const std::string& msgid, const std::string& msgidplural, int num);

  /** Translate the string \a msgid that is in context \a msgctxt. A
      context is a way to disambiguate msgids that contain the same
      letters but have a different meaning. For example "exit" might
      mean to quit doing something or it might refer to a door that
      leads outside (i.e. 'Beenden' vs 'Ausgang' in German) */
  std::string translate_ctxt(const std::string& msgctxt, const std::string& msgid);

  /** Plural-aware variant of translate_ctxt() */
  std::string translate_ctxt_plural(const std::string& msgctxt, const std::string& msgid, const std::string& msgidplural, int num);

  /** Add a translation for \a msgid to the dictionary, where
      \a msgid is the singular form of the message, \a msgid_plural the
      plural form and \a msgstrs a table of translations. The right
      translation is picked based on the \a num argument given to
      translate_plural(). */
  void add_translation(const std::string& msgid, const std::string& msgid_plural,
                       const std::vector<std::string>& msgstrs);
  void add_translation(const std::string& msgctxt,
                       const std::string& msgid, const std::string& msgid_plural,
                       const std::vector<std::string>& msgstrs);

  /** Add a translation from \a msgid to \a msgstr to the
      dictionary */
  void add_translation(const std::string& msgid, const std::string& msgstr);
  void add_translation(const std::string& msgctxt, const std::string& msgid, const std::string& msgstr);

  /** Visit every message without a context. Func is of type:
      void func(const std::string& msgid, const std::vector<std::string>& msgstrs)
      The functor is returned so that state it collected can be read. */
  template<class Func>
  Func foreach(Func func)
  {
    Entries::iterator entry = entries.begin();
    while (entry != entries.end())
    {
      func(entry->first, entry->second);
      ++entry;
    }
    return func;
  }

  /** Visit every message that has a context. Func is of type:
      void func(const std::string& ctxt, const std::string& msgid, const std::vector<std::string>& msgstrs)
      The functor is returned so that state it collected can be read. */
  template<class Func>
  Func foreach_ctxt(Func func)
  {
    CtxtEntries::iterator context = ctxt_entries.begin();
    while (context != ctxt_entries.end())
    {
      Entries::iterator entry = context->second.begin();
      while (entry != context->second.end())
      {
        func(context->first, entry->first, entry->second);
        ++entry;
      }
      ++context;
    }
    return func;
  }
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,242 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "dictionary_manager.hpp"
#include <memory>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <algorithm>
#include "log_stream.hpp"
#include "po_parser.hpp"
#include "stk_file_system.hpp"
namespace tinygettext {
/** Returns true when \a lhs ends with \a rhs. An empty \a rhs matches
    every string; a \a rhs longer than \a lhs never matches.
    Both strings are taken by const reference so no copy is made per call. */
static bool has_suffix(const std::string& lhs, const std::string& rhs)
{
  // The length check must come first: it keeps the start offset below
  // from underflowing.
  return lhs.length() >= rhs.length()
      && lhs.compare(lhs.length() - rhs.length(), rhs.length(), rhs) == 0;
}
/** Creates a manager with an empty cache and search path, fuzzy
    handling switched on, a default-constructed current language and a
    StkFileSystem as the file backend. */
DictionaryManager::DictionaryManager(const std::string& charset_)
  : dictionaries()
  , search_path()
  , charset(charset_)
  , use_fuzzy(true)
  , current_language()
  , current_dict(0)
  , empty_dict()
  , filesystem(new StkFileSystem)
{}
/** Frees every dictionary held in the cache. */
DictionaryManager::~DictionaryManager()
{
  Dictionaries::iterator cached = dictionaries.begin();
  while (cached != dictionaries.end())
  {
    delete cached->second;
    ++cached;
  }
}
void
DictionaryManager::clear_cache()
{
for(Dictionaries::iterator i = dictionaries.begin(); i != dictionaries.end(); ++i)
{
delete i->second;
}
dictionaries.clear();
current_dict = 0;
}
/** Returns the dictionary of the current language. The result is
    remembered in current_dict; when no language is set the shared empty
    dictionary is returned instead. */
Dictionary& DictionaryManager::get_dictionary()
{
  // Fast path: already resolved for the current language
  if (current_dict)
    return *current_dict;

  if (!current_language)
    return empty_dict;

  current_dict = &get_dictionary(current_language);
  return *current_dict;
}
/** Returns the dictionary for \a language, loading it on first use.

    On a cache miss a new dictionary is registered in the cache and then
    filled: the search path is walked from the last directory to the
    first, and in every directory the .po file whose name scores best in
    Language::match() against \a language (score must be above 0) is
    parsed into that same dictionary. Open and parse failures are only
    logged; the (possibly partially filled) dictionary is still returned. */
Dictionary& DictionaryManager::get_dictionary(const Language& language)
{
  //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
  //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
  assert(language);

  Dictionaries::iterator cached = dictionaries.find(language);
  if (cached != dictionaries.end())
    return *cached->second;

  // Dictionary for this language isn't loaded yet, so we load it
  //log_debug << "get_dictionary: " << lang << std::endl;
  Dictionary* dict = new Dictionary(charset);
  dictionaries[language] = dict;

  for (SearchPath::reverse_iterator dir = search_path.rbegin(); dir != search_path.rend(); ++dir)
  {
    std::vector<std::string> files = filesystem->open_directory(*dir);

    std::string best_filename;
    int best_score = 0;

    for (std::vector<std::string>::iterator candidate = files.begin(); candidate != files.end(); ++candidate)
    {
      // ignore anything that isn't a .po file
      if (!has_suffix(*candidate, ".po"))
        continue;

      // the file name without ".po" names the language of the file
      Language po_language = Language::from_env(candidate->substr(0, candidate->size()-3));
      if (!po_language)
      {
        log_warning << *candidate << ": warning: ignoring, unknown language" << std::endl;
        continue;
      }

      int score = Language::match(language, po_language);
      if (score > best_score)
      {
        best_score = score;
        best_filename = *candidate;
      }
    }

    // nothing suitable in this directory
    if (best_filename.empty())
      continue;

    std::string pofile = *dir + "/" + best_filename;
    try
    {
      std::auto_ptr<std::istream> in = filesystem->open_file(pofile);
      if (in.get())
      {
        POParser::parse(pofile, *in, *dict);
      }
      else
      {
        log_error << "error: failure opening: " << pofile << std::endl;
      }
    }
    catch(std::exception& e)
    {
      log_error << "error: failure parsing: " << pofile << std::endl;
      log_error << e.what() << "" << std::endl;
    }
  }

  return *dict;
}
std::set<Language>
DictionaryManager::get_languages()
{
std::set<Language> languages;
for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
{
std::vector<std::string> files = filesystem->open_directory(*p);
for(std::vector<std::string>::iterator file = files.begin(); file != files.end(); ++file)
{
if (has_suffix(*file, ".po"))
{
languages.insert(Language::from_env(file->substr(0, file->size()-3)));
}
}
}
return languages;
}
/** Selects \a language as the current language. When it differs from
    the previous one, the remembered current dictionary is dropped so the
    next get_dictionary() resolves it again. */
void DictionaryManager::set_language(const Language& language)
{
  if (current_language != language)
  {
    current_dict     = 0;
    current_language = language;
  }
}
/** Returns a copy of the currently selected language. */
Language DictionaryManager::get_language() const
{
  return this->current_language;
}
void
DictionaryManager::set_charset(const std::string& charset_)
{
clear_cache(); // changing charset invalidates cache
charset = charset_;
}
void
DictionaryManager::set_use_fuzzy(bool t)
{
clear_cache();
use_fuzzy = t;
}
bool
DictionaryManager::get_use_fuzzy() const
{
return use_fuzzy;
}
void
DictionaryManager::add_directory(const std::string& pathname)
{
clear_cache(); // adding directories invalidates cache
search_path.push_back(pathname);
}
void
DictionaryManager::set_filesystem(std::auto_ptr<FileSystem> filesystem_)
{
filesystem = filesystem_;
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,99 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_DICTIONARY_MANAGER_HPP
#define HEADER_TINYGETTEXT_DICTIONARY_MANAGER_HPP
#include <map>
#include <set>
#include <string>
#include <vector>
#include <memory>
#include "dictionary.hpp"
#include "language.hpp"
namespace tinygettext {
class FileSystem;
/** Manager class for dictionaries: you give it a bunch of directories
with .po files and it will then load the matching file on demand,
depending on which language was set. Loaded dictionaries are cached
per language and owned by the manager. */
class DictionaryManager
{
private:
/** Cache of loaded dictionaries, keyed by language. The pointers are
owned by the manager and deleted in clear_cache() and the destructor. */
typedef std::map<Language, Dictionary*> Dictionaries;
Dictionaries dictionaries;
/** Directories that are scanned for .po files */
typedef std::vector<std::string> SearchPath;
SearchPath search_path;
/** Charset handed to every Dictionary this manager creates */
std::string charset;
/** Flag stored by set_use_fuzzy(); true after construction */
bool use_fuzzy;
/** Language selected through set_language() */
Language current_language;
/** Dictionary of current_language, or 0 when it has to be looked up again */
Dictionary* current_dict;
/** Returned by get_dictionary() while no language is set */
Dictionary empty_dict;
/** Backend used to list directories and open files */
std::auto_ptr<FileSystem> filesystem;
/** Delete all cached dictionaries and reset current_dict */
void clear_cache();
public:
DictionaryManager(const std::string& charset_ = "UTF-8");
~DictionaryManager();
/** Return the currently active dictionary; if no language is set, an
empty dictionary is returned. */
Dictionary& get_dictionary();
/** Get the dictionary for \a language, loading it from the search
path on first request */
Dictionary& get_dictionary(const Language& language);
/** Select the active language. The active dictionary is looked up
again on the next get_dictionary() call when the language changed. */
void set_language(const Language& language);
/** Return the currently selected language */
Language get_language() const;
/** Set / get the fuzzy flag. Setting it clears the dictionary cache. */
void set_use_fuzzy(bool t);
bool get_use_fuzzy() const;
/** Set the charset that is passed to dictionaries created afterwards.
Clears the dictionary cache. */
void set_charset(const std::string& charset);
/** Add a directory to the search path for dictionaries; earlier
added directories have higher priority than later added ones.
Clears the dictionary cache. */
void add_directory(const std::string& pathname);
/** Return the set of languages for which a .po file exists in the
search path */
std::set<Language> get_languages();
/** Replace the file backend; the manager takes ownership of it */
void set_filesystem(std::auto_ptr<FileSystem> filesystem);
private:
/** Copying is disabled: copy constructor and assignment are private */
DictionaryManager (const DictionaryManager&);
DictionaryManager& operator= (const DictionaryManager&);
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,42 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_FILE_SYSTEM_HPP
#define HEADER_TINYGETTEXT_FILE_SYSTEM_HPP
#include <vector>
#include <memory>
#include <iosfwd>
#include <string>
namespace tinygettext {
/** Abstract interface for directory and file access; concrete subclasses
implement both pure virtual functions. */
class FileSystem
{
public:
virtual ~FileSystem() {}
/** Takes a directory path and returns a list of strings; presumably the
names of the entries in that directory -- confirm against an implementation. */
virtual std::vector<std::string> open_directory(const std::string& pathname) =0;
/** Takes a file name and returns an input stream owned by the caller
(ownership is passed through the auto_ptr). */
virtual std::auto_ptr<std::istream> open_file(const std::string& filename) =0;
};
} // namespace tinygettext
#endif
/* EOF */

150
src/tinygettext/iconv.cpp Normal file
View File

@ -0,0 +1,150 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
/*
#include <ctype.h>
#include <assert.h>
#include <sstream>
#include <errno.h>
#include <stdexcept>
#include <string.h>
#include <stdlib.h>
#include "iconv.hpp"
#include "log_stream.hpp"
namespace tinygettext {
#ifndef tinygettext_ICONV_CONST
# define tinygettext_ICONV_CONST
#endif
IConv::IConv()
: to_charset(),
from_charset(),
cd(0)
{}
IConv::IConv(const std::string& from_charset_, const std::string& to_charset_)
: to_charset(),
from_charset(),
cd(0)
{
set_charsets(from_charset_, to_charset_);
}
IConv::~IConv()
{
if (cd)
tinygettext_iconv_close(cd);
}
void
IConv::set_charsets(const std::string& from_charset_, const std::string& to_charset_)
{
if (cd)
tinygettext_iconv_close(cd);
from_charset = from_charset_;
to_charset = to_charset_;
for(std::string::iterator i = to_charset.begin(); i != to_charset.end(); ++i)
*i = static_cast<char>(toupper(*i));
for(std::string::iterator i = from_charset.begin(); i != from_charset.end(); ++i)
*i = static_cast<char>(toupper(*i));
if (to_charset == from_charset)
{
cd = 0;
}
else
{
cd = tinygettext_iconv_open(to_charset.c_str(), from_charset.c_str());
if (cd == reinterpret_cast<tinygettext_iconv_t>(-1))
{
if(errno == EINVAL)
{
std::ostringstream str;
str << "IConv construction failed: conversion from '" << from_charset
<< "' to '" << to_charset << "' not available";
throw std::runtime_error(str.str());
}
else
{
std::ostringstream str;
str << "IConv: construction failed: " << strerror(errno);
throw std::runtime_error(str.str());
}
}
}
}
/// Convert a string from encoding to another.
std::string
IConv::convert(const std::string& text)
{
if (!cd)
{
return text;
}
else
{
size_t inbytesleft = text.size();
size_t outbytesleft = 4*inbytesleft; // Worst case scenario: ASCII -> UTF-32?
// We try to avoid too much copying around, so we write directly into
// a std::string
tinygettext_ICONV_CONST char* inbuf = const_cast<char*>(&text[0]);
std::string result(outbytesleft, 'X');
char* outbuf = &result[0];
// Try to convert the text.
size_t ret = tinygettext_iconv(cd, (const char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (ret == static_cast<size_t>(-1))
{
if (errno == EILSEQ || errno == EINVAL)
{ // invalid multibyte sequence
tinygettext_iconv(cd, NULL, NULL, NULL, NULL); // reset state
// FIXME: Could try to skip the invalid byte and continue
log_error << "error: tinygettext:iconv: invalid multibyte sequence in: \"" << text << "\"" << std::endl;
}
else if (errno == E2BIG)
{ // output buffer too small
assert(!"tinygettext/iconv.cpp: E2BIG: This should never be reached");
}
else if (errno == EBADF)
{
assert(!"tinygettext/iconv.cpp: EBADF: This should never be reached");
}
else
{
assert(!"tinygettext/iconv.cpp: <unknown>: This should never be reached");
}
}
result.resize(4*text.size() - outbytesleft);
return result;
}
}
} // namespace tinygettext
*/
/* EOF */

72
src/tinygettext/iconv.hpp Normal file
View File

@ -0,0 +1,72 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/*
#ifndef HEADER_TINYGETTEXT_ICONV_HPP
#define HEADER_TINYGETTEXT_ICONV_HPP
#include <string>
#ifdef HAVE_SDL
# include "SDL.h"
# define tinygettext_ICONV_CONST const
# define tinygettext_iconv_t SDL_iconv_t
# define tinygettext_iconv SDL_iconv
# define tinygettext_iconv_open SDL_iconv_open
# define tinygettext_iconv_close SDL_iconv_close
#else
# include <iconv.h>
# ifdef HAVE_ICONV_CONST
# define tinygettext_ICONV_CONST ICONV_CONST
# else
# define tinygettext_ICONV_CONST
# endif
# define tinygettext_iconv_t iconv_t
# define tinygettext_iconv iconv
# define tinygettext_iconv_open iconv_open
# define tinygettext_iconv_close iconv_close
#endif
namespace tinygettext {
class IConv
{
private:
std::string to_charset;
std::string from_charset;
tinygettext_iconv_t cd;
public:
IConv();
IConv(const std::string& fromcode, const std::string& tocode);
~IConv();
void set_charsets(const std::string& fromcode, const std::string& tocode);
std::string convert(const std::string& text);
private:
IConv (const IConv&);
IConv& operator= (const IConv&);
};
} // namespace tinygettext
#endif
*/
/* EOF */

View File

@ -0,0 +1,568 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "language.hpp"
#include <map>
#include <assert.h>
#include <vector>
namespace tinygettext {
/** One entry of the table of known languages. All strings are plain C
    strings; country and modifier may be 0 when they are not specified. */
struct LanguageSpec {
  /** Language code: "de", "en", ... */
  const char* language;

  /** Country code: "BR", "DE", ..., can be 0 */
  const char* country;

  /** Modifier/Variant: "Latn", "ije", "latin"..., can be 0 */
  const char* modifier;

  /** Language name: "German", "English", "French", ... */
  const char* name;
};

/** Language Definitions.
    The table is terminated by an entry whose language is NULL. The order
    of entries sharing a language code is significant: when several
    entries score equally in Language::from_spec(), the first one wins. */
//*{
LanguageSpec languages[] = {
  { "aa", 0, 0, "Afar" },
  { "af", 0, 0, "Afrikaans" },
  { "af", "ZA", 0, "Afrikaans (South Africa)" },
  { "am", 0, 0, "Amharic" },
  { "ar", 0, 0, "Arabic" },
  { "ar", "AR", 0, "Arabic (Argentina)" },
  { "ar", "OM", 0, "Arabic (Oman)" },
  { "ar", "SA", 0, "Arabic (Saudi Arabia)" },
  { "ar", "SY", 0, "Arabic (Syrian Arab Republic)" },
  { "ar", "TN", 0, "Arabic (Tunisia)" },
  { "as", 0, 0, "Assamese" },
  { "ast",0, 0, "Asturian" },
  { "ay", 0, 0, "Aymara" },
  { "az", 0, 0, "Azerbaijani" },
  { "az", "IR", 0, "Azerbaijani (Iran)" },
  { "be", 0, 0, "Belarusian" },
  { "be", 0, "latin", "Belarusian" },
  { "bg", 0, 0, "Bulgarian" },
  { "bg", "BG", 0, "Bulgarian (Bulgaria)" },
  { "bn", 0, 0, "Bengali" },
  { "bn", "BD", 0, "Bengali (Bangladesh)" },
  { "bn", "IN", 0, "Bengali (India)" },
  { "bo", 0, 0, "Tibetan" },
  { "br", 0, 0, "Breton" },
  { "bs", 0, 0, "Bosnian" },
  { "bs", "BA", 0, "Bosnian (Bosnia/Herzegovina)"},
  { "bs", "BS", 0, "Bosnian (Bahamas)" },
  { "ca", "ES", "valencia", "Catalan (valencia)" },
  { "ca", "ES", 0, "Catalan (Spain)" },
  { "ca", 0, "valencia", "Catalan (valencia)" },
  { "ca", 0, 0, "Catalan" },
  { "co", 0, 0, "Corsican" },
  { "cs", 0, 0, "Czech" },
  { "cs", "CZ", 0, "Czech (Czech Republic)" },
  { "cy", 0, 0, "Welsh" },
  { "cy", "GB", 0, "Welsh (Great Britain)" },
  { "cz", 0, 0, "Unknown language" },
  { "da", 0, 0, "Danish" },
  { "da", "DK", 0, "Danish (Denmark)" },
  { "de", 0, 0, "German" },
  { "de", "AT", 0, "German (Austria)" },
  { "de", "CH", 0, "German (Switzerland)" },
  { "de", "DE", 0, "German (Germany)" },
  { "dk", 0, 0, "Unknown language" },
  { "dz", 0, 0, "Dzongkha" },
  { "el", 0, 0, "Greek" },
  { "el", "GR", 0, "Greek (Greece)" },
  { "en", 0, 0, "English" },
  { "en", "AU", 0, "English (Australia)" },
  { "en", "CA", 0, "English (Canada)" },
  { "en", "GB", 0, "English (Great Britain)" },
  { "en", "US", 0, "English (United States)" },
  { "en", "ZA", 0, "English (South Africa)" },
  { "en", 0, "boldquot", "English" },
  { "en", 0, "quot", "English" },
  { "en", "US", "piglatin", "English" },
  { "eo", 0, 0, "Esperanto" },
  { "es", 0, 0, "Spanish" },
  { "es", "AR", 0, "Spanish (Argentina)" },
  { "es", "CL", 0, "Spanish (Chile)" },
  { "es", "CO", 0, "Spanish (Colombia)" },
  { "es", "CR", 0, "Spanish (Costa Rica)" },
  { "es", "DO", 0, "Spanish (Dominican Republic)"},
  { "es", "EC", 0, "Spanish (Ecuador)" },
  { "es", "ES", 0, "Spanish (Spain)" },
  { "es", "GT", 0, "Spanish (Guatemala)" },
  { "es", "HN", 0, "Spanish (Honduras)" },
  { "es", "LA", 0, "Spanish (Laos)" },
  { "es", "MX", 0, "Spanish (Mexico)" },
  { "es", "NI", 0, "Spanish (Nicaragua)" },
  { "es", "PA", 0, "Spanish (Panama)" },
  { "es", "PE", 0, "Spanish (Peru)" },
  { "es", "PR", 0, "Spanish (Puerto Rico)" },
  { "es", "SV", 0, "Spanish (El Salvador)" },
  { "es", "UY", 0, "Spanish (Uruguay)" },
  { "es", "VE", 0, "Spanish (Venezuela)" },
  { "et", 0, 0, "Estonian" },
  { "et", "EE", 0, "Estonian (Estonia)" },
  { "et", "ET", 0, "Estonian (Ethiopia)" },
  { "eu", 0, 0, "Basque" },
  { "eu", "ES", 0, "Basque (Spain)" },
  { "fa", 0, 0, "Persian" },
  { "fa", "AF", 0, "Persian (Afghanistan)" },
  { "fa", "IR", 0, "Persian (Iran)" },
  { "fi", 0, 0, "Finnish" },
  { "fi", "FI", 0, "Finnish (Finland)" },
  { "fo", 0, 0, "Faroese" },
  { "fo", "FO", 0, "Faeroese (Faroe Islands)" },
  { "fr", 0, 0, "French" },
  { "fr", "CA", 0, "French (Canada)" },
  { "fr", "CH", 0, "French (Switzerland)" },
  { "fr", "FR", 0, "French (France)" },
  { "fr", "LU", 0, "French (Luxembourg)" },
  { "fy", 0, 0, "Frisian" },
  { "ga", 0, 0, "Irish" },
  { "gd", 0, 0, "Gaelic Scots" },
  { "gl", 0, 0, "Galician" },
  { "gl", "ES", 0, "Galician (Spain)" },
  { "gn", 0, 0, "Guarani" },
  { "gu", 0, 0, "Gujarati" },
  { "gv", 0, 0, "Manx" },
  { "ha", 0, 0, "Hausa" },
  { "he", 0, 0, "Hebrew" },
  { "he", "IL", 0, "Hebrew (Israel)" },
  { "hi", 0, 0, "Hindi" },
  { "hr", 0, 0, "Croatian" },
  { "hr", "HR", 0, "Croatian (Croatia)" },
  { "hu", 0, 0, "Hungarian" },
  { "hu", "HU", 0, "Hungarian (Hungary)" },
  { "hy", 0, 0, "Armenian" },
  { "ia", 0, 0, "Interlingua" },
  { "id", 0, 0, "Indonesian" },
  { "id", "ID", 0, "Indonesian (Indonesia)" },
  { "is", 0, 0, "Icelandic" },
  { "is", "IS", 0, "Icelandic (Iceland)" },
  { "it", 0, 0, "Italian" },
  { "it", "CH", 0, "Italian (Switzerland)" },
  { "it", "IT", 0, "Italian (Italy)" },
  { "iu", 0, 0, "Inuktitut" },
  { "ja", 0, 0, "Japanese" },
  { "ja", "JP", 0, "Japanese (Japan)" },
  { "ka", 0, 0, "Georgian" },
  { "kk", 0, 0, "Kazakh" },
  { "kl", 0, 0, "Kalaallisut" },
  { "km", 0, 0, "Khmer" },
  { "km", "KH", 0, "Khmer (Cambodia)" },
  { "kn", 0, 0, "Kannada" },
  { "ko", 0, 0, "Korean" },
  { "ko", "KR", 0, "Korean (Korea)" },
  { "ku", 0, 0, "Kurdish" },
  { "kw", 0, 0, "Cornish" },
  { "ky", 0, 0, "Kirghiz" },
  { "la", 0, 0, "Latin" },
  { "lo", 0, 0, "Lao" },
  { "lt", 0, 0, "Lithuanian" },
  { "lt", "LT", 0, "Lithuanian (Lithuania)" },
  { "lv", 0, 0, "Latvian" },
  { "lv", "LV", 0, "Latvian (Latvia)" },
  { "mg", 0, 0, "Malagasy" },
  { "mi", 0, 0, "Maori" },
  { "mk", 0, 0, "Macedonian" },
  { "mk", "MK", 0, "Macedonian (Macedonia)" },
  { "ml", 0, 0, "Malayalam" },
  { "mn", 0, 0, "Mongolian" },
  { "mr", 0, 0, "Marathi" },
  { "ms", 0, 0, "Malay" },
  { "ms", "MY", 0, "Malay (Malaysia)" },
  { "mt", 0, 0, "Maltese" },
  { "my", 0, 0, "Burmese" },
  { "my", "MM", 0, "Burmese (Myanmar)" },
  { "nb", 0, 0, "Norwegian Bokmal" },
  { "nb", "NO", 0, "Norwegian Bokmål (Norway)" },
  { "ne", 0, 0, "Nepali" },
  { "nl", 0, 0, "Dutch" },
  { "nl", "BE", 0, "Dutch (Belgium)" },
  { "nl", "NL", 0, "Dutch (Netherlands)" },
  { "nn", 0, 0, "Norwegian Nynorsk" },
  { "nn", "NO", 0, "Norwegian Nynorsk (Norway)" },
  { "no", 0, 0, "Norwegian" },
  { "no", "NO", 0, "Norwegian (Norway)" },
  { "no", "NY", 0, "Norwegian (NY)" },
  { "nr", 0, 0, "Ndebele, South" },
  { "oc", 0, 0, "Occitan post 1500" },
  { "om", 0, 0, "Oromo" },
  { "or", 0, 0, "Oriya" },
  { "pa", 0, 0, "Punjabi" },
  { "pl", 0, 0, "Polish" },
  { "pl", "PL", 0, "Polish (Poland)" },
  { "ps", 0, 0, "Pashto" },
  { "pt", 0, 0, "Portuguese" },
  { "pt", "BR", 0, "Brazilian" },
  { "pt", "PT", 0, "Portuguese (Portugal)" },
  { "qu", 0, 0, "Quechua" },
  { "rm", 0, 0, "Rhaeto-Romance" },
  { "ro", 0, 0, "Romanian" },
  { "ro", "RO", 0, "Romanian (Romania)" },
  { "ru", 0, 0, "Russian" },
  { "ru", "RU", 0, "Russian (Russia)" },
  { "rw", 0, 0, "Kinyarwanda" },
  { "sa", 0, 0, "Sanskrit" },
  { "sd", 0, 0, "Sindhi" },
  { "se", 0, 0, "Sami" },
  { "se", "NO", 0, "Sami (Norway)" },
  { "si", 0, 0, "Sinhalese" },
  { "sk", 0, 0, "Slovak" },
  { "sk", "SK", 0, "Slovak (Slovakia)" },
  { "sl", 0, 0, "Slovenian" },
  { "sl", "SI", 0, "Slovenian (Slovenia)" },
  { "sl", "SL", 0, "Slovenian (Sierra Leone)" },
  { "sm", 0, 0, "Samoan" },
  { "so", 0, 0, "Somali" },
  { "sp", 0, 0, "Unknown language" },
  { "sq", 0, 0, "Albanian" },
  { "sq", "AL", 0, "Albanian (Albania)" },
  { "sr", 0, 0, "Serbian" },
  { "sr", "YU", 0, "Serbian (Yugoslavia)" },
  { "sr", 0,"ije", "Serbian" },
  { "sr", 0, "latin", "Serbian" },
  { "sr", 0, "Latn", "Serbian" },
  { "ss", 0, 0, "Swati" },
  { "st", 0, 0, "Sotho" },
  { "sv", 0, 0, "Swedish" },
  { "sv", "SE", 0, "Swedish (Sweden)" },
  { "sv", "SV", 0, "Swedish (El Salvador)" },
  { "sw", 0, 0, "Swahili" },
  { "ta", 0, 0, "Tamil" },
  { "te", 0, 0, "Telugu" },
  { "tg", 0, 0, "Tajik" },
  { "th", 0, 0, "Thai" },
  { "th", "TH", 0, "Thai (Thailand)" },
  { "ti", 0, 0, "Tigrinya" },
  { "tk", 0, 0, "Turkmen" },
  { "tl", 0, 0, "Tagalog" },
  { "to", 0, 0, "Tonga" },
  { "tr", 0, 0, "Turkish" },
  { "tr", "TR", 0, "Turkish (Turkey)" },
  { "ts", 0, 0, "Tsonga" },
  { "tt", 0, 0, "Tatar" },
  { "ug", 0, 0, "Uighur" },
  { "uk", 0, 0, "Ukrainian" },
  { "uk", "UA", 0, "Ukrainian (Ukraine)" },
  { "ur", 0, 0, "Urdu" },
  { "ur", "PK", 0, "Urdu (Pakistan)" },
  { "uz", 0, 0, "Uzbek" },
  { "uz", 0, "cyrillic", "Uzbek" },
  { "vi", 0, 0, "Vietnamese" },
  { "vi", "VN", 0, "Vietnamese (Vietnam)" },
  { "wa", 0, 0, "Walloon" },
  { "wo", 0, 0, "Wolof" },
  { "xh", 0, 0, "Xhosa" },
  { "yi", 0, 0, "Yiddish" },
  { "yo", 0, 0, "Yoruba" },
  { "zh", 0, 0, "Chinese" },
  { "zh", "CN", 0, "Chinese (simplified)" },
  { "zh", "HK", 0, "Chinese (Hong Kong)" },
  { "zh", "TW", 0, "Chinese (traditional)" },
  { "zu", 0, 0, "Zulu" },
  { NULL, 0, 0, NULL } // end-of-table marker
};
//*}
/** Translate a human-readable language alias (e.g. "german", "Deutsch")
    into a locale string such as "de_DE.ISO-8859-1".

    The lookup is case-insensitive: \a name is lowercased before it is
    compared against the alias table.

    @param name  alias or locale string to resolve
    @return the locale string registered for the alias, or \a name itself
            (unchanged, original case) when no alias matches */
std::string
resolve_language_alias(const std::string& name)
{
  typedef std::map<std::string, std::string> Aliases;
  static Aliases language_aliases;
  if (language_aliases.empty())
  {
    // FIXME: Many of those are not useful for us, since we leave
    // encoding to the app, not to the language, we could/should
    // also match against all language names, not just aliases from
    // locale.alias

    // NOTE(review): the lookup key below is always lowercase, so the
    // entries whose key contains uppercase letters ("ja_JP", "ja_JP.ujis",
    // "ko_KR", "no_NO", "no_NO.ISO-8859-1") can never match. They are kept
    // as they are to preserve the current behaviour.

    // Aliases taken from /etc/locale.alias
    language_aliases["bokmal"] = "nb_NO.ISO-8859-1";
    language_aliases["bokmål"] = "nb_NO.ISO-8859-1";
    language_aliases["catalan"] = "ca_ES.ISO-8859-1";
    language_aliases["croatian"] = "hr_HR.ISO-8859-2";
    language_aliases["czech"] = "cs_CZ.ISO-8859-2";
    language_aliases["danish"] = "da_DK.ISO-8859-1";
    language_aliases["dansk"] = "da_DK.ISO-8859-1";
    language_aliases["deutsch"] = "de_DE.ISO-8859-1";
    language_aliases["dutch"] = "nl_NL.ISO-8859-1";
    language_aliases["eesti"] = "et_EE.ISO-8859-1";
    language_aliases["estonian"] = "et_EE.ISO-8859-1";
    language_aliases["finnish"] = "fi_FI.ISO-8859-1";
    language_aliases["français"] = "fr_FR.ISO-8859-1";
    language_aliases["french"] = "fr_FR.ISO-8859-1";
    language_aliases["galego"] = "gl_ES.ISO-8859-1";
    language_aliases["galician"] = "gl_ES.ISO-8859-1";
    language_aliases["german"] = "de_DE.ISO-8859-1";
    language_aliases["greek"] = "el_GR.ISO-8859-7";
    language_aliases["hebrew"] = "he_IL.ISO-8859-8";
    language_aliases["hrvatski"] = "hr_HR.ISO-8859-2";
    language_aliases["hungarian"] = "hu_HU.ISO-8859-2";
    language_aliases["icelandic"] = "is_IS.ISO-8859-1";
    language_aliases["italian"] = "it_IT.ISO-8859-1";
    language_aliases["japanese"] = "ja_JP.eucJP";
    language_aliases["japanese.euc"] = "ja_JP.eucJP";
    language_aliases["ja_JP"] = "ja_JP.eucJP";
    language_aliases["ja_JP.ujis"] = "ja_JP.eucJP";
    language_aliases["japanese.sjis"] = "ja_JP.SJIS";
    language_aliases["korean"] = "ko_KR.eucKR";
    language_aliases["korean.euc"] = "ko_KR.eucKR";
    language_aliases["ko_KR"] = "ko_KR.eucKR";
    language_aliases["lithuanian"] = "lt_LT.ISO-8859-13";
    language_aliases["no_NO"] = "nb_NO.ISO-8859-1";
    language_aliases["no_NO.ISO-8859-1"] = "nb_NO.ISO-8859-1";
    language_aliases["norwegian"] = "nb_NO.ISO-8859-1";
    language_aliases["nynorsk"] = "nn_NO.ISO-8859-1";
    language_aliases["polish"] = "pl_PL.ISO-8859-2";
    language_aliases["portuguese"] = "pt_PT.ISO-8859-1";
    language_aliases["romanian"] = "ro_RO.ISO-8859-2";
    language_aliases["russian"] = "ru_RU.ISO-8859-5";
    language_aliases["slovak"] = "sk_SK.ISO-8859-2";
    language_aliases["slovene"] = "sl_SI.ISO-8859-2";
    language_aliases["slovenian"] = "sl_SI.ISO-8859-2";
    language_aliases["spanish"] = "es_ES.ISO-8859-1";
    language_aliases["swedish"] = "sv_SE.ISO-8859-1";
    language_aliases["thai"] = "th_TH.TIS-620";
    language_aliases["turkish"] = "tr_TR.ISO-8859-9";
  }

  std::string name_lowercase;
  name_lowercase.resize(name.size());
  for(std::string::size_type i = 0; i < name.size(); ++i)
  {
    // tolower() must be given a value representable as unsigned char;
    // passing a plain (possibly negative) char is undefined for the
    // non-ASCII bytes found in names such as "français".
    name_lowercase[i] = static_cast<char>(tolower(static_cast<unsigned char>(name[i])));
  }

  Aliases::iterator i = language_aliases.find(name_lowercase);
  if (i != language_aliases.end())
  {
    return i->second;
  }
  else
  {
    return name;
  }
}
/** Look up the table entry that best matches the given language, country
    and modifier.

    @return the best matching Language, or an undefined Language when the
            language code is not present in the table */
Language
Language::from_spec(const std::string& language, const std::string& country, const std::string& modifier)
{
  typedef std::vector<LanguageSpec*> SpecList;
  typedef std::map<std::string, SpecList> SpecMap;

  // Index from language code to all table entries with that code; it is
  // filled on the first call and reused afterwards.
  static SpecMap language_map;
  if (language_map.empty())
  {
    for(LanguageSpec* spec = languages; spec->language != NULL; ++spec)
      language_map[spec->language].push_back(spec);
  }

  SpecMap::iterator found = language_map.find(language);
  if (found == language_map.end())
    return Language(); // unknown language code

  // Temporary spec describing what the caller asked for; only its
  // language, country and modifier are filled in.
  LanguageSpec wanted;
  wanted.language = language.c_str();
  wanted.country  = country.c_str();
  wanted.modifier = modifier.c_str();
  Language wanted_lang(&wanted);

  // Search for the language that best matches the given spec; country is
  // valued more than modifier. On equal scores the earlier entry wins.
  LanguageSpec* best_match = 0;
  int best_match_score = 0;
  SpecList& candidates = found->second;
  for(SpecList::size_type k = 0; k < candidates.size(); ++k)
  {
    const int score = Language::match(Language(candidates[k]), wanted_lang);
    if (score > best_match_score)
    {
      best_match_score = score;
      best_match = candidates[k];
    }
  }

  assert(best_match);
  return Language(best_match);
}
/** Create a Language from a name such as "deutsch" or "de_DE": the name is
    first run through resolve_language_alias() and the result is parsed by
    from_env(). */
Language
Language::from_name(const std::string& spec_str)
{
  const std::string resolved = resolve_language_alias(spec_str);
  return from_env(resolved);
}
/** Create a Language from an environment-variable style locale string of
    the form LANGUAGE[_COUNTRY][.CODESET][@MODIFIER], e.g.
    "de_DE.UTF-8@modifier". The codeset is ignored; language, country and
    modifier are handed to from_spec().

    @param env  locale string to parse
    @return result of from_spec() for the extracted parts */
Language
Language::from_env(const std::string& env)
{
  const std::string::size_type npos = std::string::npos;

  // Split LANGUAGE_COUNTRY.CODESET@MODIFIER into parts
  const std::string::size_type dt = env.find('.');
  const std::string::size_type at = env.find('@');

  // End of the LANGUAGE[_COUNTRY] part: the first '.' or '@', or npos if
  // the string has neither.
  const std::string::size_type tail = std::min(dt, at);

  // An underscore only separates language and country when it comes
  // before the codeset/modifier. One that appears later (as in
  // "en.ISO_8859-1") belongs to the codeset or modifier; treating it as
  // the separator would make the country length below wrap around.
  std::string::size_type ln = env.find('_');
  if (ln > tail)
    ln = npos;

  const std::string language = env.substr(0, std::min(ln, tail));

  std::string country;
  if (ln != npos && ln+1 < env.size()) // _
  {
    country = env.substr(ln+1, (tail == npos) ? npos : tail - (ln+1));
  }

  std::string modifier;
  if (at != npos && at+1 < env.size()) // @
  {
    modifier = env.substr(at+1);
  }

  return from_spec(language, country, modifier);
}
/** Wrap the given spec. Only the pointer is stored; the LanguageSpec
    itself is not copied. */
Language::Language(LanguageSpec* language_spec_) :
  language_spec(language_spec_)
{}
/** Create an undefined Language, i.e. one that wraps no spec. */
Language::Language() :
  language_spec(NULL)
{}
/** Score how well two Languages match.

    @return 0 when the language codes differ, otherwise a value from 1 to 9
            taken from the table below; a higher value is a better match */
int
Language::match(const Language& lhs, const Language& rhs)
{
  // Different language codes never match.
  if (lhs.get_language() != rhs.get_language())
    return 0;

  // Rows are selected by the country comparison, columns by the modifier
  // comparison. "Wildcard" means the values differ and at least one of
  // them is empty.
  static int match_tbl[3][3] = {
    // modifier match, wildcard, miss
    { 9, 8, 5 }, // country match
    { 7, 6, 3 }, // country wildcard
    { 4, 2, 1 }, // country miss
  };

  const std::string lhs_country = lhs.get_country();
  const std::string rhs_country = rhs.get_country();
  int row = 2; // miss unless shown otherwise
  if (lhs_country == rhs_country)
    row = 0;
  else if (lhs_country.empty() || rhs_country.empty())
    row = 1;

  const std::string lhs_modifier = lhs.get_modifier();
  const std::string rhs_modifier = rhs.get_modifier();
  int col = 2; // miss unless shown otherwise
  if (lhs_modifier == rhs_modifier)
    col = 0;
  else if (lhs_modifier.empty() || rhs_modifier.empty())
    col = 1;

  return match_tbl[row][col];
}
/** Returns the language code of the wrapped spec, or an empty string for
    an undefined Language. */
std::string
Language::get_language() const
{
  if (!language_spec)
    return "";

  return language_spec->language;
}
/** Returns the country code of the wrapped spec, or an empty string when
    the Language is undefined or the spec has no country. */
std::string
Language::get_country() const
{
  const bool has_country = language_spec && language_spec->country;
  return has_country ? language_spec->country : "";
}
/** Returns the modifier of the wrapped spec, or an empty string when the
    Language is undefined or the spec has no modifier. */
std::string
Language::get_modifier() const
{
  const bool has_modifier = language_spec && language_spec->modifier;
  return has_modifier ? language_spec->modifier : "";
}
/** Returns the human readable name of the wrapped spec, or an empty
    string for an undefined Language. */
std::string
Language::get_name() const
{
  if (!language_spec)
    return "";

  return language_spec->name;
}
/** Returns the Language in environment-variable form:
    {language}[_{country}][@{modifier}]. The country and modifier parts are
    only emitted when the spec has them; an undefined Language yields an
    empty string. */
std::string
Language::str() const
{
  if (!language_spec)
    return "";

  std::string result(language_spec->language);

  if (language_spec->country)
    result.append("_").append(language_spec->country);

  if (language_spec->modifier)
    result.append("@").append(language_spec->modifier);

  return result;
}
/** Two Languages are equal when they wrap the very same LanguageSpec
    object (pointer identity, not a field-by-field comparison).
    Note: this member is not const-qualified. */
bool
Language::operator==(const Language& rhs)
{
  return rhs.language_spec == language_spec;
}
/** Two Languages differ when they wrap different LanguageSpec objects
    (pointer identity, not a field-by-field comparison).
    Note: this member is not const-qualified. */
bool
Language::operator!=(const Language& rhs)
{
  return !(language_spec == rhs.language_spec);
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,91 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_LANGUAGE_HPP
#define HEADER_TINYGETTEXT_LANGUAGE_HPP
#include <string>
namespace tinygettext {
struct LanguageSpec;
/** Lightweight wrapper around LanguageSpec */
class Language
{
private:
/** Wrapped spec; null for an undefined Language */
LanguageSpec* language_spec;
Language(LanguageSpec* language_spec);
public:
/** Create a language from language, country and modifier code:
Example: Language::from_spec("de", "DE"); */
static Language from_spec(const std::string& language,
const std::string& country = std::string(),
const std::string& modifier = std::string());
/** Create a language from a language name or locale string:
Example: Language::from_name("deutsch");
Example: Language::from_name("de_DE"); */
static Language from_name(const std::string& str);
/** Create a language from an environment variable style string (e.g de_DE.UTF-8@modifier) */
static Language from_env(const std::string& env);
/** Compares two Languages, returns 0 on mismatch and a score
between 1 and 9 on match, the higher the score the better the
match */
static int match(const Language& lhs, const Language& rhs);
/** Create an undefined Language object */
Language();
/** True when this Language wraps a spec, false for an undefined Language.
NOTE(review): the conversion is implicit, so a Language can silently
convert to bool in comparisons and arithmetic. */
operator bool() const { return language_spec!=NULL; }
/** Returns the language code (i.e. de, en, fr) */
std::string get_language() const;
/** Returns the country code (i.e. DE, AT, US) */
std::string get_country() const;
/** Returns the modifier of the language (i.e. latn or Latn for
Serbian with non-cyrillic characters) */
std::string get_modifier() const;
/** Returns the human readable name of the Language */
std::string get_name() const;
/** Returns the Language as string in the form of an environment
variable: {language}_{country}@{modifier} */
std::string str() const;
// NOTE(review): operator== and operator!= are not const-qualified, so
// they cannot be invoked on a const left-hand Language.
bool operator==(const Language& rhs);
bool operator!=(const Language& rhs);
/** Ordering on the wrapped spec pointer, defined right after the class */
friend bool operator<(const Language& lhs, const Language& rhs);
};
/** Orders Languages by the address of the LanguageSpec they wrap. */
inline bool operator<(const Language& lhs, const Language& rhs)
{
  return rhs.language_spec > lhs.language_spec;
}
} // namespace tinygettext
#endif
/* EOF */

70
src/tinygettext/log.cpp Normal file
View File

@ -0,0 +1,70 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include <iostream>
#include "log.hpp"
namespace tinygettext {
// Definitions of the static callback pointers; all three levels start
// out pointing at default_log_callback.
Log::log_callback_t Log::log_info_callback = &Log::default_log_callback;
Log::log_callback_t Log::log_warning_callback = &Log::default_log_callback;
Log::log_callback_t Log::log_error_callback = &Log::default_log_callback;
/** Default sink for log messages: writes \a str to std::cerr, prefixed
    with "tinygettext: ". Nothing is appended after the message. */
void
Log::default_log_callback(const std::string& str)
{
  std::ostream& sink = std::cerr;
  sink << "tinygettext: ";
  sink << str;
}
/** Install the callback stored in Log::log_info_callback. */
void
Log::set_log_info_callback(log_callback_t callback)
{
  Log::log_info_callback = callback;
}
/** Install the callback stored in Log::log_warning_callback. */
void
Log::set_log_warning_callback(log_callback_t callback)
{
  Log::log_warning_callback = callback;
}
/** Install the callback stored in Log::log_error_callback. */
void
Log::set_log_error_callback(log_callback_t callback)
{
  Log::log_error_callback = callback;
}
/** Create a log message object that will hand its text to \a callback_
    when it is destroyed. The message buffer starts out empty. */
Log::Log(log_callback_t callback_)
  : callback(callback_)
  , out()
{}
/** Deliver everything written to the stream to the callback. The callback
    is invoked unconditionally, so it must not be null here. */
Log::~Log()
{
  const std::string message = out.str();
  callback(message);
}
std::ostream&
Log::get()
{
return out;
}
} // namespace tinygettext
/* EOF */

56
src/tinygettext/log.hpp Normal file
View File

@ -0,0 +1,56 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_LOG_HPP
#define HEADER_TINYGETTEXT_LOG_HPP
#include <sstream>
namespace tinygettext {
/** Collects the text of one log message in a stream and passes it to a
callback when the object is destroyed. */
class Log
{
public:
/** Signature of a log sink: receives the complete message text */
typedef void (*log_callback_t)(const std::string&);
// Currently installed sinks, one per log level.
static log_callback_t log_info_callback;
static log_callback_t log_warning_callback;
static log_callback_t log_error_callback;
/** Sink used until another one is installed */
static void default_log_callback(const std::string& str);
// Setters for the three static sinks above.
static void set_log_info_callback(log_callback_t callback);
static void set_log_warning_callback(log_callback_t callback);
static void set_log_error_callback(log_callback_t callback);
private:
/** Sink this message is delivered to */
log_callback_t callback;
/** Buffer the message text is written into */
std::ostringstream out;
public:
Log(log_callback_t callback);
~Log();
/** Stream to write the message text to */
std::ostream& get();
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,34 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_LOG_STREAM_HPP
#define HEADER_TINYGETTEXT_LOG_STREAM_HPP
#include "log.hpp"
namespace tinygettext {
// FIXME: very bad to have such things in the API
//
// Each macro expands to an if/else whose else-branch yields the stream of
// a temporary Log, so it is used like a stream: log_error << "text";
// When the callback for that level is null the streamed expression is
// skipped entirely. Otherwise the temporary Log hands the collected text
// to the callback it was constructed with when it is destroyed, so each
// macro must construct the Log with the same callback it tests.
#define log_error if (!Log::log_error_callback); else (Log(Log::log_error_callback)).get()
#define log_warning if (!Log::log_warning_callback); else (Log(Log::log_warning_callback)).get()
#define log_info if (!Log::log_info_callback); else (Log(Log::log_info_callback)).get()
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,89 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "plural_forms.hpp"
#include <map>
namespace tinygettext {
/**
 * Plural selector functions.
 *
 * Each function takes the count \a n and returns the index of the msgstr[]
 * entry of a .po file that has to be used for that count, e.g.:
 *
 *   msgstr[0] = "You got %d error";
 *   msgstr[1] = "You got %d errors";
 *          ^-- value returned by the selector
 *
 * The digit in a function name is the number of distinct indices it can
 * return, except for plural3_sl, which can return four (0..3).
 */

/** Single form, whatever the count. */
unsigned int plural1(int)
{
  return 0;
}

/** Two forms: index 0 for exactly one, index 1 otherwise. */
unsigned int plural2_1(int n)
{
  return n == 1 ? 0u : 1u;
}

/** Two forms: index 1 only for counts greater than one. */
unsigned int plural2_2(int n)
{
  return n > 1 ? 1u : 0u;
}

/** Two forms: index 0 for one and for every count ending in the digit 1. */
unsigned int plural2_mk(int n)
{
  if (n == 1 || n % 10 == 1)
    return 0;
  return 1;
}

/** Three forms: ends in 1 but not 11 -> 0, other non-zero -> 1, zero -> 2. */
unsigned int plural3_lv(int n)
{
  if (n % 10 == 1 && n % 100 != 11)
    return 0;
  return n != 0 ? 1u : 2u;
}

/** Three forms: one -> 0, two -> 1, everything else -> 2. */
unsigned int plural3_ga(int n)
{
  if (n == 1)
    return 0;
  if (n == 2)
    return 1;
  return 2;
}

/** Three forms: ends in 1 but not 11 -> 0, ends in 2..9 outside 10..19 -> 1, else 2. */
unsigned int plural3_lt(int n)
{
  const int units    = n % 10;
  const int hundreds = n % 100;

  if (units == 1 && hundreds != 11)
    return 0;
  if (units >= 2 && (hundreds < 10 || hundreds >= 20))
    return 1;
  return 2;
}

/** Three forms: ends in 1 but not 11 -> 0, ends in 2..4 outside 10..19 -> 1, else 2. */
unsigned int plural3_1(int n)
{
  const int units    = n % 10;
  const int hundreds = n % 100;

  if (units == 1 && hundreds != 11)
    return 0;
  if (units >= 2 && units <= 4 && (hundreds < 10 || hundreds >= 20))
    return 1;
  return 2;
}

/** Three forms: one -> 0, two to four -> 1, everything else -> 2. */
unsigned int plural3_sk(int n)
{
  if (n == 1)
    return 0;
  if (n >= 2 && n <= 4)
    return 1;
  return 2;
}

/** Three forms: one -> 0, ends in 2..4 outside 10..19 -> 1, else 2. */
unsigned int plural3_pl(int n)
{
  const int units    = n % 10;
  const int hundreds = n % 100;

  if (n == 1)
    return 0;
  if (units >= 2 && units <= 4 && (hundreds < 10 || hundreds >= 20))
    return 1;
  return 2;
}

/** Selects by n%100: 1 -> 0, 2 -> 1, 3 or 4 -> 2, anything else -> 3. */
unsigned int plural3_sl(int n)
{
  switch (n % 100)
  {
    case 1:  return 0;
    case 2:  return 1;
    case 3:
    case 4:  return 2;
    default: return 3;
  }
}

/** Four forms: one -> 0, two -> 1, three to ten -> 2, everything else -> 3. */
unsigned int plural4_ar(int n)
{
  if (n == 1)
    return 0;
  if (n == 2)
    return 1;
  if (n >= 3 && n <= 10)
    return 2;
  return 3;
}
/**
 * Maps a "Plural-Forms:" header line to one of the built-in plural rules.
 *
 * All whitespace is removed from \a str and the result is looked up in a
 * table of known header spellings that is filled on the first call.
 *
 * @param str  complete header line, including the "Plural-Forms:" prefix
 * @return the matching PluralForms, or a default-constructed PluralForms
 *         (which converts to false) when the header is not recognised
 */
PluralForms
PluralForms::from_string(const std::string& str)
{
  typedef std::map<std::string, PluralForms> PluralFormsMap;
  static PluralFormsMap plural_forms;

  if (plural_forms.empty())
  {
    // Note that the plural forms here shouldn't contain any spaces
    plural_forms["Plural-Forms:nplurals=1;plural=0;"] = PluralForms(1, plural1);
    plural_forms["Plural-Forms:nplurals=2;plural=(n!=1);"] = PluralForms(2, plural2_1);
    plural_forms["Plural-Forms:nplurals=2;plural=n!=1;"] = PluralForms(2, plural2_1);
    plural_forms["Plural-Forms:nplurals=2;plural=(n>1);"] = PluralForms(2, plural2_2);
    plural_forms["Plural-Forms:nplurals=2;plural=n==1||n%10==1?0:1;"] = PluralForms(2, plural2_mk);

    // Three-form rule selected by plural3_lv. The header declares nplurals=3,
    // so the stored count has to be 3 as well. Both the parenthesised and the
    // bare spelling of the expression are accepted.
    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n!=0?1:2);"] = PluralForms(3, plural3_lv);
    plural_forms["Plural-Forms:nplurals=3;plural=n%10==1&&n%100!=11?0:n!=0?1:2;"] = PluralForms(3, plural3_lv);
    // Spelling with an unbalanced ')' kept so that any header that matched
    // this key before keeps matching.
    plural_forms["Plural-Forms:nplurals=3;plural=n%10==1&&n%100!=11?0:n!=0?1:2);"] = PluralForms(3, plural3_lv);

    plural_forms["Plural-Forms:nplurals=3;plural=n==1?0:n==2?1:2;"] = PluralForms(3, plural3_ga);
    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n%10>=2&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_lt);
    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n%10>=2&&n%10<=4&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_1);
    plural_forms["Plural-Forms:nplurals=3;plural=(n==1)?0:(n>=2&&n<=4)?1:2;"] = PluralForms(3, plural3_sk);
    plural_forms["Plural-Forms:nplurals=3;plural=(n==1?0:n%10>=2&&n%10<=4&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_pl);

    // plural3_sl yields the indices 0..3, i.e. four forms, so the header that
    // goes with it declares nplurals=4. The nplurals=3 spelling is kept
    // unchanged for headers that matched it before.
    plural_forms["Plural-Forms:nplurals=4;plural=(n%100==1?0:n%100==2?1:n%100==3||n%100==4?2:3);"] = PluralForms(4, plural3_sl);
    plural_forms["Plural-Forms:nplurals=3;plural=(n%100==1?0:n%100==2?1:n%100==3||n%100==4?2:3);"] = PluralForms(3, plural3_sl);

    plural_forms["Plural-Forms:nplurals=4;plural=n==1?0:n==2?1:n>=3&&n<=10?2:3;"] = PluralForms(4, plural4_ar);
  }

  // Remove spaces from string before lookup. isspace() is only defined for
  // values representable as unsigned char (or EOF), hence the cast.
  std::string space_less_str;
  space_less_str.reserve(str.size());
  for(std::string::size_type i = 0; i < str.size(); ++i)
  {
    if (!isspace(static_cast<unsigned char>(str[i])))
      space_less_str += str[i];
  }

  const PluralFormsMap::const_iterator it = plural_forms.find(space_less_str);
  if (it != plural_forms.end())
    return it->second;

  return PluralForms();
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,61 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_PLURAL_FORMS_HPP
#define HEADER_TINYGETTEXT_PLURAL_FORMS_HPP
#include <string>
namespace tinygettext {
/** Signature of a plural selector: maps a count to a msgstr[] index. */
typedef unsigned int (*PluralFunc)(int n);

/**
 * A plural rule: the number of plural forms of a language together with the
 * function that selects the form for a given count.
 *
 * A default-constructed object has no selector and converts to false.
 */
class PluralForms
{
private:
  unsigned int nplural;  ///< number of plural forms
  PluralFunc   plural;   ///< selector function, 0 when unset

public:
  /** Builds the rule that corresponds to a "Plural-Forms:" header line. */
  static PluralForms from_string(const std::string& str);

  /** Creates an unset rule (no forms, no selector). */
  PluralForms()
    : nplural(0),
      plural(0)
  {}

  /**
   * @param nplural_ number of plural forms
   * @param plural_  selector used by get_plural()
   */
  PluralForms(unsigned int nplural_, PluralFunc plural_)
    : nplural(nplural_),
      plural(plural_)
  {}

  unsigned int get_nplural() const { return nplural; }

  /** Index of the form to use for \a n; 0 when no selector is set. */
  unsigned int get_plural(int n) const { if (plural) return plural(n); else return 0; }

  // The comparisons are const so that they are also selected for const
  // operands. A non-const operator cannot be called on a const object, and
  // the comparison would then silently go through operator bool and compare
  // only whether both rules are set.
  bool operator==(const PluralForms& other) const { return nplural == other.nplural && plural == other.plural; }
  bool operator!=(const PluralForms& other) const { return !(*this == other); }

  /** True when a selector function is set. */
  operator bool() const {
    return plural != 0;
  }
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,496 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "po_parser.hpp"
#include <iostream>
#include <ctype.h>
#include <string>
#include <istream>
#include <string.h>
#include <map>
#include <stdlib.h>
#include "language.hpp"
#include "log_stream.hpp"
#include "iconv.hpp"
#include "dictionary.hpp"
#include "plural_forms.hpp"
namespace tinygettext {
/** When set, get_string() also warns about spacing deviations around strings. */
bool POParser::pedantic = true;

/**
 * Convenience entry point: reads the .po data from \a in and stores the
 * translations in \a dict.
 *
 * @param filename name reported in warning/error messages
 * @param in       stream the .po data is read from
 * @param dict     dictionary that receives the entries
 */
void
POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict)
{
  POParser(filename, in, dict).parse();
}
// Tag exception: thrown by POParser::error() and caught in POParser::parse()
// to abandon the entry that is currently being parsed.
class POParserError {};
/**
 * Sets up a parser for one .po stream; nothing is read until parse() runs.
 *
 * @param filename_  name used in warning/error messages
 * @param in_        stream the .po data is read from
 * @param dict_      dictionary that receives the parsed entries
 * @param use_fuzzy_ whether entries flagged as fuzzy are added as well
 */
POParser::POParser(const std::string& filename_,
                   std::istream&      in_,
                   Dictionary&        dict_,
                   bool               use_fuzzy_)
  : filename(filename_)
  , in(in_)
  , dict(dict_)
  , use_fuzzy(use_fuzzy_)
  , running(false)
  , eof(false)
  , big5(false)
  , line_number(0)
  , current_line()
  // conv() -- charset conversion is currently disabled
{
}
/** Nothing to release: the stream and the dictionary are only referenced. */
POParser::~POParser() {}
/**
 * Logs a warning in the form "<file>:<line>: warning: <msg>: <current line>".
 * Parsing continues afterwards.
 */
void
POParser::warning(const std::string& msg)
{
  log_warning << filename << ":" << line_number
              << ": warning: " << msg
              << ": " << current_line
              << std::endl;
}
/**
 * Logs an error in the form "<file>:<line>: error: <msg>: <current line>",
 * skips ahead to the next empty (or comment) line and throws POParserError.
 * This function never returns normally.
 */
void
POParser::error(const std::string& msg)
{
  log_error << filename << ":" << line_number
            << ": error: " << msg
            << ": " << current_line
            << std::endl;

  // Recovery: always consume at least one line, then keep reading until the
  // input ends or a line that separates entries is reached.
  next_line();
  while (!eof && !is_empty_line())
    next_line();

  throw POParserError();
}
/**
 * Reads the next input line into current_line and bumps line_number.
 * Sets eof once no further line can be read.
 */
void
POParser::next_line()
{
  ++line_number;

  if (std::getline(in, current_line))
    return;

  eof = true;
}
/**
 * Decodes one double-quoted string segment of current_line and appends the
 * decoded characters to \a out.
 *
 * Backslash escapes (\a \b \v \n \t \r \" \\) are translated; an unknown
 * escape produces a warning and is copied through unchanged. While big5 is
 * set, a byte in the range 0x81..0xfe is copied together with the byte that
 * follows it, so that a trail byte is never taken for a quote or backslash.
 *
 * @param out   receives the decoded characters
 * @param skip  index in current_line of the opening '"'
 *
 * Malformed input is reported through error(), which throws POParserError.
 */
void
POParser::get_string_line(std::ostringstream& out,unsigned int skip)
{
  if (skip+1 >= static_cast<unsigned int>(current_line.size()))
    error("unexpected end of line");

  if (current_line[skip] != '"')
    error("expected start of string '\"'");

  std::string::size_type i;
  for(i = skip+1; ; ++i)
  {
    // The bounds check comes first so that no character at or beyond the end
    // of the line is ever read.
    if (i >= current_line.size())
      error("unexpected end of string");

    const char c = current_line[i];
    if (c == '"')
      break; // closing quote

    if (big5 && static_cast<unsigned char>(c) >= 0x81 && static_cast<unsigned char>(c) <= 0xfe)
    {
      // two-byte character: copy lead and trail byte verbatim
      out << c;

      i += 1;
      if (i >= current_line.size())
        error("invalid big5 encoding");

      out << current_line[i];
    }
    else if (c == '\\')
    {
      i += 1;
      if (i >= current_line.size())
        error("unexpected end of string in handling '\\'");

      switch (current_line[i])
      {
        case 'a':  out << '\a'; break;
        case 'b':  out << '\b'; break;
        case 'v':  out << '\v'; break;
        case 'n':  out << '\n'; break;
        case 't':  out << '\t'; break;
        case 'r':  out << '\r'; break;
        case '"':  out << '"'; break;
        case '\\': out << '\\'; break;
        default:
        {
          std::ostringstream err;
          err << "unhandled escape '\\" << current_line[i] << "'";
          warning(err.str());

          // keep the backslash and the character as they were written
          out << current_line[i-1] << current_line[i];
          break;
        }
      }
    }
    else
    {
      out << c;
    }
  }

  // process trailing garbage in line and warn if there is any
  // (isspace() requires a value representable as unsigned char)
  for(i = i+1; i < current_line.size(); ++i)
  {
    if (!isspace(static_cast<unsigned char>(current_line[i])))
    {
      warning("unexpected garbage after string ignoren");
      break;
    }
  }
}
/**
 * Reads the string value that follows a keyword on the current line,
 * including every continuation line that consists of a quoted string.
 *
 * @param skip  index in current_line just past the keyword
 * @return the concatenated, unescaped string
 *
 * On return current_line holds the first line that is not a continuation of
 * the string. Malformed input is reported through error(), which throws.
 */
std::string
POParser::get_string(unsigned int skip)
{
  std::ostringstream out;

  if (skip+1 >= static_cast<unsigned int>(current_line.size()))
    error("unexpected end of line");

  if (current_line[skip] == ' ' && current_line[skip+1] == '"')
  {
    // regular case: exactly one space between keyword and string
    get_string_line(out, skip+1);
  }
  else
  {
    if (pedantic)
      warning("keyword and string must be seperated by a single space");

    // tolerate any amount of whitespace in front of the opening quote
    for(;;)
    {
      if (skip >= static_cast<unsigned int>(current_line.size()))
        error("unexpected end of line");
      else if (current_line[skip] == '\"')
      {
        get_string_line(out, skip);
        break;
      }
      else if (!isspace(static_cast<unsigned char>(current_line[skip])))
      {
        error("string must start with '\"'");
      }

      skip += 1;
    }
  }

  // Collect continuation lines: a line whose first non-space character is a
  // quote extends the string, anything else ends it.
  for(;;)
  {
    next_line();

    bool continued = false;
    for(std::string::size_type i = 0; i < current_line.size(); ++i)
    {
      const char c = current_line[i];
      if (c == '"')
      {
        // any character in front of the quote is leading whitespace
        if (i > 0 && pedantic)
          warning("leading whitespace before string");

        get_string_line(out, i);
        continued = true;
        break;
      }
      else if (!isspace(static_cast<unsigned char>(c)))
      {
        break;
      }
    }

    if (!continued)
      break;
  }

  return out.str();
}
/**
 * Tells whether \a lhs starts with \a rhs. An empty \a rhs is a prefix of
 * every string. Both arguments are taken by reference so that no copy is
 * made per call.
 */
static bool has_prefix(const std::string& lhs, const std::string& rhs)
{
  return lhs.length() >= rhs.length() &&
         lhs.compare(0, rhs.length(), rhs) == 0;
}
/**
 * Evaluates the .po header, i.e. the msgstr that belongs to the empty msgid.
 *
 * The header is processed line by line:
 *  - "Content-Type:" provides the charset; it is upper-cased, and "BIG5"
 *    switches the string reader to big5 mode.
 *  - "Plural-Forms:" is resolved through PluralForms::from_string() and is
 *    stored in the dictionary unless the dictionary already has plural forms.
 *
 * A last line that is not terminated by '\n' is evaluated as well.
 *
 * @param header  decoded header text
 */
void
POParser::parse_header(const std::string& header)
{
  static const char content_type_prefix[] = "Content-Type: text/plain; charset=";

  std::string from_charset;
  std::string::size_type start = 0;
  const std::string::size_type header_len = header.length();

  // i == header_len stands for the end of the text and terminates a last
  // line that has no '\n' of its own.
  for(std::string::size_type i = 0; i <= header_len; ++i)
  {
    const bool at_end = (i == header_len);

    if (!at_end && header[i] != '\n')
      continue;

    if (at_end && start >= header_len)
      break; // the text ended with '\n', nothing is left over

    const std::string line = header.substr(start, i - start);

    if (has_prefix(line, "Content-Type:"))
    {
      const std::string::size_type len = strlen(content_type_prefix);
      if (line.compare(0, len, content_type_prefix) == 0)
      {
        from_charset = line.substr(len);

        // toupper() requires a value representable as unsigned char
        for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
          *ch = static_cast<char>(toupper(static_cast<unsigned char>(*ch)));
      }
      else
      {
        warning("malformed Content-Type header");
      }
    }
    else if (has_prefix(line, "Plural-Forms:"))
    {
      PluralForms plural_forms = PluralForms::from_string(line);
      if (!plural_forms)
      {
        warning("unknown Plural-Forms given");
      }
      else if (!dict.get_plural_forms())
      {
        dict.set_plural_forms(plural_forms);
      }
      else if (dict.get_plural_forms() != plural_forms)
      {
        warning("Plural-Forms missmatch between .po file and dictionary");
      }
    }

    start = i+1;
  }

  if (from_charset.empty() || from_charset == "CHARSET")
  {
    warning("charset not specified for .po, fallback to utf-8");
    from_charset = "UTF-8";
  }
  else if (from_charset == "BIG5")
  {
    big5 = true;
  }

  //conv.set_charsets(from_charset, dict.get_charset());
}
/**
 * Tells whether current_line separates entries: it is empty, consists of
 * whitespace only, or is a comment, i.e. a single '#' or a '#' followed by a
 * whitespace character. Lines such as "#," or "#:" do not count as empty.
 */
bool
POParser::is_empty_line()
{
  if (current_line.empty())
    return true;

  // isspace() requires a value representable as unsigned char, and lines of
  // a .po file regularly contain bytes above 0x7f.
  if (current_line[0] == '#')
  { // handle comments as empty lines
    return current_line.size() == 1 ||
           isspace(static_cast<unsigned char>(current_line[1])) != 0;
  }

  for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
  {
    if (!isspace(static_cast<unsigned char>(*i)))
      return false;
  }

  return true;
}
/** Tells whether current_line starts with \a prefix_str. */
bool
POParser::prefix(const char* prefix_str)
{
  const std::string::size_type prefix_len = strlen(prefix_str);
  return current_line.compare(0, prefix_len, prefix_str) == 0;
}
/**
 * Parses the whole input stream entry by entry and adds the translations to
 * the dictionary.
 *
 * One entry consists of optional '#' lines, an optional msgctxt, a msgid and
 * either a msgstr or a msgid_plural followed by msgstr[N] lines. The entry
 * with the empty msgid is the header and is passed to parse_header().
 * Entries flagged as fuzzy are only added when use_fuzzy is set, and entries
 * without any non-empty msgstr are not added.
 *
 * A malformed entry is reported through error(), which skips to the next
 * empty line and throws POParserError; the exception is caught here and
 * parsing goes on with the following entry.
 */
void
POParser::parse()
{
  next_line();

  // skip UTF-8 intro that some text editors produce
  // see http://en.wikipedia.org/wiki/Byte-order_mark
  // The line bytes are converted to unsigned char before the comparison:
  // where plain char is signed they are negative and could never be equal
  // to 0xef/0xbb/0xbf otherwise.
  if (current_line.size() >= 3 &&
      static_cast<unsigned char>(current_line[0]) == 0xef &&
      static_cast<unsigned char>(current_line[1]) == 0xbb &&
      static_cast<unsigned char>(current_line[2]) == 0xbf)
  {
    current_line = current_line.substr(3);
  }

  // Parser structure
  while(!eof)
  {
    try
    {
      bool fuzzy = false;
      bool has_msgctxt = false;
      std::string msgctxt;
      std::string msgid;

      // comment and flag lines in front of the entry
      while(prefix("#"))
      {
        if (current_line.size() >= 2 && current_line[1] == ',')
        {
          // FIXME: Rather simplistic hunt for fuzzy flag
          if (current_line.find("fuzzy", 2) != std::string::npos)
            fuzzy = true;
        }

        next_line();
      }

      if (!is_empty_line())
      {
        if (prefix("msgctxt"))
        {
          has_msgctxt = true;
          msgctxt = get_string(7);
        }

        if (prefix("msgid"))
          msgid = get_string(5);
        else
          error("expected 'msgid'");

        if (prefix("msgid_plural"))
        {
          std::string msgid_plural = get_string(12);
          std::vector<std::string> msgstr_num;
          bool saw_nonempty_msgstr = false;

          // one pass per msgstr[N] line
        next:
          if (is_empty_line())
          {
            if (msgstr_num.empty())
              error("expected 'msgstr[N] (0 <= N <= 9)'");
          }
          else if (prefix("msgstr[") &&
                   current_line.size() > 8 &&
                   isdigit(static_cast<unsigned char>(current_line[7])) && current_line[8] == ']')
          {
            unsigned int number = static_cast<unsigned int>(current_line[7] - '0');
            std::string msgstr = get_string(9);

            if(!msgstr.empty())
              saw_nonempty_msgstr = true;

            if (number >= msgstr_num.size())
              msgstr_num.resize(number+1);

            msgstr_num[number] = msgstr; //conv.convert(msgstr);
            goto next;
          }
          else
          {
            error("expected 'msgstr[N]'");
          }

          if (!is_empty_line())
            error("expected 'msgstr[N]' or empty line");

          if (saw_nonempty_msgstr)
          {
            if (use_fuzzy || !fuzzy)
            {
              if (!dict.get_plural_forms())
              {
                warning("msgstr[N] seen, but no Plural-Forms given");
              }
              else
              {
                if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
                {
                  warning("msgstr[N] count doesn't match Plural-Forms.nplural");
                }
              }

              if (has_msgctxt)
                dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
              else
                dict.add_translation(msgid, msgid_plural, msgstr_num);
            }

            // debugging aid, disabled
            if (0)
            {
              std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
              std::cout << "msgid \"" << msgid << "\"" << std::endl;
              std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
              for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
                std::cout << "msgstr[" << i << "] \"" << msgstr_num[i] /*conv.convert(msgstr_num[i])*/ << "\"" << std::endl;
              std::cout << std::endl;
            }
          }
        }
        else if (prefix("msgstr"))
        {
          std::string msgstr = get_string(6);

          if (msgid.empty())
          {
            // the entry with the empty msgid carries the header
            parse_header(msgstr);
          }
          else if(!msgstr.empty())
          {
            if (use_fuzzy || !fuzzy)
            {
              if (has_msgctxt)
                dict.add_translation(msgctxt, msgid, msgstr /*conv.convert(msgstr)*/);
              else
                dict.add_translation(msgid, msgstr /*conv.convert(msgstr)*/);
            }

            // debugging aid, disabled
            if (0)
            {
              std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
              std::cout << "msgid \"" << msgid << "\"" << std::endl;
              std::cout << "msgstr \"" << msgstr /*conv.convert(msgstr)*/ << "\"" << std::endl;
              std::cout << std::endl;
            }
          }
        }
        else
        {
          error("expected 'msgstr' or 'msgid_plural'");
        }
      }

      if (!is_empty_line())
        error("expected empty line");

      next_line();
    }
    catch(POParserError&)
    {
      // error() has already logged the problem and skipped to the next
      // entry separator; continue with the following entry
    }
  }
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,80 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_PO_PARSER_HPP
#define HEADER_TINYGETTEXT_PO_PARSER_HPP
#include <iosfwd>
#include <string>
//#include "iconv.hpp"
namespace tinygettext {
class Dictionary;
/**
 * Line-based reader for .po files that fills a Dictionary.
 *
 * The only public entry point is the static parse(); the constructor and all
 * helpers are private. Copying is disabled (copy constructor and assignment
 * are declared private and not used).
 */
class POParser
{
private:
// Name reported in warning/error messages.
std::string filename;
// Input stream; held by reference.
std::istream& in;
// Dictionary receiving the parsed entries; held by reference.
Dictionary& dict;
// Whether entries flagged as fuzzy are added to the dictionary.
bool use_fuzzy;
bool running;
// Set by next_line() once no further line can be read.
bool eof;
// Set by parse_header() when the charset is BIG5; changes string decoding.
bool big5;
// Number of lines read so far, used in messages.
int line_number;
// The line currently being examined.
std::string current_line;
//IConv conv;
POParser(const std::string& filename, std::istream& in_, Dictionary& dict_, bool use_fuzzy = true);
~POParser();
/** Evaluates the header entry (charset, plural forms). */
void parse_header(const std::string& header);
/** Parses the whole stream. */
void parse();
/** Reads the next line into current_line. */
void next_line();
/** Reads a (possibly multi-line) string starting at index \a skip of current_line. */
std::string get_string(unsigned int skip);
/** Decodes the quoted string of current_line that starts at index \a skip. */
void get_string_line(std::ostringstream& str,unsigned int skip);
/** True for empty, whitespace-only and comment lines. */
bool is_empty_line();
/** True when current_line starts with the given text. */
bool prefix(const char* );
// error() always throws; the attribute lets compilers that support it know
// that the call does not return.
#ifdef WIN32
void error(const std::string& msg);
#else
void error(const std::string& msg) __attribute__((__noreturn__));
#endif
/** Logs a warning and continues. */
void warning(const std::string& msg);
public:
/** @param filename name of the istream, only used in error messages
@param in stream from which the PO file is read.
@param dict dictionary to which the strings are written */
static void parse(const std::string& filename, std::istream& in, Dictionary& dict);
// Enables additional warnings about spacing around strings.
static bool pedantic;
private:
// Not copyable.
POParser (const POParser&);
POParser& operator= (const POParser&);
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,55 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "stk_file_system.hpp"
#include <sys/types.h>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include "io/file_manager.hpp"
namespace tinygettext {
/** The adapter keeps no state of its own, so there is nothing to set up. */
StkFileSystem::StkFileSystem() {}
/**
 * Lists the entries of a directory through the global file_manager.
 *
 * @param pathname directory to list; passed to listFiles() as a full path
 * @return the entries in the iteration order of the std::set filled by
 *         listFiles()
 */
std::vector<std::string>
StkFileSystem::open_directory(const std::string& pathname)
{
  std::set<std::string> entries;
  file_manager->listFiles(entries, pathname, /*is_full_path*/true);

  return std::vector<std::string>(entries.begin(), entries.end());
}
/**
 * Opens \a filename for reading.
 *
 * @return an owning pointer to the new input stream. Whether the file could
 *         be opened is not checked here; the caller has to test the stream.
 */
std::auto_ptr<std::istream>
StkFileSystem::open_file(const std::string& filename)
{
  std::auto_ptr<std::istream> stream(new std::ifstream(filename.c_str()));
  return stream;
}
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,38 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_STK_FILE_SYSTEM_HPP
#define HEADER_TINYGETTEXT_STK_FILE_SYSTEM_HPP
#include "file_system.hpp"
namespace tinygettext {
/**
 * FileSystem implementation for SuperTuxKart: directory listings and file
 * access for tinygettext go through this class.
 */
class StkFileSystem : public FileSystem
{
public:
StkFileSystem();
/** Returns the entries of the directory \a pathname. */
std::vector<std::string> open_directory(const std::string& pathname);
/** Returns a newly created input stream for \a filename; the caller owns it. */
std::auto_ptr<std::istream> open_file(const std::string& filename);
};
} // namespace tinygettext
#endif
/* EOF */

View File

@ -0,0 +1,22 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
namespace tinygettext {
} // namespace tinygettext
/* EOF */

View File

@ -0,0 +1,27 @@
// tinygettext - A gettext replacement that works directly on .po files
// Copyright (C) 2006 Ingo Ruhnke <grumbel@gmx.de>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef HEADER_TINYGETTEXT_TINYGETTEXT_HPP
#define HEADER_TINYGETTEXT_TINYGETTEXT_HPP
#include "dictionary.hpp"
#include "dictionary_manager.hpp"
#include "language.hpp"
#endif
/* EOF */

View File

@ -40,6 +40,9 @@
#include "io/file_manager.hpp"
#include "utils/constants.hpp"
//#include "tinygettext/iconv.hpp"
#include "utils/utf8.h"
#if ENABLE_BIDI
#include <fribidi/fribidi.h>
#endif
@ -47,6 +50,8 @@
// set to 1 to debug i18n
#define TRANSLATE_VERBOSE 0
using namespace tinygettext;
Translations* translations = NULL;
const bool REMOVE_BOM = false;
@ -65,8 +70,98 @@ const std::vector<std::string>* Translations::getLanguageList() const
return &g_language_list;
}
/**
 * Converts a NUL-terminated UTF-8 string to a 0-terminated sequence of
 * UTF-16 code units stored as wchar_t.
 *
 * @param input  UTF-8 text; must not be NULL because strlen() is applied
 * @return pointer to the first element of a function-local static buffer.
 *         The buffer is cleared and refilled by every call, so the result
 *         must be used or copied before the next call.
 *
 * NOTE(review): utf8::utf8to16 comes from utils/utf8.h; presumably it throws
 * on malformed input, which is not caught here -- confirm.
 * The block comment below is the former iconv-based implementation, kept as
 * disabled code.
 */
wchar_t* utf8_to_wide(const char* input)
{
// static buffer: keeps the returned pointer valid after the function returns
static std::vector<wchar_t> utf16line;
utf16line.clear();
utf8::utf8to16(input, input + strlen(input), back_inserter(utf16line));
// terminating 0 so that the buffer can be used as a C wide string
utf16line.push_back(0);
return &utf16line[0];
/*
static tinygettext_iconv_t cd = 0;
if (cd == 0) cd = tinygettext_iconv_open("UTF-16", "UTF-8");
if (cd == reinterpret_cast<tinygettext_iconv_t>(-1))
{
fprintf(stderr, "[utf8_to_wide] ERROR: failed to init libiconv\n");
return L"?";
}
size_t inbytesleft = strlen(input);
size_t outbytesleft = 4*inbytesleft; // Worst case scenario: ASCII -> UTF-32?
const unsigned int BUFF_SIZE = 512*4;
if (outbytesleft > BUFF_SIZE)
{
fprintf(stderr, "[utf8_to_wide] ERROR: stirng too long : '%s'\n", input);
}
static char temp_buffer[BUFF_SIZE];
// Try to convert the text.
size_t ret = tinygettext_iconv(cd, &input, &inbytesleft, (char**)&temp_buffer, &outbytesleft);
if (ret == static_cast<size_t>(-1))
{
if (errno == EILSEQ || errno == EINVAL)
{ // invalid multibyte sequence
tinygettext_iconv(cd, NULL, NULL, NULL, NULL); // reset state
// FIXME: Could try to skip the invalid byte and continue
fprintf(stderr, "[Translation] ERROR: invalid multibyte sequence in '%s'\n", input);
}
else if (errno == E2BIG)
{ // output buffer to small
fprintf(stderr, "[Translation] ERROR: E2BIG: This should never be reached\n");
}
else if (errno == EBADF)
{
fprintf(stderr, "[Translation] ERROR: EBADF: This should never be reached\n");
}
else
{
fprintf(stderr, "[Translation] ERROR: <unknown>: This should never be reached\n");
}
return L"?";
}
else
{
if (sizeof(wchar_t) == 2)
{
return (wchar_t*)temp_buffer;
}
else if (sizeof(wchar_t) == 4)
{
static wchar_t out_buffer[512];
// FIXME: endianness?
int i = 0;
for (char* ptr = temp_buffer; ; ptr += 2)
{
out_buffer[i] = (*ptr << 8) | *(ptr + 1);
if (*ptr == 0 && *(ptr + 1) == 0) break;
i++;
}
return out_buffer;
}
else
{
fprintf(stderr, "Unknown wchar_t size : %lui\n", sizeof(wchar_t));
return L"?";
}
}
*/
}
// ----------------------------------------------------------------------------
Translations::Translations()
Translations::Translations() //: m_dictionary_manager("UTF-16")
{
#ifdef ENABLE_NLS
@ -102,6 +197,7 @@ Translations::Translations()
#endif
/*
bindtextdomain (PACKAGE, file_manager->getTranslationDir().c_str());
if (sizeof(wchar_t) == 4)
@ -120,30 +216,67 @@ Translations::Translations()
}
textdomain (PACKAGE);
*/
m_dictionary_manager.add_directory( file_manager->getTranslationDir().c_str() );
/*
const std::set<Language>& languages = m_dictionary_manager.get_languages();
std::cout << "Number of languages: " << languages.size() << std::endl;
for (std::set<Language>::const_iterator i = languages.begin(); i != languages.end(); ++i)
{
const Language& language = *i;
std::cout << "Env: " << language.str() << std::endl
<< "Name: " << language.get_name() << std::endl
<< "Language: " << language.get_language() << std::endl
<< "Country: " << language.get_country() << std::endl
<< "Modifier: " << language.get_modifier() << std::endl
<< std::endl;
}
*/
const char* lang = getenv("LANG");
const char* language = getenv("LANGUAGE");
if (language != NULL && strlen(language) > 0)
{
printf("Env var LANGUAGE = '%s', which corresponds to %s\n", language, Language::from_env(language).get_name().c_str());
m_dictionary = m_dictionary_manager.get_dictionary(Language::from_env(language));
}
else if (lang != NULL && strlen(lang) > 0)
{
printf("Env var LANG = '%s'\n", lang);
m_dictionary = m_dictionary_manager.get_dictionary(Language::from_env(lang));
}
else
{
m_dictionary = m_dictionary_manager.get_dictionary();
}
// This is a silly but working hack I added to determine whether the current language is RTL or
// not, since gettext doesn't seem to provide this information
//std::string test = m_dictionary.translate("Loading");
//printf("'%s'\n", test.c_str());
// This one is just for the xgettext parser to pick up
#define ignore(X)
ignore(_(" Is this a RTL language?"));
//I18N: Do NOT literally translate this string!! Please enter Y as the translation if your language is a RTL (right-to-left) language, N (or nothing) otherwise
const char* isRtl = gettext(" Is this a RTL language?");
const wchar_t* isRtlW = reinterpret_cast<const wchar_t*>(isRtl);
const std::string isRtl = m_dictionary.translate(" Is this a RTL language?");
m_rtl = false;
for (int n=0; isRtlW[n] != 0; n++)
for (unsigned int n=0; n < isRtl.size() != 0; n++)
{
if (isRtlW[n] == 'Y')
if (isRtl[n] == 'Y')
{
m_rtl = true;
break;
}
}
#endif
} // Translations
@ -236,7 +369,7 @@ const wchar_t* Translations::w_gettext(const char* original)
#endif
#if ENABLE_NLS
const char* original_t = gettext(original);
const std::string& original_t = m_dictionary.translate(original);
#else
m_converted_string = core::stringw(original);
return m_converted_string.c_str();
@ -259,7 +392,7 @@ const wchar_t* Translations::w_gettext(const char* original)
}
}*/
if(original_t==original)
if (original_t == original)
{
m_converted_string = core::stringw(original);
@ -272,7 +405,9 @@ const wchar_t* Translations::w_gettext(const char* original)
// print
//for (int n=0;; n+=4)
wchar_t* out_ptr = (wchar_t*)original_t;
wchar_t* original_tw = utf8_to_wide(original_t.c_str());
wchar_t* out_ptr = original_tw;
if (REMOVE_BOM) out_ptr++;
#if TRANSLATE_VERBOSE

View File

@ -26,11 +26,7 @@
#include "utils/string_utils.hpp"
#if ENABLE_NLS
# ifdef __APPLE__
# include <libintl/libintl.h>
# else
# include <libintl.h>
# endif
# include "tinygettext/tinygettext.hpp"
# define _(String, ...) (translations->fribidize(StringUtils::insertValues(translations->w_gettext(String), ##__VA_ARGS__)))
# define _LTR(String, ...) (StringUtils::insertValues(translations->w_gettext(String), ##__VA_ARGS__))
@ -50,6 +46,9 @@
class Translations
{
private:
tinygettext::DictionaryManager m_dictionary_manager;
tinygettext::Dictionary m_dictionary;
irr::core::stringw m_converted_string;
bool m_rtl;

34
src/utils/utf8.h Normal file
View File

@ -0,0 +1,34 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "utf8/checked.h"
#include "utf8/unchecked.h"
#endif // header guard

327
src/utils/utf8/checked.h Normal file
View File

@ -0,0 +1,327 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include <stdexcept>
namespace utf8
{
/// Common root of every exception type this library throws; it adds
/// nothing to std::exception beyond a distinct type to catch.
class exception : public std::exception
{
};
// Exceptions that may be thrown from the library functions.
/// Signals an invalid code point. The value given at construction is
/// stored and can be read back through code_point().
class invalid_code_point : public exception {
    uint32_t rejected_cp;
public:
    invalid_code_point(uint32_t cp) : rejected_cp(cp) {}

    /// Fixed, human-readable description of the error.
    virtual const char* what() const throw()
    {
        return "Invalid code point";
    }

    /// The value this exception was constructed with.
    uint32_t code_point() const
    {
        return rejected_cp;
    }
};
/// Signals malformed UTF-8 input. The octet given at construction is
/// stored and can be read back through utf8_octet().
class invalid_utf8 : public exception {
    uint8_t bad_octet;
public:
    invalid_utf8 (uint8_t u) : bad_octet(u) {}

    /// Fixed, human-readable description of the error.
    virtual const char* what() const throw()
    {
        return "Invalid UTF-8";
    }

    /// The octet this exception was constructed with.
    uint8_t utf8_octet() const
    {
        return bad_octet;
    }
};
/// Signals malformed UTF-16 input. The 16-bit unit given at construction
/// is stored and can be read back through utf16_word().
class invalid_utf16 : public exception {
    uint16_t bad_word;
public:
    invalid_utf16 (uint16_t u) : bad_word(u) {}

    /// Fixed, human-readable description of the error.
    virtual const char* what() const throw()
    {
        return "Invalid UTF-16";
    }

    /// The 16-bit unit this exception was constructed with.
    uint16_t utf16_word() const
    {
        return bad_word;
    }
};
/// Signals that the octet range ended before a complete sequence could
/// be read. Carries no payload.
class not_enough_room : public exception {
public:
    /// Fixed, human-readable description of the error.
    virtual const char* what() const throw()
    {
        return "Not enough space";
    }
};
/// The library API - functions intended to be called by the users
// Forward declaration. replace_invalid() calls append(), which is only
// defined further down in this header. Without a declaration visible at this
// point, two-phase name lookup cannot find it when the output iterator type
// belongs to another namespace (for example std::back_insert_iterator).
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result);

/// Copies the octets of [start, end) to `out`, writing the UTF-8 encoding of
/// `replacement` in place of every invalid sequence.
///
/// @param start        first octet of the input range
/// @param end          one past the last octet of the input range
/// @param out          output iterator receiving the sanitised octets
/// @param replacement  code point emitted once per invalid sequence
/// @return the output iterator positioned after the last octet written
/// @throws not_enough_room     when validate_next reports NOT_ENOUGH_ROOM
/// @throws invalid_code_point  from append() if `replacement` is not valid
template <typename octet_iterator, typename output_iterator>
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
{
    while (start != end) {
        octet_iterator sequence_start = start;
        internal::utf_error err_code = internal::validate_next(start, end);
        switch (err_code) {
            case internal::UTF8_OK :
                // validate_next advanced `start` past the sequence: copy it verbatim
                for (octet_iterator it = sequence_start; it != start; ++it)
                    *out++ = *it;
                break;
            case internal::NOT_ENOUGH_ROOM:
                throw not_enough_room();
            case internal::INVALID_LEAD:
                // Keep the iterator returned by append(); pointer-like outputs
                // would otherwise be overwritten by the next write.
                out = append (replacement, out);
                ++start;
                break;
            case internal::INCOMPLETE_SEQUENCE:
            case internal::OVERLONG_SEQUENCE:
            case internal::INVALID_CODE_POINT:
                out = append (replacement, out);
                ++start;
                // just one replacement mark for the sequence; test for the end
                // of the range BEFORE dereferencing
                while (start != end && internal::is_trail(*start))
                    ++start;
                break;
        }
    }
    return out;
}
/// Convenience overload of replace_invalid() that uses code point 0xfffd
/// as the replacement marker.
template <typename octet_iterator, typename output_iterator>
inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
{
    const uint32_t fallback_cp = internal::mask16(0xfffd);
    return replace_invalid(start, end, out, fallback_cp);
}
/// Encodes `cp` as UTF-8 and writes the octets through `result`.
///
/// @param cp      code point to encode
/// @param result  output iterator receiving one to four octets
/// @return `result` advanced past the octets written
/// @throws invalid_code_point if internal::is_code_point_valid(cp) is false
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    if (!internal::is_code_point_valid(cp))
        throw invalid_code_point(cp);

    // one octet
    if (cp < 0x80) {
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }

    // two octets
    if (cp < 0x800) {
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }

    // three octets
    if (cp < 0x10000) {
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }

    // four octets
    *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
template <typename octet_iterator>
uint32_t next(octet_iterator& it, octet_iterator end)
{
uint32_t cp = 0;
internal::utf_error err_code = internal::validate_next(it, end, &cp);
switch (err_code) {
case internal::UTF8_OK :
break;
case internal::NOT_ENOUGH_ROOM :
throw not_enough_room();
case internal::INVALID_LEAD :
case internal::INCOMPLETE_SEQUENCE :
case internal::OVERLONG_SEQUENCE :
throw invalid_utf8(*it);
case internal::INVALID_CODE_POINT :
throw invalid_code_point(cp);
}
return cp;
}
/// Returns the code point that next() would produce at `it`. The iterator
/// is taken by value, so the caller's copy is not advanced.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it, octet_iterator end)
{
    const uint32_t cp = next(it, end);
    return cp;
}
/// Moves `it` back to the first non-trail octet before its current position
/// and returns the code point decoded from there.
///
/// @param it     in/out position; left on the octet where the backward scan stopped
/// @param start  first octet of the range; the scan never goes before it
/// @throws not_enough_room  if `it == start` on entry
/// @throws invalid_utf8     if only trail octets are found back to `start`
template <typename octet_iterator>
uint32_t prior(octet_iterator& it, octet_iterator start)
{
    // can't step back from the very beginning
    if (it == start)
        throw not_enough_room();

    octet_iterator sequence_end = it;

    // Walk backwards over trail octets until something else is found
    for (--it; internal::is_trail(*it); --it) {
        if (it == start)
            throw invalid_utf8(*it); // error - no lead byte in the sequence
    }

    return peek_next(it, sequence_end);
}
/// Deprecated in versions that include "prior".
/// Moves `it` back to the first non-trail octet before its current position
/// and returns the code point decoded from there.
///
/// @param it          in/out position; left on the octet where the scan stopped
/// @param pass_start  position at which the backward scan gives up
/// @throws invalid_utf8  if a trail octet is found at `pass_start`
template <typename octet_iterator>
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
{
    octet_iterator sequence_end = it;

    for (--it; internal::is_trail(*it); --it) {
        if (it == pass_start)
            throw invalid_utf8(*it); // error - no lead byte in the sequence
    }

    // decode through a copy so that `it` stays where the scan stopped
    octet_iterator cursor = it;
    return next(cursor, sequence_end);
}
/// Advances `it` by `n` code points by calling next() `n` times.
/// Any exception thrown by next() propagates to the caller.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n, octet_iterator end)
{
    for (distance_type step = 0; step < n; ++step) {
        next(it, end);
    }
}
/// Counts the code points in [first, last) by decoding them one after the
/// other with next(). Any exception thrown by next() propagates.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typename std::iterator_traits<octet_iterator>::difference_type count = 0;
    while (first < last) {
        next(first, last);
        ++count;
    }
    return count;
}
/// Converts the UTF-16 units in [start, end) to UTF-8 written through `result`.
///
/// @return `result` advanced past the octets written
/// @throws invalid_utf16  for a lead surrogate at the end of input, a lead
///                        surrogate not followed by a trail surrogate, or a
///                        lone trail surrogate
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
    while (start != end) {
        uint32_t cp = internal::mask16(*start++);

        if (internal::is_lead_surrogate(cp)) {
            // A lead surrogate must be followed by a trail surrogate
            if (start == end)
                throw invalid_utf16(static_cast<uint16_t>(cp));

            uint32_t trail_surrogate = internal::mask16(*start++);
            if (!internal::is_trail_surrogate(trail_surrogate))
                throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));

            cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
        }
        else if (internal::is_trail_surrogate(cp)) {
            // Lone trail surrogate
            throw invalid_utf16(static_cast<uint16_t>(cp));
        }

        result = append(cp, result);
    }
    return result;
}
/// Converts the UTF-8 octets in [start, end) to UTF-16 units written through
/// `result`. Code points above 0xffff are emitted as two units.
/// Exceptions thrown by next() propagate to the caller.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
    while (start != end) {
        const uint32_t cp = next(start, end);

        if (cp <= 0xffff) {
            *result++ = static_cast<uint16_t>(cp);
            continue;
        }

        // make a surrogate pair
        *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
        *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
    }
    return result;
}
/// Encodes every code point in [start, end) as UTF-8 through `result`,
/// using append() for each one. Exceptions from append() propagate.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start) {
        result = append(*start, result);
    }
    return result;
}
/// Decodes the UTF-8 octets in [start, end) with next() and writes each
/// code point through `result`. Exceptions from next() propagate.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start != end) {
        const uint32_t cp = next(start, end);
        *result++ = cp;
    }
    return result;
}
// Bidirectional iterator adaptor over a range of UTF-8 octets. Dereferencing
// yields a code point; stepping goes through next() / prior(), so the
// exceptions of those functions can surface from any operation below.
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
    octet_iterator it;           // current position
    octet_iterator range_start;  // lower bound handed to prior()
    octet_iterator range_end;    // upper bound handed to next()
public:
    // Leaves the three wrapped iterators default-initialised.
    iterator () {}

    // Wraps octet_it, which must lie within [range_start, range_end];
    // throws std::out_of_range otherwise.
    explicit iterator (const octet_iterator& octet_it,
                       const octet_iterator& range_start,
                       const octet_iterator& range_end) :
        it(octet_it), range_start(range_start), range_end(range_end)
    {
        if (it < range_start || it > range_end)
            throw std::out_of_range("Invalid utf-8 iterator position");
    }

    // the default "big three" are OK

    // The wrapped octet iterator at its current position.
    octet_iterator base () const
    {
        return it;
    }

    // Decodes the code point at the current position without moving.
    uint32_t operator * () const
    {
        octet_iterator cursor = it;
        return next(cursor, range_end);
    }

    // Positions are only comparable when both iterators were built over the
    // same range; anything else is reported as std::logic_error.
    bool operator == (const iterator& rhs) const
    {
        const bool range_mismatch =
            range_start != rhs.range_start || range_end != rhs.range_end;
        if (range_mismatch)
            throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
        return (it == rhs.it);
    }

    bool operator != (const iterator& rhs) const
    {
        return !(*this == rhs);
    }

    // Pre-increment: step forward by one code point.
    iterator& operator ++ ()
    {
        next(it, range_end);
        return *this;
    }

    // Post-increment: step forward, return the previous state.
    iterator operator ++ (int)
    {
        iterator snapshot(*this);
        ++(*this);
        return snapshot;
    }

    // Pre-decrement: step back by one code point.
    iterator& operator -- ()
    {
        prior(it, range_start);
        return *this;
    }

    // Post-decrement: step back, return the previous state.
    iterator operator -- (int)
    {
        iterator snapshot(*this);
        --(*this);
        return snapshot;
    }
}; // class iterator
} // namespace utf8
#endif //header guard

358
src/utils/utf8/core.h Executable file
View File

@ -0,0 +1,358 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include <iterator>
namespace utf8
{
// Unsigned integer aliases for 8-bit, 16-bit and 32-bit values, used
// throughout the library for octets, UTF-16 units and code points.
// They are mapped onto unsigned char / short / int; you may need to change
// them to match your system.
// The names are the same as the ones from cstdint, or boost/cstdint.
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
// Unicode constants
// Leading (high) surrogates: 0xd800 - 0xdbff
// Trailing (low) surrogates: 0xdc00 - 0xdfff
const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
// Added to (cp >> 10) by the utf8to16 converters to obtain the lead
// surrogate of a code point above 0xffff.
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
// Added to (lead << 10) + trail by the utf16to8 converters to recover the
// code point from a surrogate pair. The subtraction is done in unsigned
// arithmetic and wraps around by design.
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
// Maximum valid value for a Unicode code point
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
/// Returns the low 8 bits of `oc` as an unsigned octet, whatever the
/// signedness or width of the argument type.
template<typename octet_type>
inline uint8_t mask8(octet_type oc)
{
    const uint8_t low_octet = static_cast<uint8_t>(0xff & oc);
    return low_octet;
}
/// Returns the low 16 bits of `oc` as an unsigned 16-bit value, whatever
/// the signedness or width of the argument type.
template<typename u16_type>
inline uint16_t mask16(u16_type oc)
{
    const uint16_t low_word = static_cast<uint16_t>(0xffff & oc);
    return low_word;
}
/// True when the octet is a UTF-8 trail (continuation) octet, i.e. its
/// two most significant bits are 1 and 0.
template<typename octet_type>
inline bool is_trail(octet_type oc)
{
    return (mask8(oc) & 0xc0) == 0x80;
}
/// True when `cp` lies in [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
template <typename u16>
inline bool is_lead_surrogate(u16 cp)
{
    if (cp >= LEAD_SURROGATE_MIN)
        return cp <= LEAD_SURROGATE_MAX;
    return false;
}
/// True when `cp` lies in [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_trail_surrogate(u16 cp)
{
    if (cp >= TRAIL_SURROGATE_MIN)
        return cp <= TRAIL_SURROGATE_MAX;
    return false;
}
/// True when `cp` is any surrogate, lead or trail: it lies in
/// [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_surrogate(u16 cp)
{
    if (cp >= LEAD_SURROGATE_MIN)
        return cp <= TRAIL_SURROGATE_MAX;
    return false;
}
/// True when `cp` does not exceed CODE_POINT_MAX and is not a surrogate.
template <typename u32>
inline bool is_code_point_valid(u32 cp)
{
    if (cp <= CODE_POINT_MAX)
        return !is_surrogate(cp);
    return false;
}
/// Length in octets (1 to 4) of the UTF-8 sequence announced by the lead
/// octet at `lead_it`, or 0 when that octet is not a recognised lead.
/// The iterator is dereferenced unconditionally.
template <typename octet_iterator>
inline typename std::iterator_traits<octet_iterator>::difference_type
sequence_length(octet_iterator lead_it)
{
    const uint8_t lead = mask8(*lead_it);

    if (lead < 0x80)            // 0xxxxxxx
        return 1;
    if ((lead >> 5) == 0x6)     // 110xxxxx
        return 2;
    if ((lead >> 4) == 0xe)     // 1110xxxx
        return 3;
    if ((lead >> 3) == 0x1e)    // 11110xxx
        return 4;

    return 0;
}
/// True when `cp` was encoded with a `length` other than the one its
/// magnitude requires (1, 2 or 3 octets for values below 0x80, 0x800 and
/// 0x10000 respectively). Values of 0x10000 and above are never reported.
template <typename octet_difference_type>
inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
{
    if (cp < 0x80)
        return length != 1;
    if (cp < 0x800)
        return length != 2;
    if (cp < 0x10000)
        return length != 3;
    return false;
}
// Result codes of the get_sequence_x / validate_next helpers:
//   UTF8_OK             - the sequence was decoded and passed every check
//   NOT_ENOUGH_ROOM     - the range ended before the sequence was complete
//   INVALID_LEAD        - sequence_length() returned 0 for the first octet
//   INCOMPLETE_SEQUENCE - an octet that had to be a trail octet was not one
//   OVERLONG_SEQUENCE   - is_overlong_sequence() rejected the encoding
//   INVALID_CODE_POINT  - is_code_point_valid() rejected the decoded value
enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
/// get_sequence_x functions decode utf-8 sequences of the length x
/// Decodes a one-octet sequence at `it`. The iterator is not advanced.
///
/// @param code_point  optional output; receives the octet value when non-null
/// @return UTF8_OK, or NOT_ENOUGH_ROOM when `it == end`
template <typename octet_iterator>
utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;

    if (code_point)
        *code_point = mask8(*it);
    return UTF8_OK;
}
/// Decodes a two-octet sequence starting at `it`. On success `it` is left
/// on the last octet of the sequence; on failure it stays wherever the
/// scan stopped.
///
/// @param code_point  optional output; written only on success
/// @return UTF8_OK, NOT_ENOUGH_ROOM when the range ends early, or
///         INCOMPLETE_SEQUENCE when the second octet is not a trail octet
template <typename octet_iterator>
utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;
    uint32_t cp = mask8(*it);

    // trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);

    if (code_point)
        *code_point = cp;
    return UTF8_OK;
}
/// Decodes a three-octet sequence starting at `it`. On success `it` is left
/// on the last octet of the sequence; on failure it stays wherever the
/// scan stopped.
///
/// @param code_point  optional output; written only on success
/// @return UTF8_OK, NOT_ENOUGH_ROOM when the range ends early, or
///         INCOMPLETE_SEQUENCE when a trail octet is missing
template <typename octet_iterator>
utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;
    uint32_t cp = mask8(*it);

    // first trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff);

    // second trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp += (*it) & 0x3f;

    if (code_point)
        *code_point = cp;
    return UTF8_OK;
}
/// Decodes a four-octet sequence starting at `it`. On success `it` is left
/// on the last octet of the sequence; on failure it stays wherever the
/// scan stopped.
///
/// @param code_point  optional output; written only on success
/// @return UTF8_OK, NOT_ENOUGH_ROOM when the range ends early, or
///         INCOMPLETE_SEQUENCE when a trail octet is missing
template <typename octet_iterator>
utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
    if (it == end)
        return NOT_ENOUGH_ROOM;
    uint32_t cp = mask8(*it);

    // first trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff);

    // second trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp += (mask8(*it) << 6) & 0xfff;

    // third trail octet
    if (++it == end)
        return NOT_ENOUGH_ROOM;
    if (!is_trail(*it))
        return INCOMPLETE_SEQUENCE;
    cp += (*it) & 0x3f;

    if (code_point)
        *code_point = cp;
    return UTF8_OK;
}
/// Validates and decodes the UTF-8 sequence that starts at `it`.
///
/// On success `it` is advanced past the sequence. On any failure `it` is
/// restored to the value it had on entry (which is only meaningful for
/// iterators that can be re-read, not for e.g. stream iterators).
///
/// @param it          in/out position of the sequence to check
/// @param end         one past the last readable octet
/// @param code_point  optional output; receives the decoded value on UTF8_OK
///                    and also on INVALID_CODE_POINT, so that callers can
///                    report the rejected value
/// @return UTF8_OK or the utf_error describing why the sequence was rejected;
///         NOT_ENOUGH_ROOM when `it == end` on entry
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
    // An empty range has no lead octet to inspect; report it instead of
    // dereferencing `end` in sequence_length() below.
    if (it == end)
        return NOT_ENOUGH_ROOM;

    // Save the original value of it so we can go back in case of failure
    octet_iterator original_it = it;
    uint32_t cp = 0;

    // Determine the sequence length based on the lead octet
    typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
    octet_difference_type length = sequence_length(it);
    if (length == 0)
        return INVALID_LEAD;

    // Now that we have a valid sequence length, get trail octets and calculate the code point
    utf_error err = UTF8_OK;
    switch (length) {
        case 1:
            err = get_sequence_1(it, end, &cp);
            break;
        case 2:
            err = get_sequence_2(it, end, &cp);
            break;
        case 3:
            err = get_sequence_3(it, end, &cp);
            break;
        case 4:
            err = get_sequence_4(it, end, &cp);
            break;
    }

    if (err == UTF8_OK) {
        // Decoding succeeded. Now, security checks...
        if (is_code_point_valid(cp)) {
            if (!is_overlong_sequence(cp, length)) {
                // Passed! Return here.
                if (code_point)
                    *code_point = cp;
                ++it; // the get_sequence_x helpers stop ON the last octet
                return UTF8_OK;
            }
            else
                err = OVERLONG_SEQUENCE;
        }
        else {
            // Hand the rejected value back; without this the caller only
            // ever sees its own initial value.
            if (code_point)
                *code_point = cp;
            err = INVALID_CODE_POINT;
        }
    }

    // Failure branch - restore the original value of the iterator
    it = original_it;
    return err;
}
/// Overload of validate_next() for callers that only need the verdict:
/// forwards with a null code_point output.
template <typename octet_iterator>
inline utf_error validate_next(octet_iterator& it, octet_iterator end)
{
    uint32_t* const no_output = 0;
    return validate_next(it, end, no_output);
}
} // namespace internal
/// The library API - functions intended to be called by the users
// Byte order mark: the three octets that starts_with_bom() and is_bom()
// compare the beginning of a sequence against.
const uint8_t bom[] = {0xef, 0xbb, 0xbf};
/// Scans [start, end) sequence by sequence and returns the position of the
/// first one that internal::validate_next rejects, or `end` if none is.
template <typename octet_iterator>
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
{
    octet_iterator cursor = start;
    while (cursor != end) {
        // validate_next leaves `cursor` on the bad sequence when it fails
        if (internal::validate_next(cursor, end) != internal::UTF8_OK)
            break;
    }
    return cursor;
}
/// True when find_invalid() reaches `end` without finding a bad sequence.
template <typename octet_iterator>
inline bool is_valid(octet_iterator start, octet_iterator end)
{
    const octet_iterator first_bad = find_invalid(start, end);
    return first_bad == end;
}
/// True when [it, end) begins with the three octets of `bom`. The range
/// end is checked before every read, so shorter ranges simply yield false.
template <typename octet_iterator>
inline bool starts_with_bom (octet_iterator it, octet_iterator end)
{
    if (it == end || internal::mask8(*it++) != bom[0])
        return false;
    if (it == end || internal::mask8(*it++) != bom[1])
        return false;
    return it != end && internal::mask8(*it) == bom[2];
}
//Deprecated in release 2.3
// True when the octets at `it` match `bom`. No end of range is taken, so up
// to three octets are read without any bounds check.
template <typename octet_iterator>
inline bool is_bom (octet_iterator it)
{
    if (internal::mask8(*it++) != bom[0])
        return false;
    if (internal::mask8(*it++) != bom[1])
        return false;
    return internal::mask8(*it) == bom[2];
}
} // namespace utf8
#endif // header guard

228
src/utils/utf8/unchecked.h Executable file
View File

@ -0,0 +1,228 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
namespace utf8
{
namespace unchecked
{
/// Encodes `cp` as UTF-8 and writes the octets through `result`, without
/// checking that `cp` is a valid code point.
///
/// @return `result` advanced past the one to four octets written
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    // one octet
    if (cp < 0x80) {
        *(result++) = static_cast<uint8_t>(cp);
        return result;
    }

    // two octets
    if (cp < 0x800) {
        *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }

    // three octets
    if (cp < 0x10000) {
        *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
        return result;
    }

    // four octets
    *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
    *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
    *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
    *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
    return result;
}
/// Decodes the sequence at `it` and advances `it` past it. Nothing is
/// validated: trail octets are read as many times as the lead octet
/// announces, with no range check.
template <typename octet_iterator>
uint32_t next(octet_iterator& it)
{
    uint32_t cp = internal::mask8(*it);
    const typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);

    if (length == 2) {
        ++it;
        cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
    }
    else if (length == 3) {
        ++it;
        cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff);
        ++it;
        cp += (*it) & 0x3f;
    }
    else if (length == 4) {
        ++it;
        cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff);
        ++it;
        cp += (internal::mask8(*it) << 6) & 0xfff;
        ++it;
        cp += (*it) & 0x3f;
    }
    // any other length: the single octet already read is the result

    ++it; // step off the last octet of the sequence
    return cp;
}
/// Returns the code point that next() would produce at `it`. The iterator
/// is taken by value, so the caller's copy is not advanced.
template <typename octet_iterator>
uint32_t peek_next(octet_iterator it)
{
    const uint32_t cp = next(it);
    return cp;
}
/// Moves `it` back to the nearest preceding non-trail octet and returns the
/// code point decoded from there. No lower bound is checked.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it)
{
    do {
        --it;
    } while (internal::is_trail(*it));

    // decode through a copy so that `it` stays on the lead octet
    octet_iterator cursor = it;
    return next(cursor);
}
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
// Plain forwarder to prior().
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t cp = prior(it);
    return cp;
}
/// Advances `it` by `n` code points by calling next() `n` times.
/// No end of range is checked.
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{
    for (distance_type step = 0; step < n; ++step) {
        next(it);
    }
}
/// Counts the code points in [first, last) by decoding them one after the
/// other with next(), without validation.
template <typename octet_iterator>
typename std::iterator_traits<octet_iterator>::difference_type
distance (octet_iterator first, octet_iterator last)
{
    typename std::iterator_traits<octet_iterator>::difference_type count = 0;
    while (first < last) {
        next(first);
        ++count;
    }
    return count;
}
/// Converts the UTF-16 units in [start, end) to UTF-8 written through
/// `result`. A lead surrogate is combined with whatever unit follows it;
/// nothing is validated and the end of the range is not re-checked there.
template <typename u16bit_iterator, typename octet_iterator>
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
    while (start != end) {
        uint32_t cp = internal::mask16(*start++);

        // Take care of surrogate pairs first
        if (internal::is_lead_surrogate(cp)) {
            const uint32_t trail_surrogate = internal::mask16(*start++);
            cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
        }

        result = append(cp, result);
    }
    return result;
}
/// Converts the UTF-8 octets in [start, end) to UTF-16 units written through
/// `result`, without validation. Code points above 0xffff are emitted as
/// two units.
template <typename u16bit_iterator, typename octet_iterator>
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = next(start);

        if (cp <= 0xffff) {
            *result++ = static_cast<uint16_t>(cp);
            continue;
        }

        // make a surrogate pair
        *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
        *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
    }
    return result;
}
/// Encodes every code point in [start, end) as UTF-8 through `result`,
/// using the unchecked append() for each one.
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start) {
        result = append(*start, result);
    }
    return result;
}
/// Decodes the UTF-8 octets in [start, end) with the unchecked next() and
/// writes each code point through `result`.
template <typename octet_iterator, typename u32bit_iterator>
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
    while (start < end) {
        const uint32_t cp = next(start);
        *result++ = cp;
    }
    return result;
}
// Bidirectional iterator adaptor over UTF-8 octets. Dereferencing yields a
// code point. It keeps no range bounds and performs no validation.
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
    octet_iterator it;  // current position
public:
    // Leaves the wrapped iterator default-initialised.
    iterator () {}

    explicit iterator (const octet_iterator& octet_it): it(octet_it) {}

    // the default "big three" are OK

    // The wrapped octet iterator at its current position.
    octet_iterator base () const
    {
        return it;
    }

    // Decodes the code point at the current position without moving.
    uint32_t operator * () const
    {
        octet_iterator cursor = it;
        return next(cursor);
    }

    bool operator == (const iterator& rhs) const
    {
        return (it == rhs.it);
    }

    bool operator != (const iterator& rhs) const
    {
        return !(*this == rhs);
    }

    // Pre-increment: skip as many octets as the current lead octet announces.
    iterator& operator ++ ()
    {
        std::advance(it, internal::sequence_length(it));
        return *this;
    }

    // Post-increment: step forward, return the previous state.
    iterator operator ++ (int)
    {
        iterator snapshot(*this);
        ++(*this);
        return snapshot;
    }

    // Pre-decrement: step back by one code point.
    iterator& operator -- ()
    {
        prior(it);
        return *this;
    }

    // Post-decrement: step back, return the previous state.
    iterator operator -- (int)
    {
        iterator snapshot(*this);
        --(*this);
        return snapshot;
    }
}; // class iterator
} // namespace utf8::unchecked
} // namespace utf8
#endif // header guard