diff --git a/src/Makefile.am b/src/Makefile.am index 918382ee4..d01e69200 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -362,6 +362,26 @@ supertuxkart_SOURCES = \ states_screens/tracks_screen.hpp \ states_screens/tutorial_screen.cpp \ states_screens/tutorial_screen.hpp \ + tinygettext/dictionary.cpp \ + tinygettext/dictionary.hpp \ + tinygettext/dictionary_manager.cpp \ + tinygettext/dictionary_manager.hpp \ + tinygettext/file_system.hpp \ + tinygettext/iconv.cpp \ + tinygettext/iconv.hpp \ + tinygettext/language.cpp \ + tinygettext/language.hpp \ + tinygettext/log.cpp \ + tinygettext/log.hpp \ + tinygettext/log_stream.hpp \ + tinygettext/plural_forms.cpp \ + tinygettext/plural_forms.hpp \ + tinygettext/po_parser.cpp \ + tinygettext/po_parser.hpp \ + tinygettext/stk_file_system.cpp \ + tinygettext/stk_file_system.hpp \ + tinygettext/tinygettext.cpp \ + tinygettext/tinygettext.hpp \ tracks/ambient_light_sphere.cpp \ tracks/ambient_light_sphere.hpp \ tracks/bezier_curve.cpp \ @@ -419,4 +439,3 @@ supertuxkart_LDADD = \ $(irrlicht_LIBS) $(fribidi_LIBS) $(bullet_LIBS) $(enet_LIBS) \ $(opengl_LIBS) $(openal_LIBS) $(oggvorbis_LIBS) \ $(INTLLIBS) $(LIBCURL_LIBS) $(LIBCURL_CFLAGS) - diff --git a/src/states_screens/race_gui.cpp b/src/states_screens/race_gui.cpp index 18fd63890..3a5d39d76 100644 --- a/src/states_screens/race_gui.cpp +++ b/src/states_screens/race_gui.cpp @@ -49,8 +49,12 @@ using namespace irr; #include "utils/string_utils.hpp" #include "utils/translation.hpp" +#ifdef __APPLE__ #include #include +#else +#include +#endif /** The constructor is called before anything is attached to the scene node. * So rendering to a texture can be done here. 
But world is not yet fully
diff --git a/src/tinygettext/dictionary.cpp b/src/tinygettext/dictionary.cpp
new file mode 100644
index 000000000..9765d751d
--- /dev/null
+++ b/src/tinygettext/dictionary.cpp
@@ -0,0 +1,232 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <assert.h>
+#include "log_stream.hpp"
+#include "dictionary.hpp"
+
+namespace tinygettext {
+
+/** Creates an empty dictionary whose get_charset() reports \a charset_. */
+Dictionary::Dictionary(const std::string& charset_) :
+  entries(),
+  ctxt_entries(),
+  charset(charset_),
+  plural_forms()
+{
+}
+
+Dictionary::~Dictionary()
+{
+}
+
+/** Returns the charset name that was passed to the constructor. */
+std::string
+Dictionary::get_charset() const
+{
+  return charset;
+}
+
+/** Stores the plural rule that the translate_plural() calls evaluate. */
+void
+Dictionary::set_plural_forms(const PluralForms& plural_forms_)
+{
+  plural_forms = plural_forms_;
+}
+
+/** Returns a copy of the stored plural rule. */
+PluralForms
+Dictionary::get_plural_forms() const
+{
+  return plural_forms;
+}
+
+/** Plural lookup of \a msgid among the entries stored without a context. */
+std::string
+Dictionary::translate_plural(const std::string& msgid, const std::string& msgid_plural, int num)
+{
+  return translate_plural(entries, msgid, msgid_plural, num);
+}
+
+/** Looks \a msgid up in \a dict and returns the msgstr selected by
+    plural_forms.get_plural(count). When \a msgid is unknown, or the
+    selected msgstr is missing or empty, the English rule applies:
+    \a msgid for count == 1, \a msgid_plural otherwise. */
+std::string
+Dictionary::translate_plural(const Entries& dict, const std::string& msgid, const std::string& msgid_plural, int count)
+{
+  Entries::const_iterator i = dict.find(msgid);
+
+  if (i != dict.end())
+  {
+    // Bind to i->second only after the end() check; dereferencing the
+    // end iterator for an unknown msgid is undefined behaviour.
+    const std::vector<std::string>& msgstrs = i->second;
+    const unsigned int n = plural_forms.get_plural(count);
+    assert(/*n >= 0 &&*/ n < msgstrs.size());
+
+    // assert() vanishes with NDEBUG, so the index is checked again here.
+    if (n < msgstrs.size() && !msgstrs[n].empty())
+      return msgstrs[n];
+    else
+      if (count == 1) // default to english rules
+        return msgid;
+      else
+        return msgid_plural;
+  }
+  else
+  {
+    log_info << "Couldn't translate: " << msgid << std::endl;
+    log_info << "Candidates: " << std::endl;
+    for (i = dict.begin(); i != dict.end(); ++i)
+      log_info << "'" << i->first << "'" << std::endl;
+
+    if (count == 1) // default to english rules
+      return msgid;
+    else
+      return msgid_plural;
+  }
+}
+
+/** Looks \a msgid up among the entries stored without a context. */
+std::string
+Dictionary::translate(const std::string& msgid)
+{
+  return translate(entries, msgid);
+}
+
+/** Returns the first msgstr stored for \a msgid in \a dict, or \a msgid
+    itself (after logging) when there is no entry or it has no msgstr. */
+std::string
+Dictionary::translate(const Entries& dict, const std::string& msgid)
+{
+  Entries::const_iterator i = dict.find(msgid);
+  if (i != dict.end() && !i->second.empty())
+  {
+    return i->second[0];
+  }
+  else
+  {
+    log_info << "Couldn't translate: " << msgid << std::endl;
+    return msgid;
+  }
+}
+
+/** Like translate(), but searches only the entries of context \a msgctxt. */
+std::string
+Dictionary::translate_ctxt(const std::string& msgctxt, const std::string& msgid)
+{
+  CtxtEntries::iterator i = ctxt_entries.find(msgctxt);
+  if (i != ctxt_entries.end())
+  {
+    return translate(i->second, msgid);
+  }
+  else
+  {
+    log_info << "Couldn't translate: " << msgid << std::endl;
+    return msgid;
+  }
+}
+
+/** Like translate_plural(), but searches only the entries of context
+    \a msgctxt. */
+std::string
+Dictionary::translate_ctxt_plural(const std::string& msgctxt,
+                                  const std::string& msgid, const std::string& msgidplural, int num)
+{
+  CtxtEntries::iterator i = ctxt_entries.find(msgctxt);
+  if (i != ctxt_entries.end())
+  {
+    return translate_plural(i->second, msgid, msgidplural, num);
+  }
+  else
+  {
+    log_info << "Couldn't translate: " << msgid << std::endl;
+    if (num != 1) // default to english
+      return msgidplural;
+    else
+      return msgid;
+  }
+}
+
+/** Stores (replacing any previous value) the plural msgstrs of \a msgid. */
+void
+Dictionary::add_translation(const std::string& msgid, const std::string& ,
+                            const std::vector<std::string>& msgstrs)
+{
+  // Do we need msgid2 for anything? its after all supplied to the
+  // translate call, so we just throw it away here
+  entries[msgid] = msgstrs;
+}
+
+/** Stores \a msgstr for \a msgid; an existing first msgstr is overwritten
+    after a collision warning. */
+void
+Dictionary::add_translation(const std::string& msgid, const std::string& msgstr)
+{
+  std::vector<std::string>& vec = entries[msgid];
+  if (vec.empty())
+  {
+    vec.push_back(msgstr);
+  }
+  else
+  {
+    log_warning << "collision in add_translation: '"
+                << msgid << "' -> '" << msgstr << "' vs '" << vec[0] << "'" << std::endl;
+    vec[0] = msgstr;
+  }
+}
+
+/** Context variant of the plural add_translation(); an existing entry is
+    replaced after a collision warning. */
+void
+Dictionary::add_translation(const std::string& msgctxt,
+                            const std::string& msgid, const std::string& msgid_plural,
+                            const std::vector<std::string>& msgstrs)
+{
+  std::vector<std::string>& vec = ctxt_entries[msgctxt][msgid];
+  if (vec.empty())
+  {
+    vec = msgstrs;
+  }
+  else
+  {
+    log_warning << "collision in add_translation(\"" << msgctxt << "\", \"" << msgid << "\", \"" << msgid_plural << "\")" << std::endl;
+    vec = msgstrs;
+  }
+}
+
+/** Context variant of the singular add_translation(); an existing first
+    msgstr is overwritten after a collision warning. */
+void
+Dictionary::add_translation(const std::string& msgctxt, const std::string& msgid, const std::string& msgstr)
+{
+  std::vector<std::string>& vec = ctxt_entries[msgctxt][msgid];
+  if (vec.empty())
+  {
+    vec.push_back(msgstr);
+  }
+  else
+  {
+    log_warning << "collision in add_translation(\"" << msgctxt << "\", \"" << msgid << "\")" << std::endl;
+    vec[0] = msgstr;
+  }
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/dictionary.hpp b/src/tinygettext/dictionary.hpp
new file mode 100644
index 000000000..743e075a0
--- /dev/null
+++ b/src/tinygettext/dictionary.hpp
@@ -0,0 +1,123 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_DICTIONARY_HPP
+#define HEADER_TINYGETTEXT_DICTIONARY_HPP
+
+#include <map>
+#include <vector>
+#include <string>
+#include "plural_forms.hpp"
+
+namespace tinygettext {
+
+/** A simple dictionary class that mimics gettext() behaviour. Each
+    Dictionary only works for a single language, for managing multiple
+    languages and .po files at once use the DictionaryManager.
*/
+class Dictionary
+{
+private:
+  typedef std::map<std::string, std::vector<std::string> > Entries;
+  Entries entries;
+
+  typedef std::map<std::string, Entries> CtxtEntries;
+  CtxtEntries ctxt_entries;
+
+  std::string charset;
+  PluralForms plural_forms;
+
+  std::string translate(const Entries& dict, const std::string& msgid);
+  std::string translate_plural(const Entries& dict, const std::string& msgid, const std::string& msgidplural, int num);
+
+public:
+  /** Constructs an empty dictionary labelled with \a charset (default UTF-8) */
+  Dictionary(const std::string& charset = "UTF-8");
+  ~Dictionary();
+
+  /** Return the charset used for this dictionary */
+  std::string get_charset() const;
+
+  void set_plural_forms(const PluralForms&);
+  PluralForms get_plural_forms() const;
+
+
+  /** Translate the string \a msgid. */
+  std::string translate(const std::string& msgid);
+
+  /** Translate the string \a msgid to its correct plural form, based
+      on the number of items given by \a num. \a msgidplural is \a msgid in
+      plural form. */
+  std::string translate_plural(const std::string& msgid, const std::string& msgidplural, int num);
+
+  /** Translate the string \a msgid that is in context \a msgctxt. A
+      context is a way to disambiguate msgids that contain the same
+      letters, but different meaning. For example "exit" might mean to
+      quit doing something or it might refer to a door that leads
+      outside (e.g. 'Ausgang' vs 'Beenden' in German) */
+  std::string translate_ctxt(const std::string& msgctxt, const std::string& msgid);
+
+  std::string translate_ctxt_plural(const std::string& msgctxt, const std::string& msgid, const std::string& msgidplural, int num);
+
+  /** Add a translation from \a msgid to \a msgstrs to the dictionary,
+      where \a msgid is the singular form of the message, msgid_plural the
+      plural form and msgstrs a table of translations. The right
+      translation will be calculated based on the \a num argument to
+      translate_plural(). */
+  void add_translation(const std::string& msgid, const std::string& msgid_plural,
+                       const std::vector<std::string>& msgstrs);
+  void add_translation(const std::string& msgctxt,
+                       const std::string& msgid, const std::string& msgid_plural,
+                       const std::vector<std::string>& msgstrs);
+
+  /** Add a translation from \a msgid to \a msgstr to the
+      dictionary */
+  void add_translation(const std::string& msgid, const std::string& msgstr);
+  void add_translation(const std::string& msgctxt, const std::string& msgid, const std::string& msgstr);
+
+  /** Iterate over all messages, Func is of type:
+      void func(const std::string& msgid, const std::vector<std::string>& msgstrs) */
+  template<class Func>
+  Func foreach(Func func)
+  {
+    for(Entries::iterator i = entries.begin(); i != entries.end(); ++i)
+    {
+      func(i->first, i->second);
+    }
+    return func;
+  }
+
+  /** Iterate over all messages with a context, Func is of type:
+      void func(const std::string& ctxt, const std::string& msgid, const std::vector<std::string>& msgstrs) */
+  template<class Func>
+  Func foreach_ctxt(Func func)
+  {
+    for(CtxtEntries::iterator i = ctxt_entries.begin(); i != ctxt_entries.end(); ++i)
+    {
+      for(Entries::iterator j = i->second.begin(); j != i->second.end(); ++j)
+      {
+        func(i->first, j->first, j->second);
+      }
+    }
+    return func;
+  }
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/dictionary_manager.cpp b/src/tinygettext/dictionary_manager.cpp
new file mode 100644
index 000000000..b686ebd55
--- /dev/null
+++ b/src/tinygettext/dictionary_manager.cpp
@@ -0,0 +1,268 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include "dictionary_manager.hpp"
+
+#include <memory>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fstream>
+#include <algorithm>
+
+#include "log_stream.hpp"
+#include "po_parser.hpp"
+#include "stk_file_system.hpp"
+
+namespace tinygettext {
+
+/** Returns true when \a lhs ends with \a rhs. */
+static bool has_suffix(const std::string& lhs, const std::string& rhs)
+{
+  if (lhs.length() < rhs.length())
+    return false;
+  else
+    return lhs.compare(lhs.length() - rhs.length(), rhs.length(), rhs) == 0;
+}
+
+/** Creates a manager with no search path and no language selected; files
+    are read through a StkFileSystem until set_filesystem() replaces it. */
+DictionaryManager::DictionaryManager(const std::string& charset_) :
+  dictionaries(),
+  search_path(),
+  charset(charset_),
+  use_fuzzy(true),
+  current_language(),
+  current_dict(0),
+  empty_dict(),
+  filesystem(new StkFileSystem)
+{
+}
+
+/** Deletes every cached dictionary. */
+DictionaryManager::~DictionaryManager()
+{
+  for(Dictionaries::iterator i = dictionaries.begin(); i != dictionaries.end(); ++i)
+  {
+    delete i->second;
+  }
+}
+
+/** Deletes every cached dictionary and forgets the current one, so the
+    next get_dictionary() call loads from the search path again. */
+void
+DictionaryManager::clear_cache()
+{
+  for(Dictionaries::iterator i = dictionaries.begin(); i != dictionaries.end(); ++i)
+  {
+    delete i->second;
+  }
+  dictionaries.clear();
+
+  current_dict = 0;
+}
+
+/** Returns the dictionary of the current language (loading it on first
+    use), or the empty dictionary when no language has been set. */
+Dictionary&
+DictionaryManager::get_dictionary()
+{
+  if (current_dict)
+  {
+    return *current_dict;
+  }
+  else
+  {
+    if (current_language)
+    {
+      current_dict = &get_dictionary(current_language);
+      return *current_dict;
+    }
+    else
+    {
+      return empty_dict;
+    }
+  }
+}
+
+/** Returns the cached dictionary for \a language. On a cache miss a new
+    dictionary is created and, for each search path directory (most
+    recently added first), the best matching .po file is parsed into it.
+    Open and parse failures are logged and do not abort the lookup. */
+Dictionary&
+DictionaryManager::get_dictionary(const Language& language)
+{
+  //log_debug << "Dictionary for language \"" << spec << "\" requested" << std::endl;
+  //log_debug << "...normalized as \"" << lang << "\"" << std::endl;
+  assert(language);
+
+  Dictionaries::iterator i = dictionaries.find(language);
+  if (i != dictionaries.end())
+  {
+    return *i->second;
+  }
+  else // Dictionary for languages lang isn't loaded, so we load it
+  {
+    //log_debug << "get_dictionary: " << lang << std::endl;
+    Dictionary* dict = new Dictionary(charset);
+
+    dictionaries[language] = dict;
+
+    for (SearchPath::reverse_iterator p = search_path.rbegin(); p != search_path.rend(); ++p)
+    {
+      std::vector<std::string> files = filesystem->open_directory(*p);
+
+      std::string best_filename;
+      int best_score = 0;
+
+      for(std::vector<std::string>::iterator filename = files.begin(); filename != files.end(); ++filename)
+      {
+        // check if filename matches requested language
+        if (has_suffix(*filename, ".po"))
+        { // ignore anything that isn't a .po file
+          Language po_language = Language::from_env(filename->substr(0, filename->size()-3));
+
+          if (!po_language)
+          {
+            log_warning << *filename << ": warning: ignoring, unknown language" << std::endl;
+          }
+          else
+          {
+            int score = Language::match(language, po_language);
+
+            if (score > best_score)
+            {
+              best_score = score;
+              best_filename = *filename;
+            }
+          }
+        }
+      }
+
+      if (!best_filename.empty())
+      {
+        std::string pofile = *p + "/" + best_filename;
+        try
+        {
+          std::auto_ptr<std::istream> in = filesystem->open_file(pofile);
+          if (!in.get())
+          {
+            log_error << "error: failure opening: " << pofile << std::endl;
+          }
+          else
+          {
+            POParser::parse(pofile, *in, *dict);
+          }
+        }
+        catch(const std::exception& e)
+        {
+          log_error << "error: failure parsing: " << pofile << std::endl;
+          log_error << e.what() << "" << std::endl;
+        }
+      }
+    }
+
+    return *dict;
+  }
+}
+
+/** Returns the languages named by the .po files found in the search path.
+    File names that do not map to a known language are skipped, the same
+    way get_dictionary() ignores them. */
+std::set<Language>
+DictionaryManager::get_languages()
+{
+  std::set<Language> languages;
+
+  for (SearchPath::iterator p = search_path.begin(); p != search_path.end(); ++p)
+  {
+    std::vector<std::string> files = filesystem->open_directory(*p);
+
+    for(std::vector<std::string>::iterator file = files.begin(); file != files.end(); ++file)
+    {
+      if (has_suffix(*file, ".po"))
+      {
+        // An unknown file name yields an invalid Language; handing that
+        // back would only lead callers into the assert in get_dictionary().
+        Language po_language = Language::from_env(file->substr(0, file->size()-3));
+        if (po_language)
+          languages.insert(po_language);
+      }
+    }
+  }
+  return languages;
+}
+
+/** Selects \a language; the current dictionary is resolved again lazily. */
+void
+DictionaryManager::set_language(const Language& language)
+{
+  if (current_language != language)
+  {
+    current_language = language;
+    current_dict = 0;
+  }
+}
+
+/** Returns the language selected with set_language(). */
+Language
+DictionaryManager::get_language() const
+{
+  return current_language;
+}
+
+/** Sets the charset handed to dictionaries created from now on. */
+void
+DictionaryManager::set_charset(const std::string& charset_)
+{
+  clear_cache(); // changing charset invalidates cache
+  charset = charset_;
+}
+
+/** Stores the fuzzy flag and drops the dictionary cache. */
+void
+DictionaryManager::set_use_fuzzy(bool t)
+{
+  clear_cache();
+  use_fuzzy = t;
+}
+
+/** Returns the flag stored by set_use_fuzzy(). */
+bool
+DictionaryManager::get_use_fuzzy() const
+{
+  return use_fuzzy;
+}
+
+/** Appends \a pathname to the search path and drops the dictionary cache. */
+void
+DictionaryManager::add_directory(const std::string& pathname)
+{
+  clear_cache(); // adding directories invalidates cache
+  search_path.push_back(pathname);
+}
+
+/** Takes ownership of \a filesystem_ and uses it for later file access. */
+void
+DictionaryManager::set_filesystem(std::auto_ptr<FileSystem> filesystem_)
+{
+  filesystem = filesystem_;
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/dictionary_manager.hpp b/src/tinygettext/dictionary_manager.hpp
new file mode 100644
index 000000000..7c3d7f3bc
--- /dev/null
+++ b/src/tinygettext/dictionary_manager.hpp
@@ -0,0 +1,99 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_DICTIONARY_MANAGER_HPP
+#define HEADER_TINYGETTEXT_DICTIONARY_MANAGER_HPP
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "dictionary.hpp"
+#include "language.hpp"
+
+namespace tinygettext {
+
+class FileSystem;
+
+/** Manager class for dictionaries, you give it a bunch of directories
+    with .po files and it will then automatically load the right file
+    on demand depending on which language was set. */
+class DictionaryManager
+{
+private:
+  typedef std::map<Language, Dictionary*> Dictionaries;
+  Dictionaries dictionaries;
+
+  typedef std::vector<std::string> SearchPath;
+  SearchPath search_path;
+
+  std::string charset;
+  bool        use_fuzzy;
+
+  Language    current_language;
+  Dictionary* current_dict;
+
+  Dictionary  empty_dict;
+
+  std::auto_ptr<FileSystem> filesystem;
+
+  void clear_cache();
+
+public:
+  DictionaryManager(const std::string& charset_ = "UTF-8");
+  ~DictionaryManager();
+
+  /** Return the currently active dictionary, if none is set, an empty
+      dictionary is returned. */
+  Dictionary& get_dictionary();
+
+  /** Get dictionary for language */
+  Dictionary& get_dictionary(const Language& language);
+
+  /** Set the language whose dictionary get_dictionary() returns */
+  void set_language(const Language& language);
+
+  /** Return the language that was set with set_language() */
+  Language get_language() const;
+
+  void set_use_fuzzy(bool t);
+  bool get_use_fuzzy() const;
+
+  /** Set a charset that will be set on the returned dictionaries */
+  void set_charset(const std::string& charset);
+
+  /** Add a directory to the search path for dictionaries, earlier
+      added directories have higher priority than later added ones */
+  void add_directory(const std::string& pathname);
+
+  /** Return the set of languages named by the .po files in the search path */
+  std::set<Language> get_languages();
+
+  void set_filesystem(std::auto_ptr<FileSystem> filesystem);
+
+private:
+  DictionaryManager (const DictionaryManager&);
+  DictionaryManager& operator= (const DictionaryManager&);
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/file_system.hpp b/src/tinygettext/file_system.hpp
new file mode 100644
index 000000000..af47aaf96
--- /dev/null
+++ b/src/tinygettext/file_system.hpp
@@ -0,0 +1,42 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_FILE_SYSTEM_HPP
+#define HEADER_TINYGETTEXT_FILE_SYSTEM_HPP
+
+#include <vector>
+#include <memory>
+#include <iosfwd>
+#include <string>
+
+namespace tinygettext {
+/** Abstract access to directories and files, as used by DictionaryManager. */
+class FileSystem
+{
+public:
+  virtual ~FileSystem() {}
+  /** List the file names in \a pathname / open \a filename for reading. */
+  virtual std::vector<std::string>    open_directory(const std::string& pathname) =0;
+  virtual std::auto_ptr<std::istream> open_file(const std::string& filename)      =0;
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
+
diff --git a/src/tinygettext/iconv.cpp b/src/tinygettext/iconv.cpp
new file mode 100644
index 000000000..c0b8b6072
--- /dev/null
+++ b/src/tinygettext/iconv.cpp
@@ -0,0 +1,167 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <ctype.h>
+#include <assert.h>
+#include <sstream>
+#include <errno.h>
+#include <stdexcept>
+#include <string.h>
+#include <stdlib.h>
+
+#include "iconv.hpp"
+#include "log_stream.hpp"
+
+namespace tinygettext {
+
+#ifndef tinygettext_ICONV_CONST
+#  define tinygettext_ICONV_CONST
+#endif
+
+/** Creates a converter without a descriptor; convert() returns its input. */
+IConv::IConv()
+  : to_charset(),
+    from_charset(),
+    cd(0)
+{}
+
+/** Creates a converter from \a from_charset_ to \a to_charset_; throws
+    std::runtime_error when the descriptor cannot be opened. */
+IConv::IConv(const std::string& from_charset_, const std::string& to_charset_)
+  : to_charset(),
+    from_charset(),
+    cd(0)
+{
+  set_charsets(from_charset_, to_charset_);
+}
+
+/** Closes the conversion descriptor, if one is open. */
+IConv::~IConv()
+{
+  if (cd)
+    tinygettext_iconv_close(cd);
+}
+
+/** Replaces the conversion by one from \a from_charset_ to \a to_charset_.
+    Names are upper-cased first; identical names select pass-through mode.
+    Throws std::runtime_error when the descriptor cannot be opened, leaving
+    the object in pass-through mode. */
+void
+IConv::set_charsets(const std::string& from_charset_, const std::string& to_charset_)
+{
+  if (cd)
+  {
+    tinygettext_iconv_close(cd);
+    cd = 0; // never keep a closed descriptor around
+  }
+
+  from_charset = from_charset_;
+  to_charset   = to_charset_;
+
+  // toupper() needs a value representable as unsigned char (or EOF)
+  for(std::string::iterator i = to_charset.begin(); i != to_charset.end(); ++i)
+    *i = static_cast<char>(toupper(static_cast<unsigned char>(*i)));
+
+  for(std::string::iterator i = from_charset.begin(); i != from_charset.end(); ++i)
+    *i = static_cast<char>(toupper(static_cast<unsigned char>(*i)));
+
+  if (to_charset == from_charset)
+  {
+    cd = 0;
+  }
+  else
+  {
+    cd = tinygettext_iconv_open(to_charset.c_str(), from_charset.c_str());
+    if (cd == reinterpret_cast<tinygettext_iconv_t>(-1))
+    {
+      // Remember errno before anything can clobber it, and do not leave the
+      // (iconv_t)-1 marker in cd: the destructor would try to close it.
+      const int err = errno;
+      cd = 0;
+
+      if(err == EINVAL)
+      {
+        std::ostringstream str;
+        str << "IConv construction failed: conversion from '" << from_charset
+            << "' to '" << to_charset << "' not available";
+        throw std::runtime_error(str.str());
+      }
+      else
+      {
+        std::ostringstream str;
+        str << "IConv: construction failed: " << strerror(err);
+        throw std::runtime_error(str.str());
+      }
+    }
+  }
+}
+
+/** Converts \a text from from_charset to to_charset. In pass-through mode
+    and for empty input \a text is returned unchanged. On an invalid input
+    sequence an error is logged and the output produced so far is returned. */
+std::string
+IConv::convert(const std::string& text)
+{
+  if (!cd || text.empty())
+  {
+    return text;
+  }
+  else
+  {
+    size_t inbytesleft  = text.size();
+    size_t outbytesleft = 4*inbytesleft; // Worst case scenario: ASCII -> UTF-32?
+
+    // We try to avoid to much copying around, so we write directly into
+    // a std::string
+    tinygettext_ICONV_CONST char* inbuf = const_cast<char*>(&text[0]);
+    std::string result(outbytesleft, 'X');
+    char* outbuf = &result[0];
+
+    // Try to convert the text.
+    size_t ret = tinygettext_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+    if (ret == static_cast<size_t>(-1))
+    {
+      if (errno == EILSEQ || errno == EINVAL)
+      { // invalid multibyte sequence
+        tinygettext_iconv(cd, NULL, NULL, NULL, NULL); // reset state
+
+        // FIXME: Could try to skip the invalid byte and continue
+        log_error << "error: tinygettext:iconv: invalid multibyte sequence in: \"" << text << "\"" << std::endl;
+      }
+      else if (errno == E2BIG)
+      { // output buffer to small
+        assert(!"tinygettext/iconv.cpp: E2BIG: This should never be reached");
+      }
+      else if (errno == EBADF)
+      {
+        assert(!"tinygettext/iconv.cpp: EBADF: This should never be reached");
+      }
+      else
+      {
+        assert(!"tinygettext/iconv.cpp: <unknown>: This should never be reached");
+      }
+    }
+
+    result.resize(4*text.size() - outbytesleft);
+
+    return result;
+  }
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/iconv.hpp b/src/tinygettext/iconv.hpp
new file mode 100644
index 000000000..1ae1750b9
--- /dev/null
+++ b/src/tinygettext/iconv.hpp
@@ -0,0 +1,71 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef HEADER_TINYGETTEXT_ICONV_HPP
+#define HEADER_TINYGETTEXT_ICONV_HPP
+
+#include <string>
+
+#ifdef HAVE_SDL
+#  include "SDL.h"
+
+#  define tinygettext_ICONV_CONST const
+#  define tinygettext_iconv_t     SDL_iconv_t
+#  define tinygettext_iconv       SDL_iconv
+#  define tinygettext_iconv_open  SDL_iconv_open
+#  define tinygettext_iconv_close SDL_iconv_close
+#else
+#  include <iconv.h>
+
+#  ifdef HAVE_ICONV_CONST
+#    define tinygettext_ICONV_CONST ICONV_CONST
+#  else
+#    define tinygettext_ICONV_CONST
+#  endif
+
+#  define tinygettext_iconv_t     iconv_t
+#  define tinygettext_iconv       iconv
+#  define tinygettext_iconv_open  iconv_open
+#  define tinygettext_iconv_close iconv_close
+#endif
+
+namespace tinygettext {
+/** Owns one conversion descriptor of the iconv API selected above. */
+class IConv
+{
+private:
+  std::string to_charset;
+  std::string from_charset;
+  tinygettext_iconv_t cd;
+
+public:
+  IConv();
+  IConv(const std::string& fromcode, const std::string& tocode);
+  ~IConv();
+  /** Select the charsets to convert between / convert \a text with them. */
+  void set_charsets(const std::string& fromcode, const std::string& tocode);
+  std::string convert(const std::string& text);
+
+private:
+  IConv (const IConv&);
+  IConv& operator= (const IConv&);
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/language.cpp b/src/tinygettext/language.cpp
new file mode 100644
index 000000000..b9a1b46e3
--- /dev/null
+++ b/src/tinygettext/language.cpp
@@ -0,0 +1,568 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include "language.hpp"
+
+#include <map>
+#include <assert.h>
+#include <vector>
+
+namespace tinygettext {
+
+struct LanguageSpec {
+  /** Language code: "de", "en", ... */
+  const char* language;
+
+  /** Country code: "BR", "DE", ..., can be 0 */
+  const char* country;
+
+  /** Modifier/Variant: "Latn", "ije", "latin"..., can be 0 */
+  const char* modifier;
+
+  /** Language name: "German", "English", "French", ... */
+  const char* name;
+};
+
+/** Language Definitions; the table is terminated by an entry whose language is NULL */
+//*{
+LanguageSpec languages[] = {
+  { "aa", 0, 0, "Afar" },
+  { "af", 0, 0, "Afrikaans" },
+  { "af", "ZA", 0, "Afrikaans (South Africa)" },
+  { "am", 0, 0, "Amharic" },
+  { "ar", 0, 0, "Arabic" },
+  { "ar", "AR", 0, "Arabic (Argentina)" },
+  { "ar", "OM", 0, "Arabic (Oman)" },
+  { "ar", "SA", 0, "Arabic (Saudi Arabia)" },
+  { "ar", "SY", 0, "Arabic (Syrian Arab Republic)" },
+  { "ar", "TN", 0, "Arabic (Tunisia)" },
+  { "as", 0, 0, "Assamese" },
+  { "ast",0, 0, "Asturian" },
+  { "ay", 0, 0, "Aymara" },
+  { "az", 0, 0, "Azerbaijani" },
+  { "az", "IR", 0, "Azerbaijani (Iran)" },
+  { "be", 0, 0, "Belarusian" },
+  { "be", 0, "latin", "Belarusian" },
+  { "bg", 0, 0, "Bulgarian" },
+  { "bg", "BG", 0, "Bulgarian (Bulgaria)" },
+  { "bn", 0, 0, "Bengali" },
+  { "bn", "BD", 0, "Bengali (Bangladesh)" },
+  { "bn", "IN", 0, "Bengali (India)" },
+  { "bo", 0, 0, "Tibetan" },
+  { "br", 0, 0, "Breton" },
+  { "bs", 0, 0, "Bosnian" },
+  { "bs", "BA", 0, "Bosnian (Bosnia/Herzegovina)"},
+  { "bs", "BS", 0, "Bosnian (Bahamas)" },
+  { "ca", "ES", "valencia", "Catalan (valencia)" },
+  { "ca", "ES", 0, "Catalan (Spain)" },
+  { "ca", 0, "valencia", "Catalan (valencia)" },
+  { "ca", 0, 0, "Catalan" },
+  { "co", 0, 0, "Corsican" },
+  { "cs", 0, 0, "Czech" },
+  { "cs", "CZ", 0, "Czech (Czech Republic)" },
+  { "cy", 0, 0, "Welsh" },
+  { "cy", "GB", 0, "Welsh (Great Britain)" },
+  { "cz", 0, 0, "Unknown language" },
+  { "da", 0, 0, "Danish" },
+  { "da", "DK", 0, "Danish (Denmark)" },
+  { "de", 0, 0, "German" },
+  { "de", "AT", 0, "German (Austria)" },
+  { "de", "CH", 0, "German (Switzerland)" },
+  { "de", "DE", 0, "German (Germany)" },
+  { "dk", 0, 0, "Unknown language" },
+  { "dz", 0, 0, "Dzongkha" },
+  { "el", 0, 0, "Greek" },
+  { "el", "GR", 0, "Greek (Greece)" },
+  { "en", 0, 0, "English" },
+  { "en", "AU", 0, "English (Australia)" },
+  { "en", "CA", 0, "English (Canada)" },
+  { "en", "GB", 0, "English (Great Britain)" },
+  { "en", "US", 0, "English (United States)" },
+  { "en", "ZA", 0, "English (South Africa)" },
+  { "en", 0, "boldquot", "English" },
+  { "en", 0, "quot", "English" },
+  { "en", "US", "piglatin", "English" },
+  { "eo", 0, 0, "Esperanto" },
+  { "es", 0, 0, "Spanish" },
+  { "es", "AR", 0, "Spanish (Argentina)" },
+  { "es", "CL", 0, "Spanish (Chile)" },
+  { "es", "CO", 0, "Spanish (Colombia)" },
+  { "es", "CR", 0, "Spanish (Costa Rica)" },
+  { "es", "DO", 0, "Spanish (Dominican Republic)"},
+  { "es", "EC", 0, "Spanish (Ecuador)" },
+  { "es", "ES", 0, "Spanish (Spain)" },
+  { "es", "GT", 0, "Spanish (Guatemala)" },
+  { "es", "HN", 0, "Spanish (Honduras)" },
+  { "es", "LA", 0, "Spanish (Laos)" },
+  { "es", "MX", 0, "Spanish (Mexico)" },
+  { "es", "NI", 0, "Spanish (Nicaragua)" },
+  { "es", "PA", 0, "Spanish (Panama)" },
+  { "es", "PE", 0, "Spanish (Peru)" },
+  { "es", "PR", 0, "Spanish (Puerto Rico)" },
+  { "es", "SV", 0, "Spanish (El Salvador)" },
+  { "es", "UY", 0, "Spanish (Uruguay)" },
+  { "es", "VE", 0, "Spanish (Venezuela)" },
+  { "et", 0, 0, "Estonian" },
+  { "et", "EE", 0, "Estonian (Estonia)" },
+  { "et", "ET", 0, "Estonian (Ethiopia)" },
+  { "eu", 0, 0, "Basque" },
+  { "eu", "ES", 0, "Basque (Spain)" },
+  { "fa", 0, 0, "Persian" },
+  { "fa", "AF", 0, "Persian (Afghanistan)" },
+  { "fa", "IR", 0, "Persian (Iran)" },
+  { "fi", 0, 0, "Finnish" },
+  { "fi", "FI", 0, "Finnish (Finland)" },
+  { "fo", 0, 0, "Faroese" },
+  { "fo", "FO", 0, "Faeroese (Faroe Islands)" },
+  { "fr", 0, 0, "French" },
+  { "fr", "CA", 0, "French (Canada)" },
+  { "fr", "CH", 0, "French (Switzerland)" },
+  { "fr", "FR", 0, "French (France)" },
+  { "fr", "LU", 0, "French (Luxembourg)" },
+  { "fy", 0, 0, "Frisian" },
+  { "ga", 0, 0, "Irish" },
+  { "gd", 0, 0, "Gaelic Scots" },
+  { "gl", 0, 0, "Galician" },
+  { "gl", "ES", 0, "Galician (Spain)" },
+  { "gn", 0, 0, "Guarani" },
+  { "gu", 0, 0, "Gujarati" },
+  { "gv", 0, 0, "Manx" },
+  { "ha", 0, 0, "Hausa" },
+  { "he", 0, 0, "Hebrew" },
+  { "he", "IL", 0, "Hebrew (Israel)" },
+  { "hi", 0, 0, "Hindi" },
+  { "hr", 0, 0, "Croatian" },
+  { "hr", "HR", 0, "Croatian (Croatia)" },
+  { "hu", 0, 0, "Hungarian" },
+  { "hu", "HU", 0, "Hungarian (Hungary)" },
+  { "hy", 0, 0, "Armenian" },
+  { "ia", 0, 0, "Interlingua" },
+  { "id", 0, 0, "Indonesian" },
+  { "id", "ID", 0, "Indonesian (Indonesia)" },
+  { "is", 0, 0, "Icelandic" },
+  { "is", "IS", 0, "Icelandic (Iceland)" },
+  { "it", 0, 0, "Italian" },
+  { "it", "CH", 0, "Italian (Switzerland)" },
+  { "it", "IT", 0, "Italian (Italy)" },
+  { "iu", 0, 0, "Inuktitut" },
+  { "ja", 0, 0, "Japanese" },
+  { "ja", "JP", 0, "Japanese (Japan)" },
+  { "ka", 0, 0, "Georgian" },
+  { "kk", 0, 0, "Kazakh" },
+  { "kl", 0, 0, "Kalaallisut" },
+  { "km", 0, 0, "Khmer" },
+  { "km", "KH", 0, "Khmer (Cambodia)" },
+  { "kn", 0, 0, "Kannada" },
+  { "ko", 0, 0, "Korean" },
+  { "ko", "KR", 0, "Korean (Korea)" },
+  { "ku", 0, 0, "Kurdish" },
+  { "kw", 0, 0, "Cornish" },
+  { "ky", 0, 0, "Kirghiz" },
+  { "la", 0, 0, "Latin" },
+  { "lo", 0, 0, "Lao" },
+  { "lt", 0, 0, "Lithuanian" },
+  { "lt", "LT", 0, "Lithuanian (Lithuania)" },
+  { "lv", 0, 0, "Latvian" },
+  { "lv", "LV", 0, "Latvian (Latvia)" },
+  { "mg", 0, 0, "Malagasy" },
+  { "mi", 0, 0, "Maori" },
+  { "mk", 0, 0, "Macedonian" },
+  { "mk", "MK", 0, "Macedonian (Macedonia)" },
+  { "ml", 0, 0, "Malayalam" },
+  { "mn", 0, 0, "Mongolian" },
+  { "mr", 0, 0, "Marathi" },
+  { "ms", 0, 0, "Malay" },
+  { "ms", "MY", 0, "Malay (Malaysia)" },
+  { "mt", 0, 0, "Maltese" },
+  { "my", 0, 0, "Burmese" },
+  { "my", "MM", 0, "Burmese (Myanmar)" },
+  { "nb", 0, 0, "Norwegian Bokmal" },
+  { "nb", "NO", 0, "Norwegian Bokmål (Norway)" },
+  { "ne", 0, 0, "Nepali" },
+  { "nl", 0, 0, "Dutch" },
+  { "nl", "BE", 0, "Dutch (Belgium)" },
+  { "nl", "NL", 0, "Dutch (Netherlands)" },
+  { "nn", 0, 0, "Norwegian Nynorsk" },
+  { "nn", "NO", 0, "Norwegian Nynorsk (Norway)" },
+  { "no", 0, 0, "Norwegian" },
+  { "no", "NO", 0, "Norwegian (Norway)" },
+  { "no", "NY", 0, "Norwegian (NY)" },
+  { "nr", 0, 0, "Ndebele, South" },
+  { "oc", 0, 0, "Occitan post 1500" },
+  { "om", 0, 0, "Oromo" },
+  { "or", 0, 0, "Oriya" },
+  { "pa", 0, 0, "Punjabi" },
+  { "pl", 0, 0, "Polish" },
+  { "pl", "PL", 0, "Polish (Poland)" },
+  { "ps", 0, 0, "Pashto" },
+  { "pt", 0, 0, "Portuguese" },
+  { "pt", "BR", 0, "Brazilian" },
+  { "pt", "PT", 0, "Portuguese (Portugal)" },
+  { "qu", 0, 0, "Quechua" },
+  { "rm", 0, 0, "Rhaeto-Romance" },
+  { "ro", 0, 0, "Romanian" },
+  { "ro", "RO", 0, "Romanian (Romania)" },
+  { "ru", 0, 0, "Russian" },
+  { "ru", "RU", 0, "Russian (Russia)" },
+  { "rw", 0, 0, "Kinyarwanda" },
+  { "sa", 0, 0, "Sanskrit" },
+  { "sd", 0, 0, "Sindhi" },
+  { "se", 0, 0, "Sami" },
+  { "se", "NO", 0, "Sami (Norway)" },
+  { "si", 0, 0, "Sinhalese" },
+  { "sk", 0, 0, "Slovak" },
+  { "sk", "SK", 0, "Slovak (Slovakia)" },
+  { "sl", 0, 0, "Slovenian" },
+  { "sl", "SI", 0, "Slovenian (Slovenia)" },
+  { "sl", "SL", 0, "Slovenian (Sierra Leone)" },
+  { "sm", 0, 0, "Samoan" },
+  { "so", 0, 0, "Somali" },
+  { "sp", 0, 0, "Unknown language" },
+  { "sq", 0, 0, "Albanian" },
+  { "sq", "AL", 0, "Albanian (Albania)" },
+  { "sr", 0, 0, "Serbian" },
+  { "sr", "YU", 0, "Serbian (Yugoslavia)" },
+  { "sr", 0,"ije", "Serbian" },
+  { "sr", 0, "latin", "Serbian" },
+  { "sr", 0, "Latn", "Serbian" },
+  { "ss", 0, 0, "Swati" },
+  { "st", 0, 0, "Sotho" },
+  { "sv", 0, 0, "Swedish" },
+  { "sv", "SE", 0, "Swedish (Sweden)" },
+  { "sv", "SV", 0, "Swedish (El Salvador)" },
+  { "sw", 0, 0, "Swahili" },
+  { "ta", 0, 0, "Tamil" },
+  { "te", 0, 0, "Telugu" },
+  { "tg", 0, 0, "Tajik" },
+  { "th", 0, 0, "Thai" },
+  { "th", "TH", 0, "Thai (Thailand)" },
+  { "ti", 0, 0, "Tigrinya" },
+  { "tk", 0, 0, "Turkmen" },
+  { "tl", 0, 0, "Tagalog" },
+  { "to", 0, 0, "Tonga" },
+  { "tr", 0, 0, "Turkish" },
+  { "tr", "TR", 0, "Turkish (Turkey)" },
+  { "ts", 0, 0, "Tsonga" },
+  { "tt", 0, 0, "Tatar" },
+  { "ug", 0, 0, "Uighur" },
+  { "uk", 0, 0, "Ukrainian" },
+  { "uk", "UA", 0, "Ukrainian (Ukraine)" },
+  { "ur", 0, 0, "Urdu" },
+  { "ur", "PK", 0, "Urdu (Pakistan)" },
+  { "uz", 0, 0, "Uzbek" },
+  { "uz", 0, "cyrillic", "Uzbek" },
+  { "vi", 0, 0, "Vietnamese" },
+  { "vi", "VN", 0, "Vietnamese (Vietnam)" },
+  { "wa", 0, 0, "Walloon" },
+  { "wo", 0, 0, "Wolof" },
+  { "xh", 0, 0, "Xhosa" },
+  { "yi", 0, 0, "Yiddish" },
+  { "yo", 0, 0, "Yoruba" },
+  { "zh", 0, 0, "Chinese" },
+  { "zh", "CN", 0, "Chinese (simplified)" },
+  { "zh", "HK", 0, "Chinese (Hong Kong)" },
+  { "zh", "TW", 0, "Chinese (traditional)" },
+  { "zu", 0, 0, "Zulu" },
+  { NULL, 0, 0, NULL }
+};
+//*}
+/** Returns the locale string registered for the lowercased \a name, or \a name unchanged if there is no alias */
+std::string
+resolve_language_alias(const std::string& name)
+{
+  typedef std::map<std::string, std::string> Aliases;
+  static Aliases language_aliases;
+  if (language_aliases.empty())
+  {
+    // FIXME: Many of those are not useful for us, since we leave
+    // encoding to the app, not to the language, we could/should
+    // also match against all language names, not just aliases from
+    // locale.alias
+    // NOTE(review): the lookup below uses the lowercased name, so mixed-case keys such as "ja_JP" look unreachable
+    // Aliases taken from /etc/locale.alias
+    language_aliases["bokmal"] = "nb_NO.ISO-8859-1";
+    language_aliases["bokmål"] = "nb_NO.ISO-8859-1";
+    language_aliases["catalan"] = "ca_ES.ISO-8859-1";
+    language_aliases["croatian"] = "hr_HR.ISO-8859-2";
+    language_aliases["czech"] = "cs_CZ.ISO-8859-2";
+    language_aliases["danish"] = "da_DK.ISO-8859-1";
+    language_aliases["dansk"] = "da_DK.ISO-8859-1";
+    language_aliases["deutsch"] = "de_DE.ISO-8859-1";
+    language_aliases["dutch"] = "nl_NL.ISO-8859-1";
+    language_aliases["eesti"] = "et_EE.ISO-8859-1";
+    language_aliases["estonian"] = "et_EE.ISO-8859-1";
+    language_aliases["finnish"] = "fi_FI.ISO-8859-1";
+    language_aliases["français"] = "fr_FR.ISO-8859-1";
+    language_aliases["french"] = "fr_FR.ISO-8859-1";
+    language_aliases["galego"] = "gl_ES.ISO-8859-1";
+    language_aliases["galician"] = "gl_ES.ISO-8859-1";
+    language_aliases["german"] = "de_DE.ISO-8859-1";
+    language_aliases["greek"] = "el_GR.ISO-8859-7";
+    language_aliases["hebrew"] = "he_IL.ISO-8859-8";
+    language_aliases["hrvatski"] = "hr_HR.ISO-8859-2";
+    language_aliases["hungarian"] = "hu_HU.ISO-8859-2";
+    language_aliases["icelandic"] = "is_IS.ISO-8859-1";
+    language_aliases["italian"] = "it_IT.ISO-8859-1";
+    language_aliases["japanese"] = "ja_JP.eucJP";
+    language_aliases["japanese.euc"] = "ja_JP.eucJP";
+    language_aliases["ja_JP"] = "ja_JP.eucJP";
+    language_aliases["ja_JP.ujis"] = "ja_JP.eucJP";
+    language_aliases["japanese.sjis"] = "ja_JP.SJIS";
+    language_aliases["korean"] = "ko_KR.eucKR";
+    language_aliases["korean.euc"] = "ko_KR.eucKR";
+    language_aliases["ko_KR"] = "ko_KR.eucKR";
+    language_aliases["lithuanian"] = "lt_LT.ISO-8859-13";
+    language_aliases["no_NO"] = "nb_NO.ISO-8859-1";
+    language_aliases["no_NO.ISO-8859-1"] = "nb_NO.ISO-8859-1";
+    language_aliases["norwegian"] = "nb_NO.ISO-8859-1";
+    language_aliases["nynorsk"] = "nn_NO.ISO-8859-1";
+    language_aliases["polish"] = "pl_PL.ISO-8859-2";
+    language_aliases["portuguese"] = "pt_PT.ISO-8859-1";
+    language_aliases["romanian"] = "ro_RO.ISO-8859-2";
+    language_aliases["russian"] = "ru_RU.ISO-8859-5";
+    language_aliases["slovak"] = "sk_SK.ISO-8859-2";
+    language_aliases["slovene"] = "sl_SI.ISO-8859-2";
+    language_aliases["slovenian"] = "sl_SI.ISO-8859-2";
+    language_aliases["spanish"] = "es_ES.ISO-8859-1";
+    language_aliases["swedish"] = "sv_SE.ISO-8859-1";
+    language_aliases["thai"] = "th_TH.TIS-620";
+    language_aliases["turkish"] = "tr_TR.ISO-8859-9";
+  }
+
+  std::string name_lowercase;
+  name_lowercase.resize(name.size());
+  for(std::string::size_type i = 0; i < name.size(); ++i) // tolower() needs an unsigned char value; names may hold bytes >= 0x80
+    name_lowercase[i] = static_cast<char>(tolower(static_cast<unsigned char>(name[i])));
+
+  Aliases::iterator i = language_aliases.find(name_lowercase);
+  if (i != language_aliases.end())
+  {
+    return i->second;
+  }
+  else
+  {
+    return name;
+  }
+}
+/** Returns the best-scoring languages[] entry for the given codes, or an undefined Language if the language code is not in the table */
+Language
+Language::from_spec(const std::string& language, const std::string& country, const std::string& modifier)
+{
+  static std::map<std::string, std::vector<LanguageSpec*> > language_map;
+
+  if (language_map.empty())
+  { // Init language_map
+    for(int i = 0; languages[i].language != NULL; ++i)
+      language_map[languages[i].language].push_back(&languages[i]);
+  }
+
+  std::map<std::string, std::vector<LanguageSpec*> >::iterator i = language_map.find(language);
+  if (i != language_map.end())
+  {
+    std::vector<LanguageSpec*>& lst = i->second;
+
+    LanguageSpec tmpspec; // temporary spec describing the request; its name field is left unset and is not read by match()
+    tmpspec.language = language.c_str();
+    tmpspec.country = country.c_str();
+    tmpspec.modifier = modifier.c_str();
+    Language tmplang(&tmpspec);
+
+    LanguageSpec* best_match = 0;
+    int best_match_score = 0;
+    for(std::vector<LanguageSpec*>::iterator j = lst.begin(); j != lst.end(); ++j)
+    { // Search for the language that best matches the given spec, value country more than modifier
+      int score = Language::match(Language(*j), tmplang);
+
+      if (score > best_match_score)
+      {
+        best_match = *j;
+        best_match_score = score;
+      }
+    }
+    assert(best_match); // every entry in lst shares the language code, so match() scored at least 1
+    return Language(best_match);
+  }
+  else
+  {
+    return Language();
+  }
+}
+/** Resolves \a spec_str through resolve_language_alias() and parses the result with from_env() */
+Language
+Language::from_name(const std::string& spec_str)
+{
+  return from_env(resolve_language_alias(spec_str));
+}
+/** Parses a LANGUAGE_COUNTRY.CODESET@MODIFIER string; the codeset is extracted but not passed on to from_spec() */
+Language
+Language::from_env(const std::string& env)
+{
+  // Split LANGUAGE_COUNTRY.CODESET@MODIFIER into parts
+  std::string::size_type ln = env.find('_');
+  std::string::size_type dt = env.find('.');
+  std::string::size_type at = env.find('@');
+
+  std::string language;
+  std::string country;
+  std::string codeset;
+  std::string modifier;
+
+  //std::cout << ln << " " << dt << " " << at << std::endl;
+
+  language = env.substr(0, std::min(std::min(ln, dt), at));
+
+  if (ln != std::string::npos && ln+1 < env.size()) // _
+  {
+    country = env.substr(ln+1, (std::min(dt, at) == std::string::npos) ? std::string::npos : std::min(dt, at) - (ln+1));
+  }
+
+  if (dt != std::string::npos && dt+1 < env.size()) // .
+  {
+    codeset = env.substr(dt+1, (at == std::string::npos) ? std::string::npos : (at - (dt+1)));
+  }
+
+  if (at != std::string::npos && at+1 < env.size()) // @
+  {
+    modifier = env.substr(at+1);
+  }
+
+  return from_spec(language, country, modifier);
+}
+
+Language::Language(LanguageSpec* language_spec_)
+  : language_spec(language_spec_)
+{
+}
+
+Language::Language()
+  : language_spec(0)
+{
+}
+/** Scores how well two languages agree: 0 if the language codes differ, otherwise 1..9 from match_tbl */
+int
+Language::match(const Language& lhs, const Language& rhs)
+{
+  if (lhs.get_language() != rhs.get_language())
+  {
+    return 0;
+  }
+  else
+  {
+    static int match_tbl[3][3] = {
+      // modifier match, wildcard, miss
+      { 9, 8, 5 }, // country match
+      { 7, 6, 3 }, // country wildcard
+      { 4, 2, 1 }, // country miss
+    };
+
+    int c; // row index: 0 = equal, 1 = one side empty (wildcard), 2 = different
+    if (lhs.get_country() == rhs.get_country())
+      c = 0;
+    else if (lhs.get_country().empty() || rhs.get_country().empty())
+      c = 1;
+    else
+      c = 2;
+
+    int m; // column index, same encoding as c but for the modifier
+    if (lhs.get_modifier() == rhs.get_modifier())
+      m = 0;
+    else if (lhs.get_modifier().empty() || rhs.get_modifier().empty())
+      m = 1;
+    else
+      m = 2;
+
+    return match_tbl[c][m];
+  }
+}
+
+std::string
+Language::get_language() const
+{
+  if (language_spec)
+    return language_spec->language;
+  else
+    return "";
+}
+
+std::string
+Language::get_country() const
+{
+  if (language_spec && language_spec->country)
+    return language_spec->country;
+  else
+    return "";
+}
+
+std::string
+Language::get_modifier() const
+{
+  if (language_spec && language_spec->modifier)
+    return language_spec->modifier;
+  else
+    return "";
+}
+
+std::string
+Language::get_name() const
+{
+  if (language_spec)
+    return language_spec->name;
+  else
+    return "";
+}
+
+std::string
+Language::str() const
+{
+  if (language_spec)
+  {
+    std::string var;
+    var += language_spec->language;
+    if (language_spec->country)
+    {
+      var += "_";
+      var += language_spec->country;
+    }
+
+    if (language_spec->modifier)
+    {
+      var += "@";
+      var += language_spec->modifier;
+    }
+    return var;
+  }
+  else
+  {
+    return "";
+  }
+}
+
+bool
+Language::operator==(const Language& rhs)
+{
+  return language_spec == rhs.language_spec;
+}
+
+bool
+Language::operator!=(const Language& rhs)
+{
+  return language_spec != rhs.language_spec;
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/language.hpp b/src/tinygettext/language.hpp
new file mode 100644
index 000000000..90eccc1d4
--- /dev/null
+++ b/src/tinygettext/language.hpp
@@ -0,0 +1,91 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_LANGUAGE_HPP
+#define HEADER_TINYGETTEXT_LANGUAGE_HPP
+
+#include <string>
+
+namespace tinygettext {
+
+struct LanguageSpec;
+
+/** Lightweight wrapper around LanguageSpec */
+class Language
+{
+private:
+  LanguageSpec* language_spec;
+
+  Language(LanguageSpec* language_spec);
+
+public:
+  /** Create a language from language code, country code and modifier:
+      Example: Language::from_spec("de", "DE"); */
+  static Language from_spec(const std::string& language,
+                            const std::string& country = std::string(),
+                            const std::string& modifier = std::string());
+
+  /** Create a language from a language name or a locale string:
+      Example: Language::from_name("deutsch");
+      Example: Language::from_name("de_DE"); */
+  static Language from_name(const std::string& str);
+
+  /** Create a language from an environment variable style string (e.g. de_DE.UTF-8@modifier) */
+  static Language from_env(const std::string& env);
+
+  /** Compares two Languages, returns 0 on mismatch and a score
+      between 1 and 9 on match, the higher the score the better the
+      match */
+  static int match(const Language& lhs, const Language& rhs);
+
+  /** Create an undefined Language object */
+  Language();
+
+  operator bool() const { return language_spec; } // true when a LanguageSpec is attached
+
+  /** Returns the language code (e.g. de, en, fr) */
+  std::string get_language() const;
+
+  /** Returns the country code (e.g. DE, AT, US) */
+  std::string get_country() const;
+
+  /** Returns the modifier of the language (e.g. latin or Latn for
+      Serbian with non-Cyrillic characters) */
+  std::string get_modifier() const;
+
+  /** Returns the human readable name of the Language */
+  std::string get_name() const;
+
+  /** Returns the Language as string in the form of an environment
+      variable: {language}_{country}@{modifier} */
+  std::string str() const;
+
+  bool operator==(const Language& rhs);
+  bool operator!=(const Language& rhs);
+
+  friend bool operator<(const Language& lhs, const Language& rhs);
+};
+
+inline bool operator<(const Language& lhs, const Language& rhs) { // orders by LanguageSpec address
+  return lhs.language_spec < rhs.language_spec;
+}
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/log.cpp b/src/tinygettext/log.cpp
new file mode 100644
index 000000000..be8e2e691
--- /dev/null
+++ b/src/tinygettext/log.cpp
@@ -0,0 +1,70 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <iostream>
+#include "log.hpp"
+
+namespace tinygettext {
+// All three severities write to std::cerr until a callback is installed
+Log::log_callback_t Log::log_info_callback = &Log::default_log_callback;
+Log::log_callback_t Log::log_warning_callback = &Log::default_log_callback;
+Log::log_callback_t Log::log_error_callback = &Log::default_log_callback;
+
+void
+Log::default_log_callback(const std::string& str)
+{
+  std::cerr << "tinygettext: " << str;
+}
+
+void
+Log::set_log_info_callback(log_callback_t callback)
+{
+  log_info_callback = callback;
+}
+
+void
+Log::set_log_warning_callback(log_callback_t callback)
+{
+  log_warning_callback = callback;
+}
+
+void
+Log::set_log_error_callback(log_callback_t callback)
+{
+  log_error_callback = callback;
+}
+
+Log::Log(log_callback_t callback_) :
+  callback(callback_),
+  out()
+{
+}
+// The buffered message is handed to the callback when the Log object is destroyed
+Log::~Log()
+{
+  if (callback) callback(out.str()); // a null callback means "logging disabled"; do not call through it
+}
+
+std::ostream&
+Log::get()
+{
+  return out;
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/log.hpp b/src/tinygettext/log.hpp
new file mode 100644
index 000000000..a8eadb461
--- /dev/null
+++ b/src/tinygettext/log.hpp
@@ -0,0 +1,56 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_LOG_HPP
+#define HEADER_TINYGETTEXT_LOG_HPP
+
+#include <sstream>
+
+namespace tinygettext {
+// Collects one log message in a stream and passes it to a callback on destruction
+class Log
+{
+public:
+  typedef void (*log_callback_t)(const std::string&);
+
+  static log_callback_t log_info_callback;
+  static log_callback_t log_warning_callback;
+  static log_callback_t log_error_callback;
+
+  // Callback used for all severities until a set_log_*_callback() call replaces it
+  static void default_log_callback(const std::string& str);
+
+  static void set_log_info_callback(log_callback_t callback);
+  static void set_log_warning_callback(log_callback_t callback);
+  static void set_log_error_callback(log_callback_t callback);
+
+private:
+  log_callback_t callback;
+  std::ostringstream out;
+
+public:
+  Log(log_callback_t callback);
+  ~Log();
+
+  std::ostream& get(); // stream the message text is written to
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/log_stream.hpp b/src/tinygettext/log_stream.hpp
new file mode 100644
index 000000000..8ad4c022d
--- /dev/null
+++ b/src/tinygettext/log_stream.hpp
@@ -0,0 +1,34 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_LOG_STREAM_HPP
+#define HEADER_TINYGETTEXT_LOG_STREAM_HPP
+
+#include "log.hpp"
+
+namespace tinygettext {
+// Each macro expands to nothing-doing when its callback is null, otherwise to a temporary Log's stream
+// FIXME: very bad to have such things in the API
+#define log_error if (!Log::log_error_callback); else (Log(Log::log_error_callback)).get()
+#define log_warning if (!Log::log_warning_callback); else (Log(Log::log_warning_callback)).get()
+#define log_info if (!Log::log_info_callback); else (Log(Log::log_info_callback)).get()
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/plural_forms.cpp b/src/tinygettext/plural_forms.cpp
new file mode 100644
index 000000000..8271437b7
--- /dev/null
+++ b/src/tinygettext/plural_forms.cpp
@@ -0,0 +1,89 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include "plural_forms.hpp"
+
+#include <map>
+
+namespace tinygettext {
+
+/**
+ * Plural functions are used to select a string that matches a given
+ * count. \a n is the count and the return value is the string index
+ * used in the .po file, for example:
+ *
+ * msgstr[0] = "You got %d error";
+ * msgstr[1] = "You got %d errors";
+ *        ^-- return value of plural function
+ */
+unsigned int plural1(int ) { return 0; }
+unsigned int plural2_1(int n) { return (n != 1); }
+unsigned int plural2_2(int n) { return (n > 1); }
+unsigned int plural2_mk(int n) { return n==1 || n%10==1 ? 0 : 1; }
+unsigned int plural3_lv(int n) { return static_cast<unsigned int>(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2); }
+unsigned int plural3_ga(int n) { return static_cast<unsigned int>(n==1 ? 0 : n==2 ? 1 : 2); }
+unsigned int plural3_lt(int n) { return static_cast<unsigned int>(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2); }
+unsigned int plural3_1(int n) { return static_cast<unsigned int>(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
+unsigned int plural3_sk(int n) { return static_cast<unsigned int>( (n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2 ); }
+unsigned int plural3_pl(int n) { return static_cast<unsigned int>(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); }
+unsigned int plural3_sl(int n) { return static_cast<unsigned int>(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3); } // yields 0..3, i.e. four forms despite the name
+unsigned int plural4_ar(int n) { return static_cast<unsigned int>( n==1 ? 0 : n==2 ? 1 : n>=3 && n<=10 ? 2 : 3 ); }
+/** Looks up the whitespace-stripped "Plural-Forms:" header line; returns an empty PluralForms if it is not a known rule */
+PluralForms
+PluralForms::from_string(const std::string& str)
+{
+  static std::map<std::string, PluralForms> plural_forms;
+
+  if (plural_forms.empty())
+  {
+    // Note that the plural forms here shouldn't contain any spaces
+    plural_forms["Plural-Forms:nplurals=1;plural=0;"] = PluralForms(1, plural1);
+    plural_forms["Plural-Forms:nplurals=2;plural=(n!=1);"] = PluralForms(2, plural2_1);
+    plural_forms["Plural-Forms:nplurals=2;plural=n!=1;"] = PluralForms(2, plural2_1);
+    plural_forms["Plural-Forms:nplurals=2;plural=(n>1);"] = PluralForms(2, plural2_2);
+    plural_forms["Plural-Forms:nplurals=2;plural=n==1||n%10==1?0:1;"] = PluralForms(2, plural2_mk);
+    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n!=0?1:2);"] = plural_forms["Plural-Forms:nplurals=3;plural=n%10==1&&n%100!=11?0:n!=0?1:2);"] = PluralForms(3, plural3_lv); // balanced key added; legacy unbalanced key kept
+    plural_forms["Plural-Forms:nplurals=3;plural=n==1?0:n==2?1:2;"] = PluralForms(3, plural3_ga);
+    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n%10>=2&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_lt);
+    plural_forms["Plural-Forms:nplurals=3;plural=(n%10==1&&n%100!=11?0:n%10>=2&&n%10<=4&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_1);
+    plural_forms["Plural-Forms:nplurals=3;plural=(n==1)?0:(n>=2&&n<=4)?1:2;"] = PluralForms(3, plural3_sk);
+    plural_forms["Plural-Forms:nplurals=3;plural=(n==1?0:n%10>=2&&n%10<=4&&(n%100<10||n%100>=20)?1:2);"] = PluralForms(3, plural3_pl);
+    plural_forms["Plural-Forms:nplurals=4;plural=(n%100==1?0:n%100==2?1:n%100==3||n%100==4?2:3);"] = plural_forms["Plural-Forms:nplurals=3;plural=(n%100==1?0:n%100==2?1:n%100==3||n%100==4?2:3);"] = PluralForms(4, plural3_sl); // the rule selects index 3, so four forms
+
+    plural_forms["Plural-Forms:nplurals=4;plural=n==1?0:n==2?1:n>=3&&n<=10?2:3;"]=PluralForms(4, plural4_ar);
+  }
+
+  // Remove spaces from string before lookup
+  std::string space_less_str;
+  for(std::string::size_type i = 0; i < str.size(); ++i)
+    if (!isspace(static_cast<unsigned char>(str[i]))) // isspace() needs an unsigned char value
+      space_less_str += str[i];
+
+  std::map<std::string, PluralForms>::const_iterator it= plural_forms.find(space_less_str);
+  if (it != plural_forms.end())
+  {
+    return it->second;
+  }
+  else
+  {
+    return PluralForms();
+  }
+}
+
+} // namespace tinygettext
+
+/* EOF */
diff --git a/src/tinygettext/plural_forms.hpp b/src/tinygettext/plural_forms.hpp
new file mode 100644
index 000000000..0b064494b
--- /dev/null
+++ b/src/tinygettext/plural_forms.hpp
@@ -0,0 +1,61 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2006 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef HEADER_TINYGETTEXT_PLURAL_FORMS_HPP
+#define HEADER_TINYGETTEXT_PLURAL_FORMS_HPP
+
+#include <string>
+
+namespace tinygettext {
+// Maps a count n to the index of the msgstr[] form to use
+typedef unsigned int (*PluralFunc)(int n);
+// Pairs a plural-form count with the function that selects a form
+class PluralForms
+{
+private:
+  unsigned int nplural;
+  PluralFunc plural;
+
+public:
+  static PluralForms from_string(const std::string& str);
+  // Creates an empty rule: zero forms, no selector; converts to false
+  PluralForms()
+    : nplural(0),
+      plural(0)
+  {}
+
+  PluralForms(unsigned int nplural_, PluralFunc plural_)
+    : nplural(nplural_),
+      plural(plural_)
+  {}
+
+  unsigned int get_nplural() const { return nplural; }
+  unsigned int get_plural(int n) const { if (plural) return plural(n); else return 0; } // index 0 when no selector is set
+  // Comparisons do not modify the object, so they are const-qualified and usable on const operands
+  bool operator==(const PluralForms& other) const { return nplural == other.nplural && plural == other.plural; }
+  bool operator!=(const PluralForms& other) const { return !(*this == other); }
+
+  operator bool() const {
+    return plural != 0;
+  }
+};
+
+} // namespace tinygettext
+
+#endif
+
+/* EOF */
diff --git a/src/tinygettext/po_parser.cpp b/src/tinygettext/po_parser.cpp
new file mode 100644
index 000000000..5ceb3fd26
--- /dev/null
+++ b/src/tinygettext/po_parser.cpp
@@ -0,0 +1,496 @@
+// tinygettext - A gettext replacement that works directly on .po files
+// Copyright (C) 2009 Ingo Ruhnke
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ +#include "po_parser.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "language.hpp" +#include "log_stream.hpp" +#include "iconv.hpp" +#include "dictionary.hpp" +#include "plural_forms.hpp" + +namespace tinygettext { + +bool POParser::pedantic = true; + +void +POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict) +{ + POParser parser(filename, in, dict); + parser.parse(); +} + +class POParserError {}; + +POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_) : + filename(filename_), + in(in_), + dict(dict_), + use_fuzzy(use_fuzzy_), + running(false), + eof(false), + big5(false), + line_number(0), + current_line(), + conv() +{ +} + +POParser::~POParser() +{ +} + +void +POParser::warning(const std::string& msg) +{ + log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl; + //log_warning << "Line: " << current_line << std::endl; +} + +void +POParser::error(const std::string& msg) +{ + log_error << filename << ":" << line_number << ": error: " << msg << ": " << current_line << std::endl; + + // Try to recover from an error by searching for start of another entry + do + next_line(); + while(!eof && !is_empty_line()); + + throw POParserError(); +} + +void +POParser::next_line() +{ + line_number += 1; + if (!std::getline(in, current_line)) + eof = true; +} + +void +POParser::get_string_line(std::ostringstream& out,unsigned int skip) +{ + if (skip+1 >= static_cast(current_line.size())) + error("unexpected end of line"); + + if (current_line[skip] != '"') + error("expected start of string '\"'"); + + std::string::size_type i; + for(i = skip+1; current_line[i] != '\"'; ++i) + { + if (big5 && static_cast(current_line[i]) >= 0x81 && static_cast(current_line[i]) <= 0xfe) + { + out << current_line[i]; + + i += 1; + + if (i >= current_line.size()) + error("invalid big5 encoding"); + + out << current_line[i]; + } 
+ else if (i >= current_line.size()) + { + error("unexpected end of string"); + } + else if (current_line[i] == '\\') + { + i += 1; + + if (i >= current_line.size()) + error("unexpected end of string in handling '\\'"); + + switch (current_line[i]) + { + case 'a': out << '\a'; break; + case 'b': out << '\b'; break; + case 'v': out << '\v'; break; + case 'n': out << '\n'; break; + case 't': out << '\t'; break; + case 'r': out << '\r'; break; + case '"': out << '"'; break; + case '\\': out << '\\'; break; + default: + std::ostringstream err; + err << "unhandled escape '\\" << current_line[i] << "'"; + warning(err.str()); + + out << current_line[i-1] << current_line[i]; + break; + } + } + else + { + out << current_line[i]; + } + } + + // process trailing garbage in line and warn if there is any + for(i = i+1; i < current_line.size(); ++i) + if (!isspace(current_line[i])) + { + warning("unexpected garbage after string ignoren"); + break; + } +} + +std::string +POParser::get_string(unsigned int skip) +{ + std::ostringstream out; + + if (skip+1 >= static_cast(current_line.size())) + error("unexpected end of line"); + + if (current_line[skip] == ' ' && current_line[skip+1] == '"') + { + get_string_line(out, skip+1); + } + else + { + if (pedantic) + warning("keyword and string must be seperated by a single space"); + + for(;;) + { + if (skip >= static_cast(current_line.size())) + error("unexpected end of line"); + else if (current_line[skip] == '\"') + { + get_string_line(out, skip); + break; + } + else if (!isspace(current_line[skip])) + { + error("string must start with '\"'"); + } + else + { + // skip space + } + + skip += 1; + } + } + +next: + next_line(); + for(std::string::size_type i = 0; i < current_line.size(); ++i) + { + if (current_line[i] == '"') + { + if (i == 1) + if (pedantic) + warning("leading whitespace before string"); + + get_string_line(out, i); + goto next; + } + else if (isspace(current_line[i])) + { + // skip + } + else + { + break; + } + } + + 
return out.str(); +} + +static bool has_prefix(const std::string& lhs, const std::string rhs) +{ + if (lhs.length() < rhs.length()) + return false; + else + return lhs.compare(0, rhs.length(), rhs) == 0; +} + +void +POParser::parse_header(const std::string& header) +{ + std::string from_charset; + std::string::size_type start = 0; + for(std::string::size_type i = 0; i < header.length(); ++i) + { + if (header[i] == '\n') + { + std::string line = header.substr(start, i - start); + + if (has_prefix(line, "Content-Type:")) + { + // from_charset = line.substr(len); + unsigned int len = strlen("Content-Type: text/plain; charset="); + if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0) + { + from_charset = line.substr(len); + + for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch) + *ch = static_cast(toupper(*ch)); + } + else + { + warning("malformed Content-Type header"); + } + } + else if (has_prefix(line, "Plural-Forms:")) + { + PluralForms plural_forms = PluralForms::from_string(line); + if (!plural_forms) + { + warning("unknown Plural-Forms given"); + } + else + { + if (!dict.get_plural_forms()) + { + dict.set_plural_forms(plural_forms); + } + else + { + if (dict.get_plural_forms() != plural_forms) + { + warning("Plural-Forms missmatch between .po file and dictionary"); + } + } + } + } + start = i+1; + } + } + + if (from_charset.empty() || from_charset == "CHARSET") + { + warning("charset not specified for .po, fallback to utf-8"); + from_charset = "UTF-8"; + } + else if (from_charset == "BIG5") + { + big5 = true; + } + + conv.set_charsets(from_charset, dict.get_charset()); +} + +bool +POParser::is_empty_line() +{ + if (current_line.empty()) + { + return true; + } + else if (current_line[0] == '#') + { // handle comments as empty lines + if (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1]))) + return true; + else + return false; + } + else + { + for(std::string::iterator i = 
current_line.begin(); i != current_line.end(); ++i) + { + if (!isspace(*i)) + return false; + } + } + return true; +} + +bool +POParser::prefix(const char* prefix_str) +{ + return current_line.compare(0, strlen(prefix_str), prefix_str) == 0; +} + +void +POParser::parse() +{ + next_line(); + + // skip UTF-8 intro that some text editors produce + // see http://en.wikipedia.org/wiki/Byte-order_mark + if (current_line.size() >= 3 && + current_line[0] == static_cast(0xef) && + current_line[1] == static_cast(0xbb) && + current_line[2] == static_cast(0xbf)) + { + current_line = current_line.substr(3); + } + + // Parser structure + while(!eof) + { + try + { + bool fuzzy = false; + bool has_msgctxt = false; + std::string msgctxt; + std::string msgid; + + while(prefix("#")) + { + if (current_line.size() >= 2 && current_line[1] == ',') + { + // FIXME: Rather simplistic hunt for fuzzy flag + if (current_line.find("fuzzy", 2) != std::string::npos) + fuzzy = true; + } + + next_line(); + } + + if (!is_empty_line()) + { + if (prefix("msgctxt")) + { + has_msgctxt = true; + msgctxt = get_string(7); + } + + if (prefix("msgid")) + msgid = get_string(5); + else + error("expected 'msgid'"); + + if (prefix("msgid_plural")) + { + std::string msgid_plural = get_string(12); + std::vector msgstr_num; + bool saw_nonempty_msgstr = false; + + next: + if (is_empty_line()) + { + if (msgstr_num.empty()) + error("expected 'msgstr[N] (0 <= N <= 9)'"); + } + else if (prefix("msgstr[") && + current_line.size() > 8 && + isdigit(current_line[7]) && current_line[8] == ']') + { + unsigned int number = static_cast(current_line[7] - '0'); + std::string msgstr = get_string(9); + + if(!msgstr.empty()) + saw_nonempty_msgstr = true; + + if (number >= msgstr_num.size()) + msgstr_num.resize(number+1); + + msgstr_num[number] = conv.convert(msgstr); + goto next; + } + else + { + error("expected 'msgstr[N]'"); + } + + if (!is_empty_line()) + error("expected 'msgstr[N]' or empty line"); + + if (saw_nonempty_msgstr) + 
{ + if (use_fuzzy || !fuzzy) + { + if (!dict.get_plural_forms()) + { + warning("msgstr[N] seen, but no Plural-Forms given"); + } + else + { + if (msgstr_num.size() != dict.get_plural_forms().get_nplural()) + { + warning("msgstr[N] count doesn't match Plural-Forms.nplural"); + } + } + + if (has_msgctxt) + dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num); + else + dict.add_translation(msgid, msgid_plural, msgstr_num); + } + + if (0) + { + std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl; + std::cout << "msgid \"" << msgid << "\"" << std::endl; + std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl; + for(std::vector::size_type i = 0; i < msgstr_num.size(); ++i) + std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl; + std::cout << std::endl; + } + } + } + else if (prefix("msgstr")) + { + std::string msgstr = get_string(6); + + if (msgid.empty()) + { + parse_header(msgstr); + } + else if(!msgstr.empty()) + { + if (use_fuzzy || !fuzzy) + { + if (has_msgctxt) + dict.add_translation(msgctxt, msgid, conv.convert(msgstr)); + else + dict.add_translation(msgid, conv.convert(msgstr)); + } + + if (0) + { + std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl; + std::cout << "msgid \"" << msgid << "\"" << std::endl; + std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl; + std::cout << std::endl; + } + } + } + else + { + error("expected 'msgstr' or 'msgid_plural'"); + } + } + + if (!is_empty_line()) + error("expected empty line"); + + next_line(); + } + catch(POParserError&) + { + } + } +} + +} // namespace tinygettext + +/* EOF */ diff --git a/src/tinygettext/po_parser.hpp b/src/tinygettext/po_parser.hpp new file mode 100644 index 000000000..329af5913 --- /dev/null +++ b/src/tinygettext/po_parser.hpp @@ -0,0 +1,75 @@ +// tinygettext - A gettext replacement that works directly on .po files +// Copyright (C) 2009 Ingo Ruhnke +// +// This program is free software; you can redistribute it 
and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#ifndef HEADER_TINYGETTEXT_PO_PARSER_HPP +#define HEADER_TINYGETTEXT_PO_PARSER_HPP + +#include + +#include "iconv.hpp" + +namespace tinygettext { + +class Dictionary; + +class POParser +{ +private: + std::string filename; + std::istream& in; + Dictionary& dict; + bool use_fuzzy; + + bool running; + bool eof; + bool big5; + + int line_number; + std::string current_line; + + IConv conv; + + POParser(const std::string& filename, std::istream& in_, Dictionary& dict_, bool use_fuzzy = true); + ~POParser(); + + void parse_header(const std::string& header); + void parse(); + void next_line(); + std::string get_string(unsigned int skip); + void get_string_line(std::ostringstream& str,unsigned int skip); + bool is_empty_line(); + bool prefix(const char* ); + void error(const std::string& msg) __attribute__((__noreturn__)); + void warning(const std::string& msg); + +public: + /** @param filename name of the istream, only used in error messages + @param in stream from which the PO file is read. 
+ @param dict dictionary to which the strings are written */ + static void parse(const std::string& filename, std::istream& in, Dictionary& dict); + static bool pedantic; + +private: + POParser (const POParser&); + POParser& operator= (const POParser&); +}; + +} // namespace tinygettext + +#endif + +/* EOF */ diff --git a/src/tinygettext/stk_file_system.cpp b/src/tinygettext/stk_file_system.cpp new file mode 100644 index 000000000..6aef1615c --- /dev/null +++ b/src/tinygettext/stk_file_system.cpp @@ -0,0 +1,65 @@ +// tinygettext - A gettext replacement that works directly on .po files +// Copyright (C) 2009 Ingo Ruhnke +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +#include "stk_file_system.hpp" + +#include +#include +#include +#include +#include +#include + +namespace tinygettext { + +StkFileSystem::StkFileSystem() +{ +} + +std::vector +StkFileSystem::open_directory(const std::string& pathname) +{ + DIR* dir = opendir(pathname.c_str()); + if (!dir) + { + // FIXME: error handling + return std::vector(); + } + else + { + std::vector files; + + struct dirent* dp; + while((dp = readdir(dir)) != 0) + { + files.push_back(dp->d_name); + } + closedir(dir); + + return files; + } +} + +std::auto_ptr +StkFileSystem::open_file(const std::string& filename) +{ + return std::auto_ptr(new std::ifstream(filename.c_str())); +} + +} // namespace tinygettext + +/* EOF */ diff --git a/src/tinygettext/stk_file_system.hpp b/src/tinygettext/stk_file_system.hpp new file mode 100644 index 000000000..e120d5b69 --- /dev/null +++ b/src/tinygettext/stk_file_system.hpp @@ -0,0 +1,38 @@ +// tinygettext - A gettext replacement that works directly on .po files +// Copyright (C) 2009 Ingo Ruhnke +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +#ifndef HEADER_TINYGETTEXT_STK_FILE_SYSTEM_HPP +#define HEADER_TINYGETTEXT_STK_FILE_SYSTEM_HPP + +#include "file_system.hpp" + +namespace tinygettext { + +class StkFileSystem : public FileSystem +{ +public: + StkFileSystem(); + + std::vector open_directory(const std::string& pathname); + std::auto_ptr open_file(const std::string& filename); +}; + +} // namespace tinygettext + +#endif + +/* EOF */ diff --git a/src/tinygettext/tinygettext.cpp b/src/tinygettext/tinygettext.cpp new file mode 100644 index 000000000..7f5adc8ae --- /dev/null +++ b/src/tinygettext/tinygettext.cpp @@ -0,0 +1,22 @@ +// tinygettext - A gettext replacement that works directly on .po files +// Copyright (C) 2006 Ingo Ruhnke +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +namespace tinygettext { + +} // namespace tinygettext + +/* EOF */ diff --git a/src/tinygettext/tinygettext.hpp b/src/tinygettext/tinygettext.hpp new file mode 100644 index 000000000..19fd4c657 --- /dev/null +++ b/src/tinygettext/tinygettext.hpp @@ -0,0 +1,27 @@ +// tinygettext - A gettext replacement that works directly on .po files +// Copyright (C) 2006 Ingo Ruhnke +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +#ifndef HEADER_TINYGETTEXT_TINYGETTEXT_HPP +#define HEADER_TINYGETTEXT_TINYGETTEXT_HPP + +#include "dictionary.hpp" +#include "dictionary_manager.hpp" +#include "language.hpp" + +#endif + +/* EOF */