From 8c34b2d9748311840a93f64494f56a5b1e81015f Mon Sep 17 00:00:00 2001 From: narroo Date: Sat, 15 Feb 2014 19:56:36 -0500 Subject: [PATCH 1/8] Addressed Issue #402. cIniFile can now process UTF-8 files that have a Byte Order Marker, BOM. --- lib/inifile/iniFile.cpp | 154 +++++++++++++++++++++++----------------- lib/inifile/iniFile.h | 2 + 2 files changed, 90 insertions(+), 66 deletions(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index afa1c110d..7b0df3d68 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -83,91 +83,97 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) } } - while (getline(f, line)) + if (getline(f, line)) { - // To be compatible with Win32, check for existence of '\r'. - // Win32 files have the '\r' and Unix files don't at the end of a line. - // Note that the '\r' will be written to INI files from - // Unix so that the created INI file can be read under Win32 - // without change. - size_t lineLength = line.length(); - if (lineLength == 0) - { - continue; - } - if (line[lineLength - 1] == '\r') - { - line = line.substr(0, lineLength - 1); - } + // Removes UTF-8 Byte Order Markers (BOM) if, present. + RemoveBom(line); - if (line.length() == 0) + do { - continue; - } + // To be compatible with Win32, check for existence of '\r'. + // Win32 files have the '\r' and Unix files don't at the end of a line. + // Note that the '\r' will be written to INI files from + // Unix so that the created INI file can be read under Win32 + // without change. + size_t lineLength = line.length(); + if (lineLength == 0) + { + continue; + } + if (line[lineLength - 1] == '\r') + { + line = line.substr(0, lineLength - 1); + } - // Check that the user hasn't opened a binary file by checking the first - // character of each line! 
- if (!isprint(line[0])) - { - printf("%s: Binary-check failed on char %d\n", __FUNCTION__, line[0]); - f.close(); - return false; - } - if ((pLeft = line.find_first_of(";#[=")) == AString::npos) - { - continue; - } + if (line.length() == 0) + { + continue; + } - switch (line[pLeft]) - { + // Check that the user hasn't opened a binary file by checking the first + // character of each line! + if (!isprint(line[0])) + { + printf("%s: Binary-check failed on char %d\n", __FUNCTION__, line[0]); + f.close(); + return false; + } + if ((pLeft = line.find_first_of(";#[=")) == AString::npos) + { + continue; + } + + switch (line[pLeft]) + { case '[': { - if ( - ((pRight = line.find_last_of("]")) != AString::npos) && - (pRight > pLeft) - ) - { - keyname = line.substr(pLeft + 1, pRight - pLeft - 1); - AddKeyName(keyname); - } - break; + if ( + ((pRight = line.find_last_of("]")) != AString::npos) && + (pRight > pLeft) + ) + { + keyname = line.substr(pLeft + 1, pRight - pLeft - 1); + AddKeyName(keyname); + } + break; } case '=': { - valuename = line.substr(0, pLeft); - value = line.substr(pLeft + 1); - AddValue(keyname, valuename, value); - break; + valuename = line.substr(0, pLeft); + value = line.substr(pLeft + 1); + AddValue(keyname, valuename, value); + break; } case ';': case '#': { - if (names.size() == 0) - { - AddHeaderComment(line.substr(pLeft + 1)); - } - else - { - AddKeyComment(keyname, line.substr(pLeft + 1)); - } - break; + if (names.size() == 0) + { + AddHeaderComment(line.substr(pLeft + 1)); + } + else + { + AddKeyComment(keyname, line.substr(pLeft + 1)); + } + break; } - } // switch (line[pLeft]) - } // while (getline()) + } // switch (line[pLeft]) + } while (getline(f, line)); // do - f.close(); - if (names.size() == 0) - { - return false; + f.close(); + if (names.size() == 0) + { + return false; + } + + if (IsFromExampleRedirect) + { + WriteFile(FILE_IO_PREFIX + a_FileName); + } + return true; } - - if (IsFromExampleRedirect) - { - WriteFile(FILE_IO_PREFIX + 
a_FileName); - } - return true; } @@ -824,3 +830,19 @@ AString cIniFile::CheckCase(const AString & s) const + +void cIniFile::RemoveBom(AString & a_line) const +{ + // The BOM sequence for UTF-8 is 0xEF,0xBB,0xBF ( In Unicode Latin I:  ) + static char BOM[] = { 0xEF, 0xBB, 0xBF }; + + // The BOM sequence, if present, is always the first three characters of the input. + if (a_line.compare(0, 3, BOM) == 0) + { + a_line.erase(0, 3); + } +} + + + + diff --git a/lib/inifile/iniFile.h b/lib/inifile/iniFile.h index 40af618dc..0bf1d917e 100644 --- a/lib/inifile/iniFile.h +++ b/lib/inifile/iniFile.h @@ -51,6 +51,8 @@ private: /// If the object is case-insensitive, returns s as lowercase; otherwise returns s as-is AString CheckCase(const AString & s) const; + /// Removes the UTF-8 BOMs (Byte order makers), if present. + void RemoveBom(AString & a_line) const; public: enum errors { From d4f2788008d413f2b2031d55759e54b952d0a964 Mon Sep 17 00:00:00 2001 From: narroo Date: Sun, 16 Feb 2014 07:49:09 -0500 Subject: [PATCH 2/8] Changed char[] to unsigned char[] in cIniFile::RemoveBom --- lib/inifile/iniFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index 7b0df3d68..e2a91c4c2 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -834,7 +834,7 @@ AString cIniFile::CheckCase(const AString & s) const void cIniFile::RemoveBom(AString & a_line) const { // The BOM sequence for UTF-8 is 0xEF,0xBB,0xBF ( In Unicode Latin I:  ) - static char BOM[] = { 0xEF, 0xBB, 0xBF }; + static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF }; // The BOM sequence, if present, is always the first three characters of the input. if (a_line.compare(0, 3, BOM) == 0) From 03fd3b556a3842da5e359f5c2317d1b62c8e14fd Mon Sep 17 00:00:00 2001 From: narroo Date: Sun, 16 Feb 2014 08:22:10 -0500 Subject: [PATCH 3/8] Changed unsigned char[] back to char[]. 
--- lib/inifile/iniFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index e2a91c4c2..db48ad25d 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -834,7 +834,7 @@ AString cIniFile::CheckCase(const AString & s) const void cIniFile::RemoveBom(AString & a_line) const { // The BOM sequence for UTF-8 is 0xEF,0xBB,0xBF ( In Unicode Latin I:  ) - static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF }; + static const AString BOM = "" + 0xEF + 0xBB + 0xBF; // The BOM sequence, if present, is always the first three characters of the input. if (a_line.compare(0, 3, BOM) == 0) From 6eefd54d455d172f7b77819bf9975ed933477c1c Mon Sep 17 00:00:00 2001 From: narroo Date: Sun, 16 Feb 2014 09:25:32 -0500 Subject: [PATCH 4/8] Reworked RemoveBom to use unsigned chars and reverted the logic changes in WriteFile. Should work fine now. --- lib/inifile/iniFile.cpp | 186 +++++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 86 deletions(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index db48ad25d..fbbf5c197 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -83,97 +83,102 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) } } - if (getline(f, line)) + bool IsFirstLine = true; + + while (getline(f, line)) { + // To be compatible with Win32, check for existence of '\r'. + // Win32 files have the '\r' and Unix files don't at the end of a line. + // Note that the '\r' will be written to INI files from + // Unix so that the created INI file can be read under Win32 + // without change. + // Removes UTF-8 Byte Order Markers (BOM) if, present. - RemoveBom(line); - - do + if (IsFirstLine) { - // To be compatible with Win32, check for existence of '\r'. - // Win32 files have the '\r' and Unix files don't at the end of a line. 
- // Note that the '\r' will be written to INI files from - // Unix so that the created INI file can be read under Win32 - // without change. - size_t lineLength = line.length(); - if (lineLength == 0) - { - continue; - } - if (line[lineLength - 1] == '\r') - { - line = line.substr(0, lineLength - 1); - } + RemoveBom(line); + IsFirstLine = false; + } - if (line.length() == 0) - { - continue; - } - - // Check that the user hasn't opened a binary file by checking the first - // character of each line! - if (!isprint(line[0])) - { - printf("%s: Binary-check failed on char %d\n", __FUNCTION__, line[0]); - f.close(); - return false; - } - if ((pLeft = line.find_first_of(";#[=")) == AString::npos) - { - continue; - } - - switch (line[pLeft]) - { - case '[': - { - if ( - ((pRight = line.find_last_of("]")) != AString::npos) && - (pRight > pLeft) - ) - { - keyname = line.substr(pLeft + 1, pRight - pLeft - 1); - AddKeyName(keyname); - } - break; - } - - case '=': - { - valuename = line.substr(0, pLeft); - value = line.substr(pLeft + 1); - AddValue(keyname, valuename, value); - break; - } - - case ';': - case '#': - { - if (names.size() == 0) - { - AddHeaderComment(line.substr(pLeft + 1)); - } - else - { - AddKeyComment(keyname, line.substr(pLeft + 1)); - } - break; - } - } // switch (line[pLeft]) - } while (getline(f, line)); // do - - f.close(); - if (names.size() == 0) + size_t lineLength = line.length(); + if (lineLength == 0) { + continue; + } + if (line[lineLength - 1] == '\r') + { + line = line.substr(0, lineLength - 1); + } + + if (line.length() == 0) + { + continue; + } + + // Check that the user hasn't opened a binary file by checking the first + // character of each line! 
+ if (!isprint(line[0])) + { + printf("%s: Binary-check failed on char %d\n", __FUNCTION__, line[0]); + f.close(); return false; } - - if (IsFromExampleRedirect) + if ((pLeft = line.find_first_of(";#[=")) == AString::npos) { - WriteFile(FILE_IO_PREFIX + a_FileName); + continue; } - return true; + + switch (line[pLeft]) + { + case '[': + { + if ( + ((pRight = line.find_last_of("]")) != AString::npos) && + (pRight > pLeft) + ) + { + keyname = line.substr(pLeft + 1, pRight - pLeft - 1); + AddKeyName(keyname); + } + break; + } + + case '=': + { + valuename = line.substr(0, pLeft); + value = line.substr(pLeft + 1); + AddValue(keyname, valuename, value); + break; + } + + case ';': + case '#': + { + if (names.size() == 0) + { + AddHeaderComment(line.substr(pLeft + 1)); + } + else + { + AddKeyComment(keyname, line.substr(pLeft + 1)); + } + break; + } + } // switch (line[pLeft]) + } // while(getline(f, line)) + + f.close(); + if (names.size() == 0) + { + return false; } + + if (IsFromExampleRedirect) + { + WriteFile(FILE_IO_PREFIX + a_FileName); + } + + return true; } @@ -833,14 +838,23 @@ AString cIniFile::CheckCase(const AString & s) const void cIniFile::RemoveBom(AString & a_line) const { - // The BOM sequence for UTF-8 is 0xEF,0xBB,0xBF ( In Unicode Latin I:  ) - static const AString BOM = "" + 0xEF + 0xBB + 0xBF; + // The BOM sequence for UTF-8 is 0xEF,0xBB,0xBF + static unsigned const char BOM[] = { 0xEF, 0xBB, 0xBF }; - // The BOM sequence, if present, is always the first three characters of the input. - if (a_line.compare(0, 3, BOM) == 0) + // The BOM sequence, if present, is always the first three characters of the input. + const AString ref = a_line.substr(0, 3); + + // If any of the first three chars do not match, return and do nothing. + for (int i = 0; i < 3; ++i) { - a_line.erase(0, 3); + if (static_cast<unsigned char>(ref[i]) != BOM[i]) + { + return; + } } + + // First three characters match; erase them. 
+ a_line.erase(0, 3); } From 3ce8bf9712f09bf670e3cb11138c7be9bb26b211 Mon Sep 17 00:00:00 2001 From: narroo Date: Mon, 17 Feb 2014 08:45:31 -0500 Subject: [PATCH 5/8] Fixed Tab spacing of cases. --- lib/inifile/iniFile.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index fbbf5c197..212c1d14d 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -130,8 +130,8 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) switch (line[pLeft]) { - case '[': - { + case '[': + { if ( ((pRight = line.find_last_of("]")) != AString::npos) && (pRight > pLeft) @@ -141,19 +141,19 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) AddKeyName(keyname); } break; - } + } - case '=': - { + case '=': + { valuename = line.substr(0, pLeft); value = line.substr(pLeft + 1); AddValue(keyname, valuename, value); break; - } + } - case ';': - case '#': - { + case ';': + case '#': + { if (names.size() == 0) { AddHeaderComment(line.substr(pLeft + 1)); @@ -163,7 +163,7 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) AddKeyComment(keyname, line.substr(pLeft + 1)); } break; - } + } } // switch (line[pLeft]) } // while(getline(f, line)) From ecabb2b34f8dea427868f76019a1dd7f2b031bf9 Mon Sep 17 00:00:00 2001 From: narroo Date: Mon, 17 Feb 2014 08:46:41 -0500 Subject: [PATCH 6/8] Fixed the tab spacing. 
--- lib/inifile/iniFile.cpp | 44 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index 212c1d14d..9c5300cdf 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -132,37 +132,37 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) { case '[': { - if ( - ((pRight = line.find_last_of("]")) != AString::npos) && - (pRight > pLeft) - ) - { - keyname = line.substr(pLeft + 1, pRight - pLeft - 1); - AddKeyName(keyname); - } - break; + if ( + ((pRight = line.find_last_of("]")) != AString::npos) && + (pRight > pLeft) + ) + { + keyname = line.substr(pLeft + 1, pRight - pLeft - 1); + AddKeyName(keyname); + } + break; } case '=': { - valuename = line.substr(0, pLeft); - value = line.substr(pLeft + 1); - AddValue(keyname, valuename, value); - break; + valuename = line.substr(0, pLeft); + value = line.substr(pLeft + 1); + AddValue(keyname, valuename, value); + break; } case ';': case '#': { - if (names.size() == 0) - { - AddHeaderComment(line.substr(pLeft + 1)); - } - else - { - AddKeyComment(keyname, line.substr(pLeft + 1)); - } - break; + if (names.size() == 0) + { + AddHeaderComment(line.substr(pLeft + 1)); + } + else + { + AddKeyComment(keyname, line.substr(pLeft + 1)); + } + break; } } // switch (line[pLeft]) } // while(getline(f, line)) From 952a338c7f5518bacd5b733ecbcc62c02cfc60b2 Mon Sep 17 00:00:00 2001 From: narroo Date: Mon, 17 Feb 2014 08:50:22 -0500 Subject: [PATCH 7/8] Fixed Comment Typo. 
--- lib/inifile/iniFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index 9c5300cdf..c56292059 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -165,7 +165,7 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) break; } } // switch (line[pLeft]) - } // while(getline(f, line)) + } // while(getline()) f.close(); if (names.size() == 0) From 794be05f229009ae9e00150b50ccff908b71b1fd Mon Sep 17 00:00:00 2001 From: narroo Date: Mon, 17 Feb 2014 08:51:36 -0500 Subject: [PATCH 8/8] Fixed comment typo --- lib/inifile/iniFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/inifile/iniFile.cpp b/lib/inifile/iniFile.cpp index c56292059..cf8b63987 100644 --- a/lib/inifile/iniFile.cpp +++ b/lib/inifile/iniFile.cpp @@ -165,7 +165,7 @@ bool cIniFile::ReadFile(const AString & a_FileName, bool a_AllowExampleRedirect) break; } } // switch (line[pLeft]) - } // while(getline()) + } // while (getline()) f.close(); if (names.size() == 0)