diff options
| -rw-r--r-- | autotests/kconfigtest.cpp | 54 | ||||
| -rw-r--r-- | autotests/kconfigtest.h | 2 | ||||
| -rw-r--r-- | src/core/kconfigini.cpp | 122 | 
3 files changed, 167 insertions, 11 deletions
| diff --git a/autotests/kconfigtest.cpp b/autotests/kconfigtest.cpp index 64c6223d..2ad3b312 100644 --- a/autotests/kconfigtest.cpp +++ b/autotests/kconfigtest.cpp @@ -1736,10 +1736,11 @@ void KConfigTest::testQByteArrayUtf8()      QFile readFile(file.fileName());      QVERIFY(readFile.open(QFile::ReadOnly));  #define VALUE "Utf8=\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff" +    const QByteArray fileBytes = readFile.readAll();  #ifndef Q_OS_WIN -    QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\n" VALUE "\n")); +    QCOMPARE(fileBytes, QByteArrayLiteral("[General]\n" VALUE "\n"));  #else -    QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\r\n" VALUE "\r\n")); +    QCOMPARE(fileBytes, QByteArrayLiteral("[General]\r\n" VALUE "\r\n"));  #endif  #undef VALUE @@ -1749,6 +1750,55 @@ void KConfigTest::testQByteArrayUtf8()      QCOMPARE(bytes, general2.readEntry("Utf8", QByteArray()));  } +void KConfigTest::testQStringUtf8_data() +{ +    QTest::addColumn<QByteArray>("data"); +    QTest::newRow("1") << QByteArray("Téléchargements\tTéléchargements"); +    QTest::newRow("2") << QByteArray("$¢ह€𐍈\t$¢ह€𐍈"); +    QTest::newRow("3") << QByteArray("\xc2\xe0\xa4\xf0\x90\x8d\t\\xc2\\xe0\\xa4\\xf0\\x90\\x8d"); +    // 2 byte overlong +    QTest::newRow("4") << QByteArray("\xc1\xbf\t\\xc1\\xbf"); +    // 3 byte overlong +    QTest::newRow("5") << QByteArray("\xe0\x9f\xbf\t\\xe0\\x9f\\xbf"); +    // 4 byte overlong +    QTest::newRow("6") << QByteArray("\xf0\x8f\xbf\xbf\t\\xf0\\x8f\\xbf\\xbf"); +    // outside unicode range +    QTest::newRow("7") << QByteArray("\xf4\x90\x80\x80\t\\xf4\\x90\\x80\\x80"); +    // just within range +    QTest::newRow("8") << QByteArray("\xc2\x80\t\xc2\x80"); +    QTest::newRow("9") << QByteArray("\xe0\xa0\x80\t\xe0\xa0\x80"); +    QTest::newRow("10") << QByteArray("\xf0\x90\x80\x80\t\xf0\x90\x80\x80"); +    QTest::newRow("11") << QByteArray("\xf4\x8f\xbf\xbf\t\xf4\x8f\xbf\xbf"); +} + +void KConfigTest::testQStringUtf8() +{ +    QFETCH(QByteArray, data); +    const QList<QByteArray> d = data.split('\t'); +    const QByteArray value = d[0]; +    const QByteArray serialized = d[1]; +    QTemporaryFile file; +    QVERIFY(file.open()); +    KConfig config(file.fileName(), KConfig::SimpleConfig); +    KConfigGroup general(&config, "General"); +    general.writeEntry("key", value); +    config.sync(); +    file.flush(); +    file.close(); +    QFile readFile(file.fileName()); +    QVERIFY(readFile.open(QFile::ReadOnly)); +    QByteArray fileBytes = readFile.readAll(); +#ifdef Q_OS_WIN +    fileBytes.replace("\r\n", "\n"); +#endif +    QCOMPARE(fileBytes, QByteArrayLiteral("[General]\nkey=") + serialized + QByteArrayLiteral("\n")); + +    // check that reading works +    KConfig config2(file.fileName(), KConfig::SimpleConfig); +    KConfigGroup general2(&config2, "General"); +    QCOMPARE(value, general2.readEntry("key", QByteArray())); +} +  void KConfigTest::testNewlines()  {      // test that kconfig always uses the native line endings diff --git a/autotests/kconfigtest.h b/autotests/kconfigtest.h index 0715f45e..26d8e7f5 100644 --- a/autotests/kconfigtest.h +++ b/autotests/kconfigtest.h @@ -64,6 +64,8 @@ private Q_SLOTS:      void testReparent();      void testAnonymousConfig();      void testQByteArrayUtf8(); +    void testQStringUtf8_data(); +    void testQStringUtf8();      void testSubGroup();      void testAddConfigSources(); diff --git a/src/core/kconfigini.cpp b/src/core/kconfigini.cpp index 84d77b48..87c4a8af 100644 --- a/src/core/kconfigini.cpp +++ b/src/core/kconfigini.cpp @@ -647,13 +647,110 @@ bool KConfigIniBackend::isLocked() const      return lockFile && lockFile->isLocked();  } -QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type) -{ -    static const char nibbleLookup[] = { -        '0', '1', '2', '3', '4', '5', '6', '7', -        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' +namespace { +    // serialize an escaped byte at the end of @param data +    // @param data should have room for 4 bytes +    char* escapeByte(char* data, unsigned char s) { +        static const char nibbleLookup[] = { +            '0', '1', '2', '3', '4', '5', '6', '7', +            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' +        }; +        *data++ = '\\'; +        *data++ = 'x'; +        *data++ = nibbleLookup[s >> 4]; +        *data++ = nibbleLookup[s & 0x0f]; +        return data; +    } + +    // Struct that represents a multi-byte UTF-8 character. +    // This struct is used to keep track of bytes that seem to be valid +    // UTF-8. +    struct Utf8Char { +    public: +        unsigned char bytes[4]; +        unsigned char count; +        unsigned char charLength; + +        Utf8Char() { +            clear(); +            charLength = 0; +        } +        void clear() { +            count = 0; +        } +        // Add a byte to the UTF8 character. +        // When an additional byte leads to an invalid character, return false. +        bool addByte(unsigned char b) { +            if (count == 0) { +                if (b > 0xc1 && (b & 0xe0) == 0xc0) { +                    charLength = 2; +                } else if ((b & 0xf0) == 0xe0) { +                    charLength = 3; +                } else if (b < 0xf5 && (b & 0xf8) == 0xf0) { +                    charLength = 4; +                } else { +                    return false; +                } +                bytes[0] = b; +                count = 1; +            } else if (count < 4 && (b & 0xc0) == 0x80) { +                if (count == 1) { +                    if (charLength == 3 && bytes[0] == 0xe0 && b < 0xa0) { +                        return false; // overlong 3 byte sequence +                    } +                    if (charLength == 4) { +                        if (bytes[0] == 0xf0 && b < 0x90) { +                            return false; // overlong 4 byte sequence +                        } +                        if (bytes[0] == 0xf4 && b > 0x8f) { +                            return false; // Unicode value larger than U+10FFFF +                        } +                    } +                } +                bytes[count++] = b; +            } else { +                return false; +            } +            return true; +        } +        // Return true if Utf8Char contains one valid character. +        bool isComplete() { +            return count > 0 && count == charLength; +        } +        // Add the bytes in this UTF8 character in escaped form to data. +        char* escapeBytes(char* data) { +            for (unsigned char i = 0; i < count; ++i) { +                data = escapeByte(data, bytes[i]); +            } +            clear(); +            return data; +        } +        // Add the bytes of the UTF8 character to a buffer. +        // Only call this if isComplete() returns true. +        char* writeUtf8(char* data) { +            for (unsigned char i = 0; i < count; ++i) { +                *data++ = bytes[i]; +            } +            clear(); +            return data; +        } +        // Write the bytes in the UTF8 character literally, or, if the +        // character is not complete, write the escaped bytes. +        // This is useful to handle the state that remains after handling +        // all bytes in a buffer. +        char* write(char* data) { +            if (isComplete()) { +                data = writeUtf8(data); +            } else { +                data = escapeBytes(data); +            } +            return data; +        }      }; +} +QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type) +{      if (aString.isEmpty()) {          return aString;      } @@ -672,10 +769,16 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin          *data++ = 's';          i++;      } +    Utf8Char utf8;      for (; i < l; ++i/*, r++*/) {          switch (s[i]) {          default: +            if (utf8.addByte(s[i])) { +                break; +            } else { +                data = utf8.escapeBytes(data); +            }              // The \n, \t, \r cases (all < 32) are handled below; we can ignore them here              if (((unsigned char)s[i]) < 32) {                  goto doEscape; @@ -717,13 +820,14 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin                  break;              }          doEscape: -            *data++ = '\\'; -            *data++ = 'x'; -            *data++ = nibbleLookup[((unsigned char)s[i]) >> 4]; -            *data++ = nibbleLookup[((unsigned char)s[i]) & 0x0f]; +            data = escapeByte(data, s[i]);              break;          } +        if (utf8.isComplete()) { +            data = utf8.writeUtf8(data); +        }      } +    data = utf8.write(data);      *data = 0;      result.resize(data - start); | 
