diff options
-rw-r--r-- | autotests/kconfigtest.cpp | 54 | ||||
-rw-r--r-- | autotests/kconfigtest.h | 2 | ||||
-rw-r--r-- | src/core/kconfigini.cpp | 122 |
3 files changed, 167 insertions, 11 deletions
diff --git a/autotests/kconfigtest.cpp b/autotests/kconfigtest.cpp index 64c6223d..2ad3b312 100644 --- a/autotests/kconfigtest.cpp +++ b/autotests/kconfigtest.cpp @@ -1736,10 +1736,11 @@ void KConfigTest::testQByteArrayUtf8() QFile readFile(file.fileName()); QVERIFY(readFile.open(QFile::ReadOnly)); #define VALUE "Utf8=\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff" + const QByteArray fileBytes = readFile.readAll(); #ifndef Q_OS_WIN - QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\n" VALUE "\n")); + QCOMPARE(fileBytes, QByteArrayLiteral("[General]\n" VALUE "\n")); #else - QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\r\n" VALUE "\r\n")); + QCOMPARE(fileBytes, QByteArrayLiteral("[General]\r\n" VALUE "\r\n")); #endif #undef VALUE @@ -1749,6 +1750,55 @@ void KConfigTest::testQByteArrayUtf8() QCOMPARE(bytes, general2.readEntry("Utf8", QByteArray())); } +void KConfigTest::testQStringUtf8_data() +{ + QTest::addColumn<QByteArray>("data"); + QTest::newRow("1") << QByteArray("Téléchargements\tTéléchargements"); + QTest::newRow("2") << QByteArray("$¢ह€𐍈\t$¢ह€𐍈"); + QTest::newRow("3") << QByteArray("\xc2\xe0\xa4\xf0\x90\x8d\t\\xc2\\xe0\\xa4\\xf0\\x90\\x8d"); + // 2 byte overlong + QTest::newRow("4") << QByteArray("\xc1\xbf\t\\xc1\\xbf"); + // 3 byte overlong + QTest::newRow("5") << QByteArray("\xe0\x9f\xbf\t\\xe0\\x9f\\xbf"); + // 4 byte overlong + QTest::newRow("6") << QByteArray("\xf0\x8f\xbf\xbf\t\\xf0\\x8f\\xbf\\xbf"); + // outside unicode range + QTest::newRow("7") << QByteArray("\xf4\x90\x80\x80\t\\xf4\\x90\\x80\\x80"); + // just within range + QTest::newRow("8") << QByteArray("\xc2\x80\t\xc2\x80"); + QTest::newRow("9") << QByteArray("\xe0\xa0\x80\t\xe0\xa0\x80"); + QTest::newRow("10") << QByteArray("\xf0\x90\x80\x80\t\xf0\x90\x80\x80"); + QTest::newRow("11") << QByteArray("\xf4\x8f\xbf\xbf\t\xf4\x8f\xbf\xbf"); +} + +void KConfigTest::testQStringUtf8() +{ + QFETCH(QByteArray, data); + const QList<QByteArray> d = data.split('\t'); + const QByteArray value = d[0]; + const QByteArray serialized = d[1]; + QTemporaryFile file; + QVERIFY(file.open()); + KConfig config(file.fileName(), KConfig::SimpleConfig); + KConfigGroup general(&config, "General"); + general.writeEntry("key", value); + config.sync(); + file.flush(); + file.close(); + QFile readFile(file.fileName()); + QVERIFY(readFile.open(QFile::ReadOnly)); + QByteArray fileBytes = readFile.readAll(); +#ifdef Q_OS_WIN + fileBytes.replace("\r\n", "\n"); +#endif + QCOMPARE(fileBytes, QByteArrayLiteral("[General]\nkey=") + serialized + QByteArrayLiteral("\n")); + + // check that reading works + KConfig config2(file.fileName(), KConfig::SimpleConfig); + KConfigGroup general2(&config2, "General"); + QCOMPARE(value, general2.readEntry("key", QByteArray())); +} + void KConfigTest::testNewlines() { // test that kconfig always uses the native line endings diff --git a/autotests/kconfigtest.h b/autotests/kconfigtest.h index 0715f45e..26d8e7f5 100644 --- a/autotests/kconfigtest.h +++ b/autotests/kconfigtest.h @@ -64,6 +64,8 @@ private Q_SLOTS: void testReparent(); void testAnonymousConfig(); void testQByteArrayUtf8(); + void testQStringUtf8_data(); + void testQStringUtf8(); void testSubGroup(); void testAddConfigSources(); diff --git a/src/core/kconfigini.cpp b/src/core/kconfigini.cpp index 84d77b48..87c4a8af 100644 --- a/src/core/kconfigini.cpp +++ b/src/core/kconfigini.cpp @@ -647,13 +647,110 @@ bool KConfigIniBackend::isLocked() const return lockFile && lockFile->isLocked(); } -QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type) -{ - static const char nibbleLookup[] = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' +namespace { + // serialize an escaped byte at the end of @param data + // @param data should have room for 4 bytes + char* escapeByte(char* data, unsigned char s) { + static const char nibbleLookup[] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + *data++ = '\\'; + *data++ = 'x'; + *data++ = nibbleLookup[s >> 4]; + *data++ = nibbleLookup[s & 0x0f]; + return data; + } + + // Struct that represents a multi-byte UTF-8 character. + // This struct is used to keep track of bytes that seem to be valid + // UTF-8. + struct Utf8Char { + public: + unsigned char bytes[4]; + unsigned char count; + unsigned char charLength; + + Utf8Char() { + clear(); + charLength = 0; + } + void clear() { + count = 0; + } + // Add a byte to the UTF8 character. + // When an additional byte leads to an invalid character, return false. + bool addByte(unsigned char b) { + if (count == 0) { + if (b > 0xc1 && (b & 0xe0) == 0xc0) { + charLength = 2; + } else if ((b & 0xf0) == 0xe0) { + charLength = 3; + } else if (b < 0xf5 && (b & 0xf8) == 0xf0) { + charLength = 4; + } else { + return false; + } + bytes[0] = b; + count = 1; + } else if (count < 4 && (b & 0xc0) == 0x80) { + if (count == 1) { + if (charLength == 3 && bytes[0] == 0xe0 && b < 0xa0) { + return false; // overlong 3 byte sequence + } + if (charLength == 4) { + if (bytes[0] == 0xf0 && b < 0x90) { + return false; // overlong 4 byte sequence + } + if (bytes[0] == 0xf4 && b > 0x8f) { + return false; // Unicode value larger than U+10FFFF + } + } + } + bytes[count++] = b; + } else { + return false; + } + return true; + } + // Return true if Utf8Char contains one valid character. + bool isComplete() { + return count > 0 && count == charLength; + } + // Add the bytes in this UTF8 character in escaped form to data. + char* escapeBytes(char* data) { + for (unsigned char i = 0; i < count; ++i) { + data = escapeByte(data, bytes[i]); + } + clear(); + return data; + } + // Add the bytes of the UTF8 character to a buffer. + // Only call this if isComplete() returns true. + char* writeUtf8(char* data) { + for (unsigned char i = 0; i < count; ++i) { + *data++ = bytes[i]; + } + clear(); + return data; + } + // Write the bytes in the UTF8 character literally, or, if the + // character is not complete, write the escaped bytes. + // This is useful to handle the state that remains after handling + // all bytes in a buffer. + char* write(char* data) { + if (isComplete()) { + data = writeUtf8(data); + } else { + data = escapeBytes(data); + } + return data; + } }; +} +QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type) +{ if (aString.isEmpty()) { return aString; } @@ -672,10 +769,16 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin *data++ = 's'; i++; } + Utf8Char utf8; for (; i < l; ++i/*, r++*/) { switch (s[i]) { default: + if (utf8.addByte(s[i])) { + break; + } else { + data = utf8.escapeBytes(data); + } // The \n, \t, \r cases (all < 32) are handled below; we can ignore them here if (((unsigned char)s[i]) < 32) { goto doEscape; @@ -717,13 +820,14 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin break; } doEscape: - *data++ = '\\'; - *data++ = 'x'; - *data++ = nibbleLookup[((unsigned char)s[i]) >> 4]; - *data++ = nibbleLookup[((unsigned char)s[i]) & 0x0f]; + data = escapeByte(data, s[i]); break; } + if (utf8.isComplete()) { + data = utf8.writeUtf8(data); + } } + data = utf8.write(data); *data = 0; result.resize(data - start); |