aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jos van den Oever <jos@vandenoever.info> 2019-02-17 23:32:35 +0100
committer Jos van den Oever <jos@vandenoever.info> 2019-02-20 17:19:14 +0100
commit 2cdcd4f30666fd1095ab7cf31361e404db871075 (patch)
tree 4875c4c3a78583342821814b53444443278448e4
parent fbad48f87c1e62b1d56215eba76a7bb7e2d22f1e (diff)
download kconfig-2cdcd4f30666fd1095ab7cf31361e404db871075.tar.gz
kconfig-2cdcd4f30666fd1095ab7cf31361e404db871075.tar.bz2
Write valid UTF8 characters without escaping.
Summary:
Commit 6a18528 introduced escaping of bytes >= 127 to ensure that KConfig files are valid UTF-8. That simplistic cutoff escapes many bytes that do not need it; non-western configuration files in particular would contain many escapes. This commit fixes that by escaping only bytes that are not valid UTF-8.
BUG: 403557
FIXED-IN: 5.56
Test Plan: ninja && ninja test
Reviewers: dfaure, arichardson, apol, #frameworks, thiago
Subscribers: rapiteanu, kde-frameworks-devel
Tags: #frameworks
Differential Revision: https://phabricator.kde.org/D19107
-rw-r--r-- autotests/kconfigtest.cpp 54
-rw-r--r-- autotests/kconfigtest.h 2
-rw-r--r-- src/core/kconfigini.cpp 122
3 files changed, 167 insertions, 11 deletions
diff --git a/autotests/kconfigtest.cpp b/autotests/kconfigtest.cpp
index 64c6223d..2ad3b312 100644
--- a/autotests/kconfigtest.cpp
+++ b/autotests/kconfigtest.cpp
@@ -1736,10 +1736,11 @@ void KConfigTest::testQByteArrayUtf8()
QFile readFile(file.fileName());
QVERIFY(readFile.open(QFile::ReadOnly));
#define VALUE "Utf8=\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd\\xfe\\xff"
+ const QByteArray fileBytes = readFile.readAll();
#ifndef Q_OS_WIN
- QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\n" VALUE "\n"));
+ QCOMPARE(fileBytes, QByteArrayLiteral("[General]\n" VALUE "\n"));
#else
- QCOMPARE(readFile.readAll(), QByteArrayLiteral("[General]\r\n" VALUE "\r\n"));
+ QCOMPARE(fileBytes, QByteArrayLiteral("[General]\r\n" VALUE "\r\n"));
#endif
#undef VALUE
@@ -1749,6 +1750,55 @@ void KConfigTest::testQByteArrayUtf8()
QCOMPARE(bytes, general2.readEntry("Utf8", QByteArray()));
}
+// Data provider for testQStringUtf8().
+// Every row is a single QByteArray of the form "<value>\t<serialized>":
+// the part before the tab is written as a config value, the part after
+// the tab is what is expected to appear in the file on disk.
+void KConfigTest::testQStringUtf8_data()
+{
+    static const struct {
+        const char *tag;
+        const char *payload;
+    } rows[] = {
+        {"1", "Téléchargements\tTéléchargements"},
+        {"2", "$¢ह€𐍈\t$¢ह€𐍈"},
+        {"3", "\xc2\xe0\xa4\xf0\x90\x8d\t\\xc2\\xe0\\xa4\\xf0\\x90\\x8d"},
+        // 2 byte overlong
+        {"4", "\xc1\xbf\t\\xc1\\xbf"},
+        // 3 byte overlong
+        {"5", "\xe0\x9f\xbf\t\\xe0\\x9f\\xbf"},
+        // 4 byte overlong
+        {"6", "\xf0\x8f\xbf\xbf\t\\xf0\\x8f\\xbf\\xbf"},
+        // outside unicode range
+        {"7", "\xf4\x90\x80\x80\t\\xf4\\x90\\x80\\x80"},
+        // just within range
+        {"8", "\xc2\x80\t\xc2\x80"},
+        {"9", "\xe0\xa0\x80\t\xe0\xa0\x80"},
+        {"10", "\xf0\x90\x80\x80\t\xf0\x90\x80\x80"},
+        {"11", "\xf4\x8f\xbf\xbf\t\xf4\x8f\xbf\xbf"},
+    };
+
+    QTest::addColumn<QByteArray>("data");
+    for (const auto &row : rows) {
+        QTest::newRow(row.tag) << QByteArray(row.payload);
+    }
+}
+
+// Writes one value through KConfig, checks the exact bytes that end up in
+// the file, and then checks that reading the file back yields the value.
+void KConfigTest::testQStringUtf8()
+{
+    QFETCH(QByteArray, data);
+    // A data row is "<value>\t<expected serialized form>".
+    const QList<QByteArray> parts = data.split('\t');
+    const QByteArray value = parts[0];
+    const QByteArray serialized = parts[1];
+
+    // Write the value into a fresh temporary config file.
+    QTemporaryFile file;
+    QVERIFY(file.open());
+    KConfig writer(file.fileName(), KConfig::SimpleConfig);
+    KConfigGroup writerGroup(&writer, "General");
+    writerGroup.writeEntry("key", value);
+    writer.sync();
+    file.flush();
+    file.close();
+
+    // Compare the raw file contents with the expected serialization.
+    QFile onDisk(file.fileName());
+    QVERIFY(onDisk.open(QFile::ReadOnly));
+    QByteArray fileBytes = onDisk.readAll();
+#ifdef Q_OS_WIN
+    // Normalize line endings so one expectation works on every platform.
+    fileBytes.replace("\r\n", "\n");
+#endif
+    const QByteArray expectedBytes = QByteArrayLiteral("[General]\nkey=") + serialized + QByteArrayLiteral("\n");
+    QCOMPARE(fileBytes, expectedBytes);
+
+    // check that reading works
+    KConfig reader(file.fileName(), KConfig::SimpleConfig);
+    KConfigGroup readerGroup(&reader, "General");
+    QCOMPARE(value, readerGroup.readEntry("key", QByteArray()));
+}
+
void KConfigTest::testNewlines()
{
// test that kconfig always uses the native line endings
diff --git a/autotests/kconfigtest.h b/autotests/kconfigtest.h
index 0715f45e..26d8e7f5 100644
--- a/autotests/kconfigtest.h
+++ b/autotests/kconfigtest.h
@@ -64,6 +64,8 @@ private Q_SLOTS:
void testReparent();
void testAnonymousConfig();
void testQByteArrayUtf8();
+ void testQStringUtf8_data();
+ void testQStringUtf8();
void testSubGroup();
void testAddConfigSources();
diff --git a/src/core/kconfigini.cpp b/src/core/kconfigini.cpp
index 84d77b48..87c4a8af 100644
--- a/src/core/kconfigini.cpp
+++ b/src/core/kconfigini.cpp
@@ -647,13 +647,110 @@ bool KConfigIniBackend::isLocked() const
return lockFile && lockFile->isLocked();
}
-QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
-{
- static const char nibbleLookup[] = {
- '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+namespace {
+    // Write byte @p s at @p data as the four-character escape sequence
+    // "\xhh", where hh are two lowercase hexadecimal digits.
+    // @param data output position; must have room for 4 bytes
+    // @param s the byte to escape
+    // @return the position just past the written escape sequence
+    char* escapeByte(char* data, unsigned char s) {
+        static const char nibbleLookup[] = {
+            '0', '1', '2', '3', '4', '5', '6', '7',
+            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+        };
+        *data++ = '\\';
+        *data++ = 'x';
+        *data++ = nibbleLookup[s >> 4];
+        *data++ = nibbleLookup[s & 0x0f];
+        return data;
+    }
+
+    // Struct that represents a multi-byte UTF-8 character.
+    // This struct is used to keep track of bytes that seem to be valid
+    // UTF-8. Bytes are collected one at a time with addByte(); once
+    // isComplete() is true they can be copied out literally, otherwise
+    // the collected bytes can be emitted in escaped form.
+    struct Utf8Char {
+    public:
+        unsigned char bytes[4];   // bytes collected so far
+        unsigned char count;      // number of valid entries in bytes
+        unsigned char charLength; // total length announced by the lead byte
+
+        Utf8Char() {
+            clear();
+            charLength = 0;
+        }
+        // Forget the collected bytes.
+        void clear() {
+            count = 0;
+        }
+        // Add a byte to the UTF8 character.
+        // When an additional byte leads to an invalid character, return false.
+        // On failure the byte is not stored and the previously collected
+        // bytes are left untouched, so the caller can still escape them.
+        bool addByte(unsigned char b) {
+            if (count == 0) {
+                // Lead byte. 0xc0 and 0xc1 would start an overlong 2 byte
+                // sequence; 0xf5 and above would exceed U+10FFFF.
+                if (b > 0xc1 && (b & 0xe0) == 0xc0) {
+                    charLength = 2;
+                } else if ((b & 0xf0) == 0xe0) {
+                    charLength = 3;
+                } else if (b < 0xf5 && (b & 0xf8) == 0xf0) {
+                    charLength = 4;
+                } else {
+                    return false;
+                }
+                bytes[0] = b;
+                count = 1;
+            } else if (count < charLength && (b & 0xc0) == 0x80) {
+                // Continuation byte. Never accept more bytes than the lead
+                // byte announced. The second byte has a restricted range
+                // for some lead bytes.
+                if (count == 1) {
+                    if (charLength == 3) {
+                        if (bytes[0] == 0xe0 && b < 0xa0) {
+                            return false; // overlong 3 byte sequence
+                        }
+                        if (bytes[0] == 0xed && b > 0x9f) {
+                            return false; // UTF-16 surrogate U+D800..U+DFFF
+                        }
+                    }
+                    if (charLength == 4) {
+                        if (bytes[0] == 0xf0 && b < 0x90) {
+                            return false; // overlong 4 byte sequence
+                        }
+                        if (bytes[0] == 0xf4 && b > 0x8f) {
+                            return false; // Unicode value larger than U+10FFFF
+                        }
+                    }
+                }
+                bytes[count++] = b;
+            } else {
+                return false;
+            }
+            return true;
+        }
+        // Return true if Utf8Char contains one valid character.
+        bool isComplete() {
+            return count > 0 && count == charLength;
+        }
+        // Add the bytes in this UTF8 character in escaped form to data.
+        // Needs room for 4 bytes per collected byte. Clears the character
+        // and returns the position just past the written output.
+        char* escapeBytes(char* data) {
+            for (unsigned char i = 0; i < count; ++i) {
+                data = escapeByte(data, bytes[i]);
+            }
+            clear();
+            return data;
+        }
+        // Add the bytes of the UTF8 character to a buffer.
+        // Only call this if isComplete() returns true.
+        // Clears the character and returns the position just past the
+        // written output.
+        char* writeUtf8(char* data) {
+            for (unsigned char i = 0; i < count; ++i) {
+                *data++ = bytes[i];
+            }
+            clear();
+            return data;
+        }
+        // Write the bytes in the UTF8 character literally, or, if the
+        // character is not complete, write the escaped bytes.
+        // This is useful to handle the state that remains after handling
+        // all bytes in a buffer.
+        char* write(char* data) {
+            if (isComplete()) {
+                data = writeUtf8(data);
+            } else {
+                data = escapeBytes(data);
+            }
+            return data;
+        }
+     };
+}
+QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
+{
if (aString.isEmpty()) {
return aString;
}
@@ -672,10 +769,16 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
*data++ = 's';
i++;
}
+ Utf8Char utf8;
for (; i < l; ++i/*, r++*/) {
switch (s[i]) {
default:
+ if (utf8.addByte(s[i])) {
+ break;
+ } else {
+ data = utf8.escapeBytes(data);
+ }
// The \n, \t, \r cases (all < 32) are handled below; we can ignore them here
if (((unsigned char)s[i]) < 32) {
goto doEscape;
@@ -717,13 +820,14 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
break;
}
doEscape:
- *data++ = '\\';
- *data++ = 'x';
- *data++ = nibbleLookup[((unsigned char)s[i]) >> 4];
- *data++ = nibbleLookup[((unsigned char)s[i]) & 0x0f];
+ data = escapeByte(data, s[i]);
break;
}
+ if (utf8.isComplete()) {
+ data = utf8.writeUtf8(data);
+ }
}
+ data = utf8.write(data);
*data = 0;
result.resize(data - start);