diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/kconfigini.cpp | 122 |
1 files changed, 113 insertions, 9 deletions
// Patch context (kept verbatim from the diff this block was reconstructed from):
//   diff --git a/src/core/kconfigini.cpp b/src/core/kconfigini.cpp
//   index 84d77b48..87c4a8af 100644
//   --- a/src/core/kconfigini.cpp
//   +++ b/src/core/kconfigini.cpp
//   @@ -647,13 +647,110 @@ bool KConfigIniBackend::isLocked() const
//        return lockFile && lockFile->isLocked();
//    }
//   -QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
//   -{
//   -    static const char nibbleLookup[] = {
//   -        '0', '1', '2', '3', '4', '5', '6', '7',
//   -        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
//
// The patch moves the hex-escape logic out of stringToPrintable() into the
// file-local helpers below and adds a small UTF-8 validator so that valid
// multi-byte UTF-8 can be written literally while everything else is escaped.
namespace {
    // Serialize one byte in escaped form ("\xNN", lowercase hex) at @param data.
    // @param data output cursor; must have room for 4 bytes
    // @param s    the byte to escape
    // @return the output cursor advanced past the 4 bytes written
    char* escapeByte(char* data, unsigned char s) {
        static const char nibbleLookup[] = {
            '0', '1', '2', '3', '4', '5', '6', '7',
            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
        };
        *data++ = '\\';
        *data++ = 'x';
        *data++ = nibbleLookup[s >> 4];
        *data++ = nibbleLookup[s & 0x0f];
        return data;
    }

    // Struct that represents a multi-byte UTF-8 character.
    // This struct is used to keep track of bytes that seem to be valid
    // UTF-8. Bytes are fed in one at a time with addByte(); once
    // isComplete() is true the character can be written literally, otherwise
    // the pending bytes can be written in escaped form.
    struct Utf8Char {
    public:
        unsigned char bytes[4];   // pending bytes of the current character
        unsigned char count;      // number of valid entries in bytes
        unsigned char charLength; // total length announced by the lead byte

        Utf8Char() {
            clear();
            charLength = 0;
        }
        // Forget any pending bytes.
        void clear() {
            count = 0;
        }
        // Add a byte to the UTF8 character.
        // When an additional byte leads to an invalid character, return false.
        // On failure the pending bytes are left untouched so that the caller
        // can flush them (see escapeBytes()).
        bool addByte(unsigned char b) {
            if (count == 0) {
                // Lead byte. 0xc0/0xc1 would start an overlong 2 byte
                // sequence; 0xf5 and above would exceed U+10FFFF.
                if (b > 0xc1 && (b & 0xe0) == 0xc0) {
                    charLength = 2;
                } else if ((b & 0xf0) == 0xe0) {
                    charLength = 3;
                } else if (b < 0xf5 && (b & 0xf8) == 0xf0) {
                    charLength = 4;
                } else {
                    return false;
                }
                bytes[0] = b;
                count = 1;
                return true;
            }
            // Continuation byte. Bound by the announced length rather than
            // by the buffer size, so an already complete character never
            // swallows extra continuation bytes.
            if (count >= charLength || (b & 0xc0) != 0x80) {
                return false;
            }
            if (count == 1) {
                if (charLength == 3) {
                    if (bytes[0] == 0xe0 && b < 0xa0) {
                        return false; // overlong 3 byte sequence
                    }
                    if (bytes[0] == 0xed && b > 0x9f) {
                        return false; // UTF-16 surrogate U+D800..U+DFFF
                    }
                }
                if (charLength == 4) {
                    if (bytes[0] == 0xf0 && b < 0x90) {
                        return false; // overlong 4 byte sequence
                    }
                    if (bytes[0] == 0xf4 && b > 0x8f) {
                        return false; // Unicode value larger than U+10FFFF
                    }
                }
            }
            bytes[count++] = b;
            return true;
        }
        // Return true if Utf8Char contains one valid character.
        bool isComplete() const {
            return count > 0 && count == charLength;
        }
        // Add the bytes in this UTF8 character in escaped form to data.
        // Needs room for 4 * count bytes. Clears the pending bytes.
        char* escapeBytes(char* data) {
            for (unsigned char i = 0; i < count; ++i) {
                data = escapeByte(data, bytes[i]);
            }
            clear();
            return data;
        }
        // Add the bytes of the UTF8 character to a buffer.
        // Only call this if isComplete() returns true.
        // Needs room for count bytes. Clears the pending bytes.
        char* writeUtf8(char* data) {
            for (unsigned char i = 0; i < count; ++i) {
                *data++ = bytes[i];
            }
            clear();
            return data;
        }
        // Write the bytes in the UTF8 character literally, or, if the
        // character is not complete, write the escaped bytes.
        // This is useful to handle the state that remains after handling
        // all bytes in a buffer.
        char* write(char* data) {
            if (isComplete()) {
                data = writeUtf8(data);
            } else {
                data = escapeBytes(data);
            }
            return data;
        }
    };
}

// Remaining hunks of the same patch. They touch
// KConfigIniBackend::stringToPrintable(), which is only partially visible in
// this diff and is therefore kept verbatim instead of being rewritten:
//
//   +QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
//   +{
//        if (aString.isEmpty()) {
//            return aString;
//        }
//   @@ -672,10 +769,16 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
//            *data++ = 's';
//            i++;
//        }
//   +    Utf8Char utf8;
//        for (; i < l; ++i/*, r++*/) {
//            switch (s[i]) {
//            default:
//   +            if (utf8.addByte(s[i])) {
//   +                break;
//   +            } else {
//   +                data = utf8.escapeBytes(data);
//   +            }
//                // The \n, \t, \r cases (all < 32) are handled below; we can ignore them here
//                if (((unsigned char)s[i]) < 32) {
//                    goto doEscape;
//   @@ -717,13 +820,14 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
//                    break;
//                }
//    doEscape:
//   -            *data++ = '\\';
//   -            *data++ = 'x';
//   -            *data++ = nibbleLookup[((unsigned char)s[i]) >> 4];
//   -            *data++ = nibbleLookup[((unsigned char)s[i]) & 0x0f];
//   +            data = escapeByte(data, s[i]);
//                break;
//            }
//   +        if (utf8.isComplete()) {
//   +            data = utf8.writeUtf8(data);
//   +        }
//        }
//   +    data = utf8.write(data);
//        *data = 0;
//        result.resize(data - start); |
//
// NOTE(review): only the `default:` label is shown feeding bytes to utf8 and
// flushing on failure. The switch cases between the two hunks are not visible
// here; if they emit output without first calling utf8.escapeBytes(data), a
// pending lead byte followed by such a character (e.g. 0xc3 '\n' 0xa9) would
// be written after it and glued to a later continuation byte - confirm
// against the full function.