diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/kconfigini.cpp | 122 | 
1 files changed, 113 insertions, 9 deletions
diff --git a/src/core/kconfigini.cpp b/src/core/kconfigini.cpp
index 84d77b48..87c4a8af 100644
--- a/src/core/kconfigini.cpp
+++ b/src/core/kconfigini.cpp
@@ -647,13 +647,110 @@ bool KConfigIniBackend::isLocked() const
     return lockFile && lockFile->isLocked();
 }
 
-QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
-{
-    static const char nibbleLookup[] = {
-        '0', '1', '2', '3', '4', '5', '6', '7',
-        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+namespace {
+    // Write byte @p s at @p data as the 4-character escape "\xHH" (lowercase hex).
+    // @p data must have room for 4 bytes; returns the position just past the escape.
+    char* escapeByte(char* data, unsigned char s) {
+        static const char nibbleLookup[] = {
+            '0', '1', '2', '3', '4', '5', '6', '7',
+            '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+        };
+        *data++ = '\\';
+        *data++ = 'x';
+        *data++ = nibbleLookup[s >> 4];
+        *data++ = nibbleLookup[s & 0x0f];
+        return data;
+    }
+
+    // Accumulates the bytes of one multi-byte UTF-8 sequence (2 to 4 bytes).
+    // Bytes are validated as they arrive: overlong forms, UTF-16 surrogates
+    // and values above U+10FFFF are rejected.
+    struct Utf8Char {
+    public:
+        unsigned char bytes[4];
+        unsigned char count;
+        unsigned char charLength;
+
+        Utf8Char() {
+            clear();
+            charLength = 0;
+        }
+        void clear() {
+            count = 0;
+        }
+        // Add a byte to the UTF8 character. Returns true if the byte was stored.
+        // Returns false, leaving the state untouched, when the byte is not valid here.
+        bool addByte(unsigned char b) {
+            if (count == 0) {
+                if (b > 0xc1 && (b & 0xe0) == 0xc0) {
+                    charLength = 2;
+                } else if ((b & 0xf0) == 0xe0) {
+                    charLength = 3;
+                } else if (b < 0xf5 && (b & 0xf8) == 0xf0) {
+                    charLength = 4;
+                } else {
+                    return false;
+                }
+                bytes[0] = b;
+                count = 1;
+            } else if (count < charLength && (b & 0xc0) == 0x80) {
+                if (count == 1) {
+                    if (charLength == 3 && ((bytes[0] == 0xe0 && b < 0xa0) || (bytes[0] == 0xed && b > 0x9f))) {
+                        return false; // overlong 3 byte sequence or UTF-16 surrogate (U+D800..U+DFFF)
+                    }
+                    if (charLength == 4) {
+                        if (bytes[0] == 0xf0 && b < 0x90) {
+                            return false; // overlong 4 byte sequence
+                        }
+                        if (bytes[0] == 0xf4 && b > 0x8f) {
+                            return false; // Unicode value larger than U+10FFFF
+                        }
+                    }
+                }
+                bytes[count++] = b;
+            } else {
+                return false;
+            }
+            return true;
+        }
+        // Return true if exactly one whole, valid character is buffered.
+        bool isComplete() const {
+            return count > 0 && count == charLength;
+        }
+        // Append the buffered bytes to data in escaped form, then reset the buffer.
+        char* escapeBytes(char* data) {
+            for (unsigned char i = 0; i < count; ++i) {
+                data = escapeByte(data, bytes[i]);
+            }
+            clear();
+            return data;
+        }
+        // Append the buffered bytes to data unchanged, then reset the buffer.
+        // Only call this if isComplete() returns true.
+        char* writeUtf8(char* data) {
+            for (unsigned char i = 0; i < count; ++i) {
+                *data++ = bytes[i];
+            }
+            clear();
+            return data;
+        }
+        // Flush whatever is buffered: a complete character is written literally,
+        // an incomplete one is written as escaped bytes.
+        // Call this after the last input byte to handle the state that remains
+        // at the end of a buffer.
+        char* write(char* data) {
+            if (isComplete()) {
+                data = writeUtf8(data);
+            } else {
+                data = escapeBytes(data);
+            }
+            return data;
+        }
     };
+}
 
+QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, StringType type)
+{
     if (aString.isEmpty()) {
         return aString;
     }
@@ -672,10 +769,16 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
         *data++ = 's';
         i++;
     }
+    Utf8Char utf8;
 
     for (; i < l; ++i/*, r++*/) {
         switch (s[i]) {
         default:
+            if (utf8.addByte(s[i])) {
+                break;
+            } else {
+                data = utf8.escapeBytes(data);
+            }
             // The \n, \t, \r cases (all < 32) are handled below; we can ignore them here
             if (((unsigned char)s[i]) < 32) {
                 goto doEscape;
@@ -717,12 +820,13 @@ QByteArray KConfigIniBackend::stringToPrintable(const QByteArray &aString, Strin
                 break;
             }
         doEscape:
-            *data++ = '\\';
-            *data++ = 'x';
-            *data++ = nibbleLookup[((unsigned char)s[i]) >> 4];
-            *data++ = nibbleLookup[((unsigned char)s[i]) & 0x0f];
+            data = escapeByte(data, s[i]);
             break;
         }
+        if (utf8.isComplete()) {
+            data = utf8.writeUtf8(data);
+        }
     }
+    data = utf8.write(data);
     *data = 0;
     result.resize(data - start);
