Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/.
Loading...
Searching...
No Matches
qstringconverter_p.h
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2020 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#ifndef QSTRINGCONVERTER_P_H
6#define QSTRINGCONVERTER_P_H
7
8//
9// W A R N I N G
10// -------------
11//
12// This file is not part of the Qt API. It exists purely as an
13// implementation detail. This header file may change from version to
14// version without notice, or even be removed.
15//
16// We mean it.
17//
18
19#include <QtCore/qstring.h>
20#include <QtCore/qendian.h>
21#include <QtCore/qstringconverter.h>
22#include <QtCore/private/qglobal_p.h>
23
25
// qchar8_t is the code-unit type used for UTF-8 data in this header.
// When the compiler does not define __cpp_char8_t, it is a distinct,
// empty enum whose underlying type is uchar; otherwise it is simply an
// alias for the built-in char8_t.
26#ifndef __cpp_char8_t
27enum qchar8_t : uchar {};
28#else
29using qchar8_t = char8_t;
30#endif
31
32struct QLatin1
33{
34 // Defined in qstring.cpp
35 static char16_t *convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept;
36
38 {
39 char16_t *dst = reinterpret_cast<char16_t *>(buffer);
41 return reinterpret_cast<QChar *>(dst);
42 }
43
45 [[maybe_unused]] QStringConverterBase::State *state) noexcept
46 {
48
49 return convertToUnicode(dst, QLatin1StringView(in.data(), in.size()));
50 }
51
52 static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept;
53
54 // Defined in qstring.cpp
55 static char *convertFromUnicode(char *out, QStringView in) noexcept;
56};
57
59{
// false: fromUtf8() rejects lead bytes <= 0xC1 and runs its post-decode
// checks (overlong, surrogate, out-of-range) on the decoded value.
60 static const bool isTrusted = false;
// true: toUtf8()/fromUtf8() do not reject Unicode noncharacters; they only
// return Error for them when this flag is false.
61 static const bool allowNonCharacters = true;
// false: toUtf8()/fromUtf8() handle values below 0x80 in their dedicated
// single-byte branch.
62 static const bool skipAsciiHandling = false;
// Returned by toUtf8()/fromUtf8() when the input is invalid.
63 static const int Error = -1;
// Returned by toUtf8()/fromUtf8() when the input ends before the current
// sequence (or surrogate pair) is complete.
64 static const int EndOfString = -2;
65
66 static void appendByte(uchar *&ptr, uchar b)
67 { *ptr++ = b; }
68
70 { *ptr++ = b; }
71
72 static uchar peekByte(const char *ptr, qsizetype n = 0)
73 { return ptr[n]; }
74
75 static uchar peekByte(const uchar *ptr, qsizetype n = 0)
76 { return ptr[n]; }
77
78 static uchar peekByte(const qchar8_t *ptr, qsizetype n = 0)
79 { return ptr[n]; }
80
81 static qptrdiff availableBytes(const char *ptr, const char *end)
82 { return end - ptr; }
83
84 static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
85 { return end - ptr; }
86
88 { return end - ptr; }
89
90 static void advanceByte(const char *&ptr, qsizetype n = 1)
91 { ptr += n; }
92
93 static void advanceByte(const uchar *&ptr, qsizetype n = 1)
94 { ptr += n; }
95
96 static void advanceByte(const qchar8_t *&ptr, qsizetype n = 1)
97 { ptr += n; }
98
// Writes one UTF-16 code unit at the output cursor and steps the cursor
// past it.
static void appendUtf16(char16_t *&ptr, char16_t uc)
{
    *ptr = uc;
    ++ptr;
}
101
102 static void appendUcs4(char16_t *&ptr, char32_t uc)
103 {
104 appendUtf16(ptr, QChar::highSurrogate(uc));
105 appendUtf16(ptr, QChar::lowSurrogate(uc));
106 }
107
108 static char16_t peekUtf16(const char16_t *ptr, qsizetype n = 0) { return ptr[n]; }
109
110 static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end)
111 { return end - ptr; }
112
113 static void advanceUtf16(const char16_t *&ptr, qsizetype n = 1) { ptr += n; }
114
// UTF-32 output flavour: widens the UTF-16 code unit uc to char32_t, stores
// it at the output cursor and steps the cursor past it.
static void appendUtf16(char32_t *&ptr, char16_t uc)
{
    const char32_t widened = uc;
    *ptr = widened;
    ++ptr;
}
117
// UTF-32 output flavour: stores the code point uc as a single char32_t and
// steps the cursor past it (no surrogate pair is produced).
static void appendUcs4(char32_t *&ptr, char32_t uc)
{
    *ptr = uc;
    ++ptr;
}
120};
121
123{
// When a traits type with this flag set is passed to toUtf8()/fromUtf8(),
// their dedicated single-byte (< 0x80) branch is compiled out.
// NOTE(review): presumably callers using these traits handle ASCII
// themselves before calling - confirm against the call sites.
124 static const bool skipAsciiHandling = true;
125};
126
128{
133 template <typename Traits, typename OutputPtr, typename InputPtr> inline
134 int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end)
135 {
136 if (!Traits::skipAsciiHandling && u < 0x80) {
137 // U+0000 to U+007F (US-ASCII) - one byte
138 Traits::appendByte(dst, uchar(u));
139 return 0;
140 } else if (u < 0x0800) {
141 // U+0080 to U+07FF - two bytes
142 // first of two bytes
143 Traits::appendByte(dst, 0xc0 | uchar(u >> 6));
144 } else {
145 if (!QChar::isSurrogate(u)) {
146 // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes
147 if (!Traits::allowNonCharacters && QChar::isNonCharacter(u))
148 return Traits::Error;
149
150 // first of three bytes
151 Traits::appendByte(dst, 0xe0 | uchar(u >> 12));
152 } else {
153 // U+10000 to U+10FFFF - four bytes
154 // need to get one extra codepoint
155 if (Traits::availableUtf16(src, end) == 0)
156 return Traits::EndOfString;
157
158 char16_t low = Traits::peekUtf16(src);
159 if (!QChar::isHighSurrogate(u))
160 return Traits::Error;
161 if (!QChar::isLowSurrogate(low))
162 return Traits::Error;
163
164 Traits::advanceUtf16(src);
165 char32_t ucs4 = QChar::surrogateToUcs4(u, low);
166
167 if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4))
168 return Traits::Error;
169
170 // first byte
171 Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf));
172
173 // second of four bytes
174 Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f));
175
176 // for the rest of the bytes
177 u = char16_t(ucs4);
178 }
179
180 // second to last byte
181 Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f));
182 }
183
184 // last byte
185 Traits::appendByte(dst, 0x80 | (u & 0x3f));
186 return 0;
187 }
188
190 {
191 return (b & 0xc0) == 0x80;
192 }
193
196 template <typename Traits, typename OutputPtr, typename InputPtr> inline
197 qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
198 {
199 qsizetype charsNeeded;
200 char32_t min_uc;
201 char32_t uc;
202
203 if (!Traits::skipAsciiHandling && b < 0x80) {
204 // US-ASCII
205 Traits::appendUtf16(dst, b);
206 return 1;
207 }
208
209 if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) {
210 // an UTF-8 first character must be at least 0xC0
211 // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
212 return Traits::Error;
213 } else if (b < 0xe0) {
214 charsNeeded = 2;
215 min_uc = 0x80;
216 uc = b & 0x1f;
217 } else if (b < 0xf0) {
218 charsNeeded = 3;
219 min_uc = 0x800;
220 uc = b & 0x0f;
221 } else if (b < 0xf5) {
222 charsNeeded = 4;
223 min_uc = 0x10000;
224 uc = b & 0x07;
225 } else {
226 // the last Unicode character is U+10FFFF
227 // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
228 // therefore, a byte higher than 0xF4 is not the UTF-8 first byte
229 return Traits::Error;
230 }
231
232 qptrdiff bytesAvailable = Traits::availableBytes(src, end);
233 if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) {
234 // it's possible that we have an error instead of just unfinished bytes
235 if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0)))
236 return Traits::Error;
237 if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1)))
238 return Traits::Error;
239 return Traits::EndOfString;
240 }
241
242 // first continuation character
243 b = Traits::peekByte(src, 0);
244 if (!isContinuationByte(b))
245 return Traits::Error;
246 uc <<= 6;
247 uc |= b & 0x3f;
248
249 if (charsNeeded > 2) {
250 // second continuation character
251 b = Traits::peekByte(src, 1);
252 if (!isContinuationByte(b))
253 return Traits::Error;
254 uc <<= 6;
255 uc |= b & 0x3f;
256
257 if (charsNeeded > 3) {
258 // third continuation character
259 b = Traits::peekByte(src, 2);
260 if (!isContinuationByte(b))
261 return Traits::Error;
262 uc <<= 6;
263 uc |= b & 0x3f;
264 }
265 }
266
267 // we've decoded something; safety-check it
268 if (!Traits::isTrusted) {
269 if (uc < min_uc)
270 return Traits::Error;
271 if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint)
272 return Traits::Error;
273 if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc))
274 return Traits::Error;
275 }
276
277 // write the UTF-16 sequence
278 if (!QChar::requiresSurrogates(uc)) {
279 // UTF-8 decoded and no surrogates are required
280 // detach if necessary
281 Traits::appendUtf16(dst, char16_t(uc));
282 } else {
283 // UTF-8 decoded to something that requires a surrogate pair
284 Traits::appendUcs4(dst, uc);
285 }
286
287 Traits::advanceByte(src, charsNeeded - 1);
288 return charsNeeded;
289 }
290}
291
298
299struct QUtf8
300{
302 {
303 char16_t *dst = reinterpret_cast<char16_t *>(buffer);
305 return reinterpret_cast<QChar *>(dst);
306 }
307
308 Q_CORE_EXPORT static char16_t* convertToUnicode(char16_t *dst, QByteArrayView in) noexcept;
311
313 {
314 char16_t *buffer = reinterpret_cast<char16_t *>(out);
316 return reinterpret_cast<QChar *>(buffer);
317 }
318
319 static char16_t *convertToUnicode(char16_t *dst, QByteArrayView in, QStringConverter::State *state);
320
321 Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in);
324 Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in);
330 static int compareUtf8(QByteArrayView utf8, QStringView utf16,
334 static int compareUtf8(QByteArrayView lhs, QByteArrayView rhs,
336};
337
345
353
354struct Q_CORE_EXPORT QLocal8Bit
355{
356#if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED)
361#else
362 static int checkUtf8();
363 static bool isUtf8()
364 {
365 Q_CONSTINIT
366 static QBasicAtomicInteger<qint8> result = { 0 };
367 int r = result.loadRelaxed();
368 if (r == 0) {
369 r = checkUtf8();
370 result.storeRelaxed(r);
371 }
372 return r > 0;
373 }
374 static QString convertToUnicode_sys(QByteArrayView, quint32, QStringConverter::State *);
375 static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *);
377 {
378 if (isUtf8())
380 return convertToUnicode_sys(in, state);
381 }
382 static QByteArray convertFromUnicode_sys(QStringView, quint32, QStringConverter::State *);
383 static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *);
384 static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
385 {
386 if (isUtf8())
388 return convertFromUnicode_sys(in, state);
389 }
390#endif
391};
392
394
395#endif // QSTRINGCONVERTER_P_H
\inmodule QtCore
Definition qbytearray.h:57
\inmodule QtCore
\inmodule QtCore
Definition qstringview.h:78
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:129
else opt state
[0]
Combined button and popup list for selecting options.
qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end)
bool isContinuationByte(uchar b)
CaseSensitivity
@ CaseSensitive
#define Q_UNLIKELY(x)
static ControlElement< T > * ptr(QWidget *widget)
GLboolean GLboolean GLboolean b
GLboolean r
[2]
GLuint GLuint end
GLenum src
GLenum GLuint buffer
GLenum GLenum dst
GLfloat n
GLdouble s
[6]
Definition qopenglext.h:235
GLuint in
GLuint64EXT * result
[6]
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
@ LittleEndianness
@ DetectEndianness
@ BigEndianness
unsigned int quint32
Definition qtypes.h:50
unsigned char uchar
Definition qtypes.h:32
ptrdiff_t qptrdiff
Definition qtypes.h:164
ptrdiff_t qsizetype
Definition qtypes.h:165
static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:854
QTextStream out(stdout)
[7]
static char16_t * convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept
Definition qstring.cpp:5687
static QChar * convertToUnicode(QChar *dst, QByteArrayView in, QStringConverterBase::State *state) noexcept
static char * convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
static QChar * convertToUnicode(QChar *buffer, QLatin1StringView in) noexcept
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
static Q_CORE_EXPORT QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static QChar * convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian)
static const bool skipAsciiHandling
static void appendByte(qchar8_t *&ptr, qchar8_t b)
static uchar peekByte(const uchar *ptr, qsizetype n=0)
static qptrdiff availableBytes(const qchar8_t *ptr, const qchar8_t *end)
static void appendByte(uchar *&ptr, uchar b)
static void advanceByte(const uchar *&ptr, qsizetype n=1)
static const bool isTrusted
static void appendUtf16(char32_t *&ptr, char16_t uc)
static const bool skipAsciiHandling
static char16_t peekUtf16(const char16_t *ptr, qsizetype n=0)
static const int Error
static void appendUcs4(char16_t *&ptr, char32_t uc)
static const int EndOfString
static void advanceUtf16(const char16_t *&ptr, qsizetype n=1)
static uchar peekByte(const char *ptr, qsizetype n=0)
static void advanceByte(const char *&ptr, qsizetype n=1)
static uchar peekByte(const qchar8_t *ptr, qsizetype n=0)
static void appendUtf16(char16_t *&ptr, char16_t uc)
static const bool allowNonCharacters
static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end)
static qptrdiff availableBytes(const char *ptr, const char *end)
static void appendUcs4(char32_t *&ptr, char32_t uc)
static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
static void advanceByte(const qchar8_t *&ptr, qsizetype n=1)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView in)
static int compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
static QChar * convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
static ValidUtf8Result isValidUtf8(QByteArrayView in)
static Q_CORE_EXPORT char * convertFromLatin1(char *out, QLatin1StringView in)
static QChar * convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state)