6#include <private/qstringconverter_p.h>
9#include "private/qsimd_p.h"
10#include "private/qstringiterator_p.h"
11#include "private/qtools_p.h"
14#include <QtCore/qbytearraylist.h>
17#include <unicode/ucnv.h>
18#include <unicode/ucnv_cb.h>
19#include <unicode/ucnv_err.h>
20#include <unicode/ustring.h>
25#ifndef QT_BOOTSTRAPPED
26#include <QtCore/qvarlengtharray.h>
27#include <QtCore/q20iterator.h>
28#include <QtCore/private/qnumeric_p.h>
34#if __has_include(<bit>) && __cplusplus > 201703L
42static_assert(std::is_nothrow_move_constructible_v<QStringEncoder>);
43static_assert(std::is_nothrow_move_assignable_v<QStringEncoder>);
44static_assert(std::is_nothrow_move_constructible_v<QStringDecoder>);
45static_assert(std::is_nothrow_move_assignable_v<QStringDecoder>);
51#if defined(__SSE2__) || defined(__ARM_NEON__)
54#if defined(__cpp_lib_int_pow2) && __cpp_lib_int_pow2 >= 202002L
55 return std::bit_width(
v) - 1;
61 result ^=
sizeof(unsigned) * 8 - 1;
73 __m256i
data = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(
src));
74 __m128i data1 = _mm256_castsi256_si128(
data);
75 __m128i data2 = _mm256_extracti128_si256(
data, 1);
77 __m128i data1 = _mm_loadu_si128((
const __m128i*)
src);
78 __m128i data2 = _mm_loadu_si128(1+(
const __m128i*)
src);
89 __m128i
packed = _mm_packus_epi16(data1, data2);
90 __m128i nonAscii = _mm_cmpgt_epi8(
packed, _mm_setzero_si128());
96 ushort n = ~_mm_movemask_epi8(nonAscii);
101 nextAscii =
src + qBitScanReverse(
n) + 1;
112 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src));
114 __m128i nonAscii = _mm_cmpgt_epi8(
packed, _mm_setzero_si128());
117 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
dst),
packed);
119 uchar n = ~_mm_movemask_epi8(nonAscii);
121 nextAscii =
src + qBitScanReverse(
n) + 1;
136 __m128i
data = _mm_loadu_si128((
const __m128i*)
src);
139 const int BitSpacing = 2;
141 const __m256i extended = _mm256_cvtepu8_epi16(
data);
143 uint n = _mm256_movemask_epi8(extended);
146 _mm256_storeu_si256((__m256i*)
dst, extended);
150 const int BitSpacing = 1;
157 _mm_storeu_si128((__m128i*)
dst, _mm_unpacklo_epi8(
data, _mm_setzero_si128()));
158 _mm_storeu_si128(1+(__m128i*)
dst, _mm_unpackhi_epi8(
data, _mm_setzero_si128()));
172 n = qBitScanReverse(
n);
173 nextAscii =
src + (
n / BitSpacing) + 1;
179 __m128i
data = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(
src));
183 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst), _mm_unpacklo_epi8(
data, _mm_setzero_si128()));
190 n = qBitScanReverse(
n);
191 nextAscii =
src +
n + 1;
204 const __m256i
mask = _mm256_set1_epi8(
char(0x80));
206 __m256i
data = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(
src));
216 nextAscii =
src + qBitScanReverse(
n) + 1;
225 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
src));
236 nextAscii =
src + qBitScanReverse(
n) + 1;
270 __m128i data8 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src8 +
offset));
273 __m256i data16 = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(src16 +
offset));
276 __m256i datax8 = _mm256_cvtepu8_epi16(data8);
277 mask = _mm256_movemask_epi8(datax8);
282 __m256i latin1cmp = _mm256_cmpeq_epi16(datax8, data16);
283 mask = ~_mm256_movemask_epi8(latin1cmp);
288 __m128i datalo16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset));
289 __m128i datahi16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset) + 1);
292 __m128i datalo8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
293 __m128i datahi8 = _mm_unpackhi_epi8(data8, _mm_setzero_si128());
296 __m128i latin1cmplo = _mm_cmpeq_epi16(datalo8, datalo16);
297 __m128i latin1cmphi = _mm_cmpeq_epi16(datahi8, datahi16);
298 mask = _mm_movemask_epi8(latin1cmphi) << 16;
299 mask |=
ushort(_mm_movemask_epi8(latin1cmplo));
305 mask = _mm_movemask_epi8(data8);
314 auto cmp_lt_16 = [&
mask, &
offset](
int n, __m128i data8, __m128i data16) {
317 unsigned sizemask = (1U << (2 *
n)) - 1;
320 data8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
323 __m128i latin1cmp = _mm_cmpeq_epi16(data8, data16);
324 mask = ~_mm_movemask_epi8(latin1cmp) & sizemask;
325 mask |= _mm_movemask_epi8(data8);
332 __m128i data8 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(src8 +
offset));
333 __m128i data16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset));
334 cmp_lt_16(8, data8, data16);
339 __m128i data8 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(src8 +
offset));
340 __m128i data16 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(src16 +
offset));
341 cmp_lt_16(4, data8, data16);
350#elif defined(__ARM_NEON__)
353 uint16x8_t maxAscii = vdupq_n_u16(0x7f);
354 uint16x8_t mask1 = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
355 uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
360 uint16x8x2_t
in = vld2q_u16(
reinterpret_cast<const uint16_t *
>(
src));
364 uint16_t nonAscii = vaddvq_u16(vandq_u16(vcgtq_u16(
in.val[0], maxAscii), mask1))
365 | vaddvq_u16(vandq_u16(vcgtq_u16(
in.val[1], maxAscii), mask2));
368 uint16x8_t
out = vsliq_n_u16(
in.val[0],
in.val[1], 8);
371 vst1q_u8(
dst, vreinterpretq_u8_u16(
out));
377 nextAscii =
src + qBitScanReverse(nonAscii) + 1;
391 uint8x8_t msb_mask = vdup_n_u8(0x80);
392 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
394 uint8x8_t
c = vld1_u8(
src);
395 uint8_t
n = vaddv_u8(vand_u8(vcge_u8(
c, msb_mask), add_mask));
398 vst1q_u16(
reinterpret_cast<uint16_t *
>(
dst), vmovl_u8(
c));
411 n = qBitScanReverse(
n);
412 nextAscii =
src +
n + 1;
427 uint8x8_t msb_mask = vdup_n_u8(0x80);
428 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
430 uint8x8_t
c = vld1_u8(
src);
431 uint8_t
n = vaddv_u8(vand_u8(vcge_u8(
c, msb_mask), add_mask));
438 nextAscii =
src + qBitScanReverse(
n) + 1;
481 const char16_t *
src =
reinterpret_cast<const char16_t *
>(
in.data());
485 const char16_t *nextAscii =
end;
491 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(u,
dst,
src,
end);
496 }
while (
src < nextAscii);
531 const char16_t *
src =
in.utf16();
535 if (
state->remainingChars) {
539 state->state_data[0] = 0;
540 state->remainingChars = 0;
551 const char16_t *nextAscii =
end;
556 char16_t uc = *
src++;
557 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc,
cursor,
src,
end);
563 ++
state->invalidChars;
567 ++
state->invalidChars;
570 state->remainingChars = 1;
571 state->state_data[0] = uc;
573 return reinterpret_cast<char *
>(
cursor);
575 }
while (
src < nextAscii);
578 return reinterpret_cast<char *
>(
cursor);
589 *
out++ = 0b110'0'0000u | (
ch >> 6);
590 *
out++ = 0b10'00'0000u | (
ch & 0b0011'1111);
663 *
dst++ = QChar::ReplacementCharacter;
665 }
while (
src < nextAscii);
699 char16_t replacement = QChar::ReplacementCharacter;
701 replacement = QChar::Null;
711 if (
state->remainingChars || !headerdone) {
713 uchar remainingCharsData[4];
715 qsizetype newCharsToCopy = qMin<qsizetype>(
sizeof(remainingCharsData) - remainingCharsCount,
end -
src);
717 memset(remainingCharsData, 0,
sizeof(remainingCharsData));
718 memcpy(remainingCharsData, &
state->state_data[0], remainingCharsCount);
719 memcpy(remainingCharsData + remainingCharsCount,
src, newCharsToCopy);
722 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0],
dst,
begin,
723 static_cast<const uchar *
>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
725 ++
state->invalidChars;
726 *
dst++ = replacement;
731 state->remainingChars = remainingCharsCount + newCharsToCopy;
732 memcpy(&
state->state_data[0], remainingCharsData,
state->remainingChars);
734 }
else if (!headerdone) {
736 if (
dst[-1] == 0xfeff)
744 src +=
res - remainingCharsCount;
762 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(
ch,
dst,
src,
end);
765 ++
state->invalidChars;
766 *
dst++ = replacement;
773 *
dst++ = QChar::ReplacementCharacter;
774 ++
state->invalidChars;
776 *
dst++ = QChar::ReplacementCharacter;
777 ++
state->invalidChars;
779 state->remainingChars = 0;
786 state->remainingChars = 0;
804 bool isValidAscii =
true;
807 if (
src >= nextAscii)
817 isValidAscii =
false;
822 return {
false,
false };
824 }
while (
src < nextAscii);
827 return {
true, isValidAscii };
832 auto src1 =
reinterpret_cast<const qchar8_t *
>(utf8.data());
833 auto end1 = src1 + utf8.size();
834 auto src2 =
reinterpret_cast<const char16_t *
>(utf16.data());
835 auto end2 = src2 + utf16.size();
840 if (src1 < end1 && src2 < end2) {
841 char32_t uc1 = *src1++;
842 char32_t uc2 = *src2++;
846 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraitsNoAscii>(uc1,
output, src1, end1);
849 uc1 = QChar::ReplacementCharacter;
854 if (QChar::isHighSurrogate(uc2) && src2 < end2 && QChar::isLowSurrogate(*src2))
855 uc2 = QChar::surrogateToUcs4(uc2, *src2++);
858 uc1 = QChar::toCaseFolded(uc1);
859 uc2 = QChar::toCaseFolded(uc2);
862 return int(uc1) - int(uc2);
864 }
while (src1 < end1 && src2 < end2);
867 return (end1 > src1) - int(end2 > src2);
872 char32_t uc1 = QChar::Null;
873 auto src1 =
reinterpret_cast<const uchar *
>(utf8.data());
874 auto end1 = src1 + utf8.size();
875 auto src2 =
reinterpret_cast<const uchar *
>(
s.latin1());
876 auto end2 = src2 +
s.size();
878 while (src1 < end1 && src2 < end2) {
884 uc1 = QChar::ReplacementCharacter;
887 char32_t uc2 = *src2++;
889 uc1 = QChar::toCaseFolded(uc1);
890 uc2 = QChar::toCaseFolded(uc2);
893 return int(uc1) - int(uc2);
897 return (end1 > src1) - (end2 > src2);
906 const auto l = std::min(lhs.size(), rhs.size());
907 int r = memcmp(lhs.data(), rhs.data(), l);
911 char32_t uc1 = QChar::Null;
912 auto src1 =
reinterpret_cast<const uchar *
>(lhs.data());
913 auto end1 = src1 + lhs.size();
914 char32_t uc2 = QChar::Null;
915 auto src2 =
reinterpret_cast<const uchar *
>(rhs.data());
916 auto end2 = src2 + rhs.size();
918 while (src1 < end1 && src2 < end2) {
924 uc1 = QChar::ReplacementCharacter;
929 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(
b,
output, src2, end2);
932 uc2 = QChar::ReplacementCharacter;
935 uc1 = QChar::toCaseFolded(uc1);
936 uc2 = QChar::toCaseFolded(uc2);
938 return int(uc1) - int(uc2);
942 return (end1 > src1) - (end2 > src2);
945#ifndef QT_BOOTSTRAPPED
970 QChar bom(QChar::ByteOrderMark);
978 qToBigEndian<char16_t>(
in.data(),
in.size(),
out);
980 qToLittleEndian<char16_t>(
in.data(),
in.size(),
out);
982 state->remainingChars = 0;
984 return out + 2*
in.size();
998 const char *chars =
in.data();
1005 const char *
end = chars +
len;
1008 if (
state->remainingChars +
len < 2) {
1011 state->remainingChars = 1;
1021 if (!headerdone ||
state->remainingChars) {
1023 if (
state->remainingChars)
1033 if (
ch == QChar::ByteOrderSwapped) {
1035 }
else if (
ch == QChar::ByteOrderMark) {
1046 ch = QChar::fromUcs2((
ch.unicode() >> 8) | ((
ch.unicode() & 0xff) << 8));
1047 if (headerdone ||
ch != QChar::ByteOrderMark)
1055 qFromBigEndian<char16_t>(chars, nPairs,
out);
1057 qFromLittleEndian<char16_t>(chars, nPairs,
out);
1061 state->remainingChars = 0;
1062 if ((
end - chars) & 1) {
1066 state->remainingChars = 1;
1101 out[2] = (char)0xfe;
1102 out[3] = (char)0xff;
1104 out[0] = (char)0xff;
1105 out[1] = (char)0xfe;
1117 if (
state->remainingChars == 1) {
1118 auto character =
state->state_data[
Data];
1122 state->remainingChars = 0;
1123 goto decode_surrogate;
1129 ucs4 =
ch.unicode();
1136 state->remainingChars = 1;
1140 }
else if (uc->isLowSurrogate()) {
1141 ucs4 = QChar::surrogateToUcs4(
ch, *uc++);
1161 result.resize((
in.size() + 7) >> 1);
1170 const char *chars =
in.data();
1176 const char *
end = chars +
len;
1179 memcpy(tuple, &
state->state_data[
Data], 4);
1182 if (
state->remainingChars +
len < 4) {
1184 while (chars <
end) {
1185 tuple[
state->remainingChars] = *chars;
1186 ++
state->remainingChars;
1190 memcpy(&
state->state_data[
Data], tuple, 4);
1200 state->remainingChars = 0;
1204 tuple[
num++] = *chars++;
1207 if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
1209 }
else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
1217 char32_t code = (endian ==
BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1218 if (headerdone || code != QChar::ByteOrderMark) {
1219 if (QChar::requiresSurrogates(code)) {
1220 *
out++ =
QChar(QChar::highSurrogate(code));
1221 *
out++ =
QChar(QChar::lowSurrogate(code));
1233 while (chars <
end) {
1234 tuple[
num++] = *chars++;
1236 char32_t code = (endian ==
BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1237 for (
char16_t c : QChar::fromUcs4(code))
1245 *
out++ = QChar::ReplacementCharacter;
1249 memcpy(&
state->state_data[
Data], tuple, 4);
1257#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
1258int QLocal8Bit::checkUtf8()
1260 return GetACP() == CP_UTF8 ? 1 : -1;
1265 return convertToUnicode_sys(
in, CP_ACP,
state);
1271 const char *mb =
in.
data();
1277 const bool useNullForReplacement = !!(
state->flags & Flag::ConvertInvalidToNull);
1278 const char16_t replacementCharacter = useNullForReplacement ? QChar::Null
1279 : QChar::ReplacementCharacter;
1280 if (
state->flags & Flag::Stateless) {
1293 std::array<wchar_t, 4096>
buf;
1294 wchar_t *
out =
buf.data();
1300 const auto growOut = [&](
qsizetype size) -> std::tuple<wchar_t *, qsizetype> {
1302 return {
out, outlen};
1303 const bool wasStackBuffer =
sp.isEmpty();
1304 const auto begin = wasStackBuffer ?
buf.data() :
reinterpret_cast<wchar_t *
>(
sp.data());
1309 return {
nullptr, 0};
1312 auto it =
reinterpret_cast<wchar_t *
>(
sp.data());
1321 while (
state &&
state->remainingChars && mblen) {
1328 std::array<char, 6> prev = {0};
1350 std::tie(
out, outlen) = growOut(tmp.
size());
1354 outlen -= tmp.
size();
1355 const qsizetype tail = toCopy - localState.remainingChars;
1360 mb -= localState.remainingChars;
1361 mblen += localState.remainingChars;
1362 localState.remainingChars = 0;
1364 state->remainingChars = localState.remainingChars;
1365 state->invalidChars += localState.invalidChars;
1366 std::copy_n(localState.state_data,
state->remainingChars,
state->state_data);
1373 int nextIn = qt_saturate<int>(mblen);
1375 std::tie(
out, outlen) = growOut(1);
1378 const int nextOut = qt_saturate<int>(outlen);
1379 int len = MultiByteToWideChar(codePage, MB_ERR_INVALID_CHARS, mb, nextIn,
out, nextOut);
1386 int r = GetLastError();
1387 if (
r == ERROR_INSUFFICIENT_BUFFER) {
1388 const int wclen = MultiByteToWideChar(codePage, 0, mb, nextIn, 0, 0);
1389 std::tie(
out, outlen) = growOut(wclen);
1392 }
else if (
r == ERROR_NO_UNICODE_TRANSLATION) {
1399 state->remainingChars = mblen;
1400 std::copy_n(mb, mblen,
state->state_data);
1413 const auto it = CharPrevExA(codePage, mb, mb + nextIn, 0);
1415 nextIn = int(
it - mb);
1423 std::tie(
out, outlen) = growOut(1);
1426 *
out = replacementCharacter;
1434 qWarning(
"MultiByteToWideChar: Cannot convert multibyte text");
1438 nextIn = qt_saturate<int>(mblen);
1446 const auto begin =
reinterpret_cast<wchar_t *
>(
sp.data());
1450 if (
sp.size() &&
sp.back().isNull())
1453 if (!
state && mblen > 0) {
1456 sp.resize(
sp.size() + mblen, replacementCharacter);
1457 invalidChars += mblen;
1464 return convertFromUnicode_sys(
in, CP_ACP,
state);
1470 const wchar_t *
ch =
reinterpret_cast<const wchar_t *
>(
in.data());
1482 if (
state->flags & Flag::Stateless) {
1497 std::array<char, 4096>
buf;
1505 wchar_t wc[2] = { wchar_t(
state->state_data[0]),
ch[0] };
1509 const bool validCodePoint = QChar::isLowSurrogate(wc[1]);
1510 int len = WideCharToMultiByte(codePage, 0, wc, validCodePoint ? 2 : 1,
out, outlen,
nullptr,
1516 if (validCodePoint) {
1520 state->remainingChars = 0;
1521 state->state_data[0] = 0;
1526 if (
state && QChar::isHighSurrogate(
ch[uclen - 1])) {
1529 state->remainingChars = 1;
1530 state->state_data[0] =
ch[uclen - 1];
1539 const auto growOut = [&](
qsizetype size) -> std::tuple<char *, qsizetype> {
1541 return {
out, outlen};
1542 const bool wasStackBuffer = mb.isEmpty();
1543 const auto begin = wasStackBuffer ?
buf.data() : mb.data();
1548 return {
nullptr, 0};
1551 auto it = mb.data();
1559 const auto getNextWindowSize = [&]() {
1560 int nextIn = qt_saturate<int>(uclen);
1563 if (nextIn > 1 && QChar::isHighSurrogate(
ch[nextIn - 1]))
1570 const int nextIn = getNextWindowSize();
1571 std::tie(
out, outlen) = growOut(1);
1574 const int nextOut = qt_saturate<int>(outlen);
1575 len = WideCharToMultiByte(codePage, 0,
ch, nextIn,
out, nextOut,
nullptr,
nullptr);
1582 int r = GetLastError();
1583 if (
r == ERROR_INSUFFICIENT_BUFFER) {
1584 int neededLength = WideCharToMultiByte(codePage, 0,
ch, nextIn,
nullptr, 0,
1586 if (neededLength <= 0) {
1594 "WideCharToMultiByte: Cannot convert multibyte text (error %d)\n",
r);
1598 std::tie(
out, outlen) = growOut(neededLength);
1607 "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
r,
1608 reinterpret_cast<const wchar_t *
>(
1626void QStringConverter::State::clear() noexcept
1637void QStringConverter::State::reset() noexcept
1641 UConverter *converter =
static_cast<UConverter *
>(
d[0]);
1643 ucnv_reset(converter);
1652#ifndef QT_BOOTSTRAPPED
1727 *
out = (char)
in[
i].cell();
1732 state->invalidChars += invalid;
1739 memcpy(
out,
s.constData(),
s.size()*
sizeof(
QChar));
1740 return out +
s.size();
1746 memcpy(
out,
s.constData(),
s.size());
1747 return out +
s.size();
1754#ifndef QT_BOOTSTRAPPED
1900#ifndef QT_BOOTSTRAPPED
1916 while (*
a ==
'-' || *
a ==
'_')
1918 while (*
b ==
'-' || *
b ==
'_')
1945 ucnv_close(
static_cast<UConverter *
>(
state->d[0]));
1946 state->d[0] =
nullptr;
1953 if (
state->d[0] ==
nullptr)
1954 state->d[0] = createConverterForName(
static_cast<const char *
>(
state->d[1]),
state);
1959 ensureConverter(
state);
1961 auto icu_conv =
static_cast<UConverter *
>(
state->d[0]);
1962 UErrorCode err = U_ZERO_ERROR;
1964 auto sourceLimit =
in.data() +
in.size();
1968 UChar *
target =
reinterpret_cast<UChar *
>(
out);
1972 UBool
flush =
false;
1975 UConverterToUCallback action;
1977 ucnv_getToUCallBack(icu_conv, &action, &
context);
1979 ucnv_setToUCallBack(icu_conv, action,
state,
nullptr,
nullptr, &err);
1981 ucnv_toUnicode(icu_conv, &
target, targetLimit, &
source, sourceLimit,
nullptr, flush, &err);
1983 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
1985 if (
auto leftOver = ucnv_toUCountPending(icu_conv, &err)) {
1986 ucnv_reset(icu_conv);
1987 state->invalidChars += leftOver;
1995 ensureConverter(
state);
1996 auto icu_conv =
static_cast<UConverter *
>(
state->d[0]);
1997 UErrorCode err = U_ZERO_ERROR;
1998 auto source =
reinterpret_cast<const UChar *
>(
in.data());
1999 auto sourceLimit =
reinterpret_cast<const UChar *
>(
in.data() +
in.size());
2001 qsizetype length = UCNV_GET_MAX_BYTES_FOR_STRING(
in.size(), ucnv_getMaxCharSize(icu_conv));
2005 UBool
flush =
false;
2008 UConverterFromUCallback action;
2010 ucnv_getFromUCallBack(icu_conv, &action, &
context);
2012 ucnv_setFromUCallBack(icu_conv, action,
state,
nullptr,
nullptr, &err);
2014 ucnv_fromUnicode(icu_conv, &
target, targetLimit, &
source, sourceLimit,
nullptr, flush, &err);
2016 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
2018 if (
auto leftOver = ucnv_fromUCountPending(icu_conv, &err)) {
2019 ucnv_reset(icu_conv);
2020 state->invalidChars += leftOver;
2026 Q_DISABLE_COPY_MOVE(QStringConverterICU)
2028 template<qsizetype X>
2031 return X * inLength *
sizeof(UChar);
2042 return 2 * inLength;
2046 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<1>},
2047 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<2>},
2048 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<3>},
2049 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<4>},
2050 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<5>},
2051 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<6>},
2052 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<7>},
2053 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<8>}
2056 static UConverter *createConverterForName(
const char *
name,
const State *
state)
2060 UErrorCode status = U_ZERO_ERROR;
2061 UConverter *conv = ucnv_open(
name, &status);
2062 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
2067 if (
state->flags.testFlag(Flag::ConvertInvalidToNull)) {
2068 UErrorCode
error = U_ZERO_ERROR;
2070 auto nullToSubstituter = [](
const void *
context, UConverterToUnicodeArgs *toUArgs,
2071 const char *, int32_t
length,
2072 UConverterCallbackReason reason, UErrorCode *err) {
2073 if (reason <= UCNV_IRREGULAR) {
2074 *err = U_ZERO_ERROR;
2076 ucnv_cbToUWriteUChars(toUArgs, &
c, 1, 0, err);
2082 ucnv_setToUCallBack(conv, nullToSubstituter,
state,
nullptr,
nullptr, &
error);
2084 auto nullFromSubstituter = [](
const void *
context, UConverterFromUnicodeArgs *fromUArgs,
2085 const UChar *, int32_t
length,
2086 UChar32, UConverterCallbackReason reason, UErrorCode *err) {
2087 if (reason <= UCNV_IRREGULAR) {
2088 *err = U_ZERO_ERROR;
2089 const UChar replacement[] = { 0 };
2090 const UChar *stringBegin = std::begin(replacement);
2091 ucnv_cbFromUWriteUChars(fromUArgs, &stringBegin, std::end(replacement), 0, err);
2097 ucnv_setFromUCallBack(conv, nullFromSubstituter,
state,
nullptr,
nullptr, &
error);
2099 UErrorCode
error = U_ZERO_ERROR;
2101 auto qmarkToSubstituter = [](
const void *
context, UConverterToUnicodeArgs *toUArgs,
2102 const char *codeUnits,int32_t
length,
2103 UConverterCallbackReason reason, UErrorCode *err) {
2104 if (reason <= UCNV_IRREGULAR) {
2110 UCNV_TO_U_CALLBACK_SUBSTITUTE(
nullptr, toUArgs, codeUnits,
length, reason, err);
2113 ucnv_setToUCallBack(conv, qmarkToSubstituter,
state,
nullptr,
nullptr, &
error);
2115 auto qmarkFromSubstituter = [](
const void *
context, UConverterFromUnicodeArgs *fromUArgs,
2116 const UChar *codeUnits, int32_t
length,
2117 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) {
2118 if (reason <= UCNV_IRREGULAR) {
2124 UCNV_FROM_U_CALLBACK_SUBSTITUTE(
nullptr, fromUArgs, codeUnits,
length,
2125 codePoint, reason, err);
2127 ucnv_setFromUCallBack(conv, qmarkFromSubstituter,
state,
nullptr,
nullptr, &
error);
2136 UErrorCode status = U_ZERO_ERROR;
2137 UConverter *conv = createConverterForName(
name,
state);
2141 const char *icuName = ucnv_getName(conv, &status);
2144 const char *persistentName = ucnv_getStandardName(icuName,
"MIME", &status);
2145 if (U_FAILURE(status) || !persistentName) {
2146 status = U_ZERO_ERROR;
2147 persistentName = ucnv_getStandardName(icuName,
"IANA", &status);
2149 state->d[1] =
const_cast<char *
>(persistentName);
2152 qsizetype maxCharSize = ucnv_getMaxCharSize(conv);
2153 state->clearFn = QStringConverterICU::clear_function;
2154 if (maxCharSize > 8 || maxCharSize < 1) {
2155 qWarning(
"Encountered unexpected codec \"%s\" which requires >8x space",
name);
2158 return &forLength[maxCharSize - 1];
2174 iface = encodingInterfaces + int(*e);
2188 return static_cast<const char*
>(
state.
d[1]);
2245 return std::nullopt;
2252 return std::nullopt;
2255#ifndef QT_BOOTSTRAPPED
2263std::optional<QStringConverter::Encoding>
2268 if (arraySize > 3) {
2269 char32_t uc = qFromUnaligned<char32_t>(
data.data());
2270 if (uc ==
qToBigEndian(
char32_t(QChar::ByteOrderMark)))
2274 if (expectedFirstCharacter) {
2283 if (arraySize > 2) {
2288 if (arraySize > 1) {
2289 char16_t uc = qFromUnaligned<char16_t>(
data.data());
2290 if (uc ==
qToBigEndian(
char16_t(QChar::ByteOrderMark)))
2294 if (expectedFirstCharacter) {
2302 return std::nullopt;
2322 while (++pos2 <
header.size()) {
2324 if (
ch ==
'\"' ||
ch ==
'\'' ||
ch ==
'>' ||
ch ==
'/') {
2330 if (
name ==
"unicode")
2332 if (!
name.isEmpty())
2358 if (!encodingTag.isEmpty())
2372 return 1 + ucnv_countAvailable();
2400 UErrorCode status = U_ZERO_ERROR;
2401 auto icuName = ucnv_getAvailableName(int32_t(
index - 1));
2402 const char *standardName = ucnv_getStandardName(icuName,
"MIME", &status);
2403 if (U_FAILURE(status) || !standardName) {
2404 status = U_ZERO_ERROR;
2405 standardName = ucnv_getStandardName(icuName,
"IANA", &status);
2408 standardName = icuName;
2415 result.reserve(codecCount);
2417 result.push_back(availableCodec(
i));
2439 if (!encodingTag.isEmpty())
2451 return encodingInterfaces[int(e)].name;
QByteArray toByteArray() const
char * data()
\macro QT_NO_CAST_FROM_BYTEARRAY
const char * constData() const noexcept
Returns a pointer to the const data stored in the byte array.
void truncate(qsizetype pos)
Truncates the byte array at index position pos.
Q_CORE_EXPORT const char * name() const noexcept
Returns the canonical name of the encoding this QStringConverter can encode or decode.
static Q_CORE_EXPORT std::optional< Encoding > encodingForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
static Q_CORE_EXPORT const char * nameForEncoding(Encoding e)
Returns the canonical name for encoding e.
Encoding
\value Utf8 Create a converter to or from UTF-8 \value Utf16 Create a converter to or from UTF-16.
static Q_CORE_EXPORT QStringList availableCodecs()
Returns a list of names of supported codecs.
static Q_CORE_EXPORT std::optional< Encoding > encodingForName(const char *name) noexcept
Convert name to the corresponding \l Encoding member, if there is one.
constexpr QStringConverter() noexcept
static Q_CORE_EXPORT std::optional< Encoding > encodingForData(QByteArrayView data, char16_t expectedFirstCharacter=0) noexcept
Returns the encoding for the content of data if it can be determined.
static Q_CORE_EXPORT QStringDecoder decoderForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
constexpr QStringDecoder() noexcept
Default constructs an decoder.
QString toString() const
Returns a deep copy of this string view's data as a QString.
\macro QT_RESTRICTED_CAST_FROM_ASCII
static QString fromLatin1(QByteArrayView ba)
This is an overloaded member function, provided for convenience. It differs from the above function o...
const QChar * constData() const
Returns a pointer to the data stored in the QString.
qsizetype size() const noexcept
Returns the number of characters in this string.
QChar * data()
Returns a pointer to the data stored in the QString.
QSet< QString >::iterator it
Combined button and popup list for selecting options.
constexpr int qt_lencmp(qsizetype lhs, qsizetype rhs) noexcept
constexpr char toAsciiLower(char ch) noexcept
QTextStream & flush(QTextStream &stream)
Calls QTextStream::flush() on stream and returns stream.
constexpr Initialization Uninitialized
constexpr auto ssize(const C &c) -> std::common_type_t< std::ptrdiff_t, std::make_signed_t< decltype(c.size())> >
QT_POPCOUNT_RELAXED_CONSTEXPR uint qCountLeadingZeroBits(quint32 v) noexcept
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
#define QByteArrayLiteral(str)
size_t qstrlen(const char *str)
constexpr QStaticByteArrayMatcher< N > qMakeStaticByteArrayMatcher(const char(&pattern)[N]) noexcept
DBusConnection const char DBusError * error
static QString header(const QString &name)
typedef QByteArray(EGLAPIENTRYP PFNQGSGETDISPLAYSPROC)()
constexpr T qToBigEndian(T source)
constexpr T qToLittleEndian(T source)
constexpr const T & qMin(const T &a, const T &b)
std::enable_if_t< std::is_unsigned_v< T >, bool > qAddOverflow(T v1, T v2, T *r)
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLenum GLuint GLintptr GLsizeiptr size
[1]
GLenum GLuint GLenum GLsizei length
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLenum GLuint GLenum GLsizei const GLchar * buf
GLenum GLuint GLintptr offset
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
GLsizei GLsizei GLchar * source
GLuint GLenum GLsizei GLsizei GLint GLint GLboolean packed
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
static const uchar utf8bom[]
static QChar * fromUtf32LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QChar * fromUtf16LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
static QChar * fromUtf32BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf8Len(qsizetype l)
static QChar * fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state)
static bool simdDecodeAscii(char16_t *, const uchar *, const uchar *, const uchar *)
static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
static qsizetype toLatin1Len(qsizetype l)
static const uchar * simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
static bool simdEncodeAscii(uchar *, const char16_t *, const char16_t *, const char16_t *)
static QChar * fromUtf32(QChar *out, QByteArrayView in, QStringConverter::State *state)
static char * toUtf32(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf8Len(qsizetype l)
static char * toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
static qsizetype toUtf16Len(qsizetype l)
static qsizetype fromLatin1Len(qsizetype l)
static char * toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf32Len(qsizetype l)
static qsizetype availableCodecCount()
static bool nameMatch(const char *a, const char *b)
static QChar * fromUtf16BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf32Len(qsizetype l)
static qsizetype fromUtf16Len(qsizetype l)
static char * toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
QT_BEGIN_NAMESPACE typedef uchar * output
Q_CHECK_PTR(a=new int[80])
QTextStream out(stdout)
[7]
char * toString(const MyType &t)
[31]
static char16_t * convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept
static char * convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
static Q_CORE_EXPORT QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static QChar * convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian)
static const int EndOfString
static void appendUtf16(const NoOutput &, char16_t)
static void appendUcs4(const NoOutput &, char32_t)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView in)
static int compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
static QChar * convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
static ValidUtf8Result isValidUtf8(QByteArrayView in)
static Q_CORE_EXPORT char * convertFromLatin1(char *out, QLatin1StringView in)