Qt
Internal/Contributor docs for the Qt SDK. <b>Note:</b> These are NOT official API docs; those are found <a href='https://doc.qt.io/'>here</a>.
Loading...
Searching...
No Matches
qurlidna.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2016 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qurl_p.h"
6
7#include <QtCore/qstringlist.h>
8#include <QtCore/private/qnumeric_p.h>
9#include <QtCore/private/qoffsetstringarray_p.h>
10#include <QtCore/private/qstringiterator_p.h>
11#include <QtCore/private/qunicodetables_p.h>
12
13#include <algorithm>
14
16
17using namespace Qt::StringLiterals;
18
19// needed by the punycode encoder/decoder
// Punycode bootstring parameters (the values match those listed in RFC 3492, section 5).
// Number of distinct digit values; a decoded digit >= base is treated as invalid.
20static const uint base = 36;
// Lower and upper clamp of the per-digit threshold t used when coding a delta.
21static const uint tmin = 1;
22static const uint tmax = 26;
// Added to delta in the denominator of the final bias formula in adapt().
23static const uint skew = 38;
// Divisor applied to delta on the first adaptation; later adaptations divide by 2.
24static const uint damp = 700;
// NOTE(review): presumably the starting bias of the encoder and decoder; the
// initialization that uses it is not visible in this listing.
25static const uint initial_bias = 72;
// First non-basic code point: the decoder starts n here and rejects any n below it.
26static const uint initial_n = 128;
27
// Maximum number of UTF-16 code units accepted for a single domain label.
28static constexpr qsizetype MaxDomainLabelLength = 63;
29
// Converts a Punycode digit value into the code of its ASCII character:
// values 0..25 map to 'a'..'z', values 26..35 map to '0'..'9'.
static inline uint encodeDigit(uint digit)
{
    if (digit < 26)
        return digit + 97; // 'a' + digit
    return digit + 22;     // '0' + (digit - 26)
}
34
35static inline uint adapt(uint delta, uint numpoints, bool firsttime)
36{
37 delta /= (firsttime ? damp : 2);
38 delta += (delta / numpoints);
39
40 uint k = 0;
41 for (; delta > ((base - tmin) * tmax) / 2; k += base)
42 delta /= (base - tmin);
43
44 return k + (((base - tmin + 1) * delta) / (delta + skew));
45}
46
47static inline void appendEncode(QString *output, uint delta, uint bias)
48{
49 uint qq;
50 uint k;
51 uint t;
52
53 // insert the variable length delta integer.
54 for (qq = delta, k = base;; k += base) {
55 // stop generating digits when the threshold is
56 // detected.
57 t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
58 if (qq < t) break;
59
60 *output += QChar(encodeDigit(t + (qq - t) % (base - t)));
61 qq = (qq - t) / (base - t);
62 }
63
64 *output += QChar(encodeDigit(qq));
65}
66
68{
70 uint delta = 0;
72
73 // Do not try to encode strings that certainly will result in output
74 // that is longer than allowable domain name label length. Note that
75 // non-BMP codepoints are encoded as two QChars.
76 if (in.size() > MaxDomainLabelLength * 2)
77 return;
78
79 int outLen = output->size();
80 output->resize(outLen + in.size());
81
82 QChar *d = output->data() + outLen;
83 bool skipped = false;
84 // copy all basic code points verbatim to output.
85 for (QChar c : in) {
86 if (c.unicode() < 0x80)
87 *d++ = c;
88 else
89 skipped = true;
90 }
91
92 // if there were only basic code points, just return them
93 // directly; don't do any encoding.
94 if (!skipped)
95 return;
96
97 output->truncate(d - output->constData());
98 int copied = output->size() - outLen;
99
100 // h and b now contain the number of basic code points in input.
101 uint b = copied;
102 uint h = copied;
103
104 // if basic code points were copied, add the delimiter character.
105 if (h > 0)
106 *output += u'-';
107
108 // compute the input length in Unicode code points.
109 uint inputLength = 0;
110 for (QStringIterator iter(in); iter.hasNext();) {
111 inputLength++;
112
113 if (iter.next(char32_t(-1)) == char32_t(-1)) {
114 output->truncate(outLen);
115 return; // invalid surrogate pair
116 }
117 }
118
119 // while there are still unprocessed non-basic code points left in
120 // the input string...
121 while (h < inputLength) {
122 // find the character in the input string with the lowest unprocessed value.
123 uint m = std::numeric_limits<uint>::max();
124 for (QStringIterator iter(in); iter.hasNext();) {
125 auto c = iter.nextUnchecked();
126 static_assert(std::numeric_limits<decltype(m)>::max()
127 >= std::numeric_limits<decltype(c)>::max(),
128 "Punycode uint should be able to cover all codepoints");
129 if (c >= n && c < m)
130 m = c;
131 }
132
133 // delta = delta + (m - n) * (h + 1), fail on overflow
134 uint tmp;
135 if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
136 output->truncate(outLen);
137 return; // punycode_overflow
138 }
139 n = m;
140
141 for (QStringIterator iter(in); iter.hasNext();) {
142 auto c = iter.nextUnchecked();
143
144 // increase delta until we reach the character processed in this iteration;
145 // fail if delta overflows.
146 if (c < n) {
147 if (qAddOverflow<uint>(delta, 1, &delta)) {
148 output->truncate(outLen);
149 return; // punycode_overflow
150 }
151 }
152
153 if (c == n) {
154 appendEncode(output, delta, bias);
155
156 bias = adapt(delta, h + 1, h == b);
157 delta = 0;
158 ++h;
159 }
160 }
161
162 ++delta;
163 ++n;
164 }
165
166 // prepend ACE prefix
167 output->insert(outLen, "xn--"_L1);
168 return;
169}
170
172{
173 uint n = initial_n;
174 uint i = 0;
176
177 // Do not try to decode strings longer than allowable for a domain label.
178 // Non-ASCII strings are not allowed here anyway, so there is no need
179 // to account for surrogates.
180 if (pc.size() > MaxDomainLabelLength)
181 return QString();
182
183 // strip any ACE prefix
184 int start = pc.startsWith("xn--"_L1) ? 4 : 0;
185 if (!start)
186 return pc;
187
188 // find the last delimiter character '-' in the input array. copy
189 // all data before this delimiter directly to the output array.
190 int delimiterPos = pc.lastIndexOf(u'-');
191 auto output = delimiterPos < 4 ? std::u32string()
192 : pc.mid(start, delimiterPos - start).toStdU32String();
193
194 // if a delimiter was found, skip to the position after it;
195 // otherwise start at the front of the input string. everything
196 // before the delimiter is assumed to be basic code points.
197 uint cnt = delimiterPos + 1;
198
199 // loop through the rest of the input string, inserting non-basic
200 // characters into output as we go.
201 while (cnt < (uint) pc.size()) {
202 uint oldi = i;
203 uint w = 1;
204
205 // find the next index for inserting a non-basic character.
206 for (uint k = base; cnt < (uint) pc.size(); k += base) {
207 // grab a character from the punycode input and find its
208 // delta digit (each digit code is part of the
209 // variable-length integer delta)
210 uint digit = pc.at(cnt++).unicode();
211 if (digit - 48 < 10) digit -= 22;
212 else if (digit - 65 < 26) digit -= 65;
213 else if (digit - 97 < 26) digit -= 97;
214 else digit = base;
215
216 // Fail if the code point has no digit value
217 if (digit >= base)
218 return QString();
219
220 // i = i + digit * w, fail on overflow
221 uint tmp;
222 if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
223 return QString();
224
225 // detect threshold to stop reading delta digits
226 uint t;
227 if (k <= bias) t = tmin;
228 else if (k >= bias + tmax) t = tmax;
229 else t = k - bias;
230
231 if (digit < t) break;
232
233 // w = w * (base - t), fail on overflow
234 if (qMulOverflow<uint>(w, base - t, &w))
235 return QString();
236 }
237
238 // find new bias and calculate the next non-basic code
239 // character.
240 uint outputLength = static_cast<uint>(output.length());
241 bias = adapt(i - oldi, outputLength + 1, oldi == 0);
242
243 // n = n + i div (length(output) + 1), fail on overflow
244 if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
245 return QString();
246
247 // allow the deltas to wrap around
248 i %= (outputLength + 1);
249
250 // if n is a basic code point then fail; this should not happen with
251 // correct implementation of Punycode, but check just in case.
252 if (n < initial_n) {
253 // Don't use Q_ASSERT() to avoid possibility of DoS
254 qWarning("Attempt to insert a basic codepoint. Unhandled overflow?");
255 return QString();
256 }
257
258 // Surrogates should normally be rejected later by other IDNA code.
259 // But because of Qt's use of UTF-16 to represent strings the
260 // IDNA code is not able to distinguish characters represented as pairs
261 // of surrogates from normal code points. This is why surrogates are
262 // not allowed here.
263 //
264 // Allowing surrogates would lead to non-unique (after normalization)
265 // encoding of strings with non-BMP characters.
266 //
267 // Punycode that encodes characters outside the Unicode range is also
268 // invalid and is rejected here.
269 if (QChar::isSurrogate(n) || n > QChar::LastValidCodePoint)
270 return QString();
271
272 // insert the character n at position i
273 output.insert(i, 1, static_cast<char32_t>(n));
274 ++i;
275 }
276
278}
279
280static constexpr auto idn_whitelist = qOffsetStringArray(
281 "ac", "ar", "asia", "at",
282 "biz", "br",
283 "cat", "ch", "cl", "cn", "com",
284 "de", "dk",
285 "es",
286 "fi",
287 "gr",
288 "hu",
289 "il", "info", "io", "is", "ir",
290 "jp",
291 "kr",
292 "li", "lt", "lu", "lv",
293 "museum",
294 "name", "net", "no", "nu", "nz",
295 "org",
296 "pl", "pr",
297 "se", "sh",
298 "tel", "th", "tm", "tw",
299 "ua",
300 "vn",
301 "xn--fiqs8s", // China
302 "xn--fiqz9s", // China
303 "xn--fzc2c9e2c", // Sri Lanka
304 "xn--j6w193g", // Hong Kong
305 "xn--kprw13d", // Taiwan
306 "xn--kpry57d", // Taiwan
307 "xn--mgba3a4f16a", // Iran
308 "xn--mgba3a4fra", // Iran
309 "xn--mgbaam7a8h", // UAE
310 "xn--mgbayh7gpa", // Jordan
311 "xn--mgberp4a5d4ar", // Saudi Arabia
312 "xn--ogbpf8fl", // Syria
313 "xn--p1ai", // Russian Federation
314 "xn--wgbh1c", // Egypt
315 "xn--wgbl6a", // Qatar
316 "xn--xkc2al3hye2a" // Sri Lanka
317);
318
// Optional user-supplied whitelist; nullptr until one has been installed.
// It is consulted by qt_is_idn_enabled() and returned by the idnWhitelist() code.
// NOTE(review): the code that assigns this pointer is not visible in this
// listing -- presumably QUrl::setIdnWhitelist(); confirm ownership there.
319Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr;
320
321static bool lessThan(const QChar *a, int l, const char *c)
322{
323 const auto *uc = reinterpret_cast<const char16_t *>(a);
324 const char16_t *e = uc + l;
325
326 if (!c || *c == 0)
327 return false;
328
329 while (*c) {
330 if (uc == e || *uc != static_cast<unsigned char>(*c))
331 break;
332 ++uc;
333 ++c;
334 }
335 return uc == e ? *c : (*uc < static_cast<unsigned char>(*c));
336}
337
338static bool equal(const QChar *a, int l, const char *b)
339{
340 while (l && a->unicode() && *b) {
341 if (*a != QLatin1Char(*b))
342 return false;
343 ++a;
344 ++b;
345 --l;
346 }
347 return l == 0;
348}
349
350static bool qt_is_idn_enabled(QStringView aceDomain)
351{
352 auto idx = aceDomain.lastIndexOf(u'.');
353 if (idx == -1)
354 return false;
355
356 auto tldString = aceDomain.mid(idx + 1);
357 const auto len = tldString.size();
358
359 const QChar *tld = tldString.constData();
360
362 return user_idn_whitelist->contains(tldString);
363
364 int l = 0;
365 int r = idn_whitelist.count() - 1;
366 int i = (l + r + 1) / 2;
367
368 while (r != l) {
369 if (lessThan(tld, len, idn_whitelist.at(i)))
370 r = i - 1;
371 else
372 l = i;
373 i = (l + r + 1) / 2;
374 }
375 return equal(tld, len, idn_whitelist.at(i));
376}
377
// Returns true if c may appear in a normalized ASCII label: a lower-case
// letter, a digit, a hyphen or an underscore.
template<typename C>
static inline bool isValidInNormalizedAsciiLabel(C c)
{
    if (c >= u'a' && c <= u'z')
        return true;
    if (c >= u'0' && c <= u'9')
        return true;
    return c == u'-' || c == u'_';
}
383
// Returns true if c may appear in a normalized ASCII domain name, i.e. it is
// the label separator '.' or a character valid inside a label.
template<typename C>
static inline bool isValidInNormalizedAsciiName(C c)
{
    if (c == u'.')
        return true;
    return isValidInNormalizedAsciiLabel(c);
}
389
390/*
391 Map domain name according to algorithm in UTS #46, 4.1
392
393 Returns empty string if there are disallowed characters in the input.
394
395 Sets resultIsAscii if the result is known for sure to be all ASCII.
396*/
397static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions options,
398 bool *resultIsAscii)
399{
400 *resultIsAscii = true;
401
402 // Check if the input is already normalized ASCII first and can be returned as is.
403 int i = 0;
404 for (auto c : in) {
405 if (c.unicode() >= 0x80 || !isValidInNormalizedAsciiName(c))
406 break;
407 i++;
408 }
409
410 if (i == in.size())
411 return in;
412
414 result.reserve(in.size());
415 result.append(in.constData(), i);
416 bool allAscii = true;
417
418 for (QStringIterator iter(QStringView(in).sliced(i)); iter.hasNext();) {
419 char32_t uc = iter.next();
420
421 // Fast path for ASCII-only inputs
422 if (Q_LIKELY(uc < 0x80)) {
423 if (uc >= U'A' && uc <= U'Z')
424 uc |= 0x20; // lower-case it
425
427 result.append(static_cast<char16_t>(uc));
428 continue;
429 }
430 }
431
432 allAscii = false;
433
434 // Capital sharp S is a special case since UTS #46 revision 31 (Unicode 15.1)
435 if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) {
436 result.append(u"ss"_s);
437 continue;
438 }
439
441
443 status = options.testFlag(QUrl::AceTransitionalProcessing)
446
447 switch (status) {
449 continue;
452 for (auto c : QChar::fromUcs4(uc))
453 result.append(c);
454 break;
457 break;
458 default:
459 Q_UNREACHABLE();
460 }
461 }
462
463 *resultIsAscii = allAscii;
464 return result;
465}
466
467/*
468 Check the rules for an ASCII label.
469
470 Check the size restriction and that the label does not start or end with dashes.
471
472 The label should be nonempty.
473*/
475{
476 if (label.size() > MaxDomainLabelLength)
477 return false;
478
479 if (label.first() == u'-' || label.last() == u'-')
480 return false;
481
482 return std::all_of(label.begin(), label.end(), isValidInNormalizedAsciiLabel<QChar>);
483}
484
485namespace {
486
487class DomainValidityChecker
488{
489 bool domainNameIsBidi = false;
490 bool hadBidiErrors = false;
491 bool ignoreBidiErrors;
492
493 static constexpr char32_t ZWNJ = U'\u200C';
494 static constexpr char32_t ZWJ = U'\u200D';
495
496public:
497 DomainValidityChecker(bool ignoreBidiErrors = false) : ignoreBidiErrors(ignoreBidiErrors) { }
498 bool checkLabel(const QString &label, QUrl::AceProcessingOptions options);
499
500private:
501 static bool checkContextJRules(QStringView label);
502 static bool checkBidiRules(QStringView label);
503};
504
505} // anonymous namespace
506
507/*
508 Check CONTEXTJ rules according to RFC 5892, appendix A.1 & A.2.
509
510 Rule Set for U+200C (ZWNJ):
511
512 False;
513
514 If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
515
516 If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
517
518 (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
519
520 Rule Set for U+200D (ZWJ):
521
522 False;
523
524 If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
525
526*/
// Scans label once and returns false as soon as a ZWJ or ZWNJ appears in a
// context that is not permitted; returns true otherwise.
//
// ZWJ is accepted only directly after a character of combining class Virama.
// ZWNJ is accepted after a Virama, or when it sits inside the joining-type
// pattern (L|D) T* ZWNJ T* (R|D), which is tracked by a small state machine.
527bool DomainValidityChecker::checkContextJRules(QStringView label)
528{
529 constexpr unsigned char CombiningClassVirama = 9;
530
531 enum class State {
532 Initial,
533 LD_T, // L,D with possible following T*
534 ZWNJ_T, // ZWNJ with possible following T*
535 };
536 State regexpState = State::Initial;
537 bool previousIsVirama = false;
538
539 for (QStringIterator iter(label); iter.hasNext();) {
540 auto ch = iter.next();
541
542 if (ch == ZWJ) {
// ZWJ has a single rule: the preceding character must be a Virama.
543 if (!previousIsVirama)
544 return false;
545 regexpState = State::Initial;
546 } else if (ch == ZWNJ) {
// ZWNJ needs either a preceding Virama or a pending (L|D) T* prefix.
547 if (!previousIsVirama && regexpState != State::LD_T)
548 return false;
// Only the pattern case still has to see its (R|D) suffix later on.
549 regexpState = previousIsVirama ? State::Initial : State::ZWNJ_T;
550 } else {
551 switch (QChar::joiningType(ch)) {
552 case QChar::Joining_Left:
// L cannot complete a pending ZWNJ pattern, but it can start a new one.
553 if (regexpState == State::ZWNJ_T)
554 return false;
555 regexpState = State::LD_T;
556 break;
557 case QChar::Joining_Right:
// R completes a pending ZWNJ pattern and cannot start a new one.
558 regexpState = State::Initial;
559 break;
560 case QChar::Joining_Dual:
// D completes a pending pattern and may also start the next one.
561 regexpState = State::LD_T;
562 break;
563 case QChar::Joining_Transparent:
// T characters are skipped on both sides of the ZWNJ.
564 break;
565 default:
// NOTE(review): any other joining type resets the state even while in
// ZWNJ_T, so a ZWNJ followed by such a character is not rejected here --
// confirm this is intended with respect to RFC 5892, appendix A.1.
566 regexpState = State::Initial;
567 break;
568 }
569 }
570
571 previousIsVirama = QChar::combiningClass(ch) == CombiningClassVirama;
572 }
573
// A ZWNJ pattern still waiting for its (R|D) suffix at the end is invalid.
574 return regexpState != State::ZWNJ_T;
575}
576
577/*
578 Check if the label conforms to BiDi rule of RFC 5893.
579
580 1. The first character must be a character with Bidi property L, R,
581 or AL. If it has the R or AL property, it is an RTL label; if it
582 has the L property, it is an LTR label.
583
584 2. In an RTL label, only characters with the Bidi properties R, AL,
585 AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
586
587 3. In an RTL label, the end of the label must be a character with
588 Bidi property R, AL, EN, or AN, followed by zero or more
589 characters with Bidi property NSM.
590
591 4. In an RTL label, if an EN is present, no AN may be present, and
592 vice versa.
593
594 5. In an LTR label, only characters with the Bidi properties L, EN,
595 ES, CS, ET, ON, BN, or NSM are allowed.
596
597 6. In an LTR label, the end of the label must be a character with
598 Bidi property L or EN, followed by zero or more characters with
599 Bidi property NSM.
600*/
601bool DomainValidityChecker::checkBidiRules(QStringView label)
602{
603 if (label.isEmpty())
604 return true;
605
607 Q_ASSERT(iter.hasNext());
608
609 char32_t ch = iter.next();
610 bool labelIsRTL = false;
611
612 switch (QChar::direction(ch)) {
613 case QChar::DirL:
614 break;
615 case QChar::DirR:
616 case QChar::DirAL:
617 labelIsRTL = true;
618 break;
619 default:
620 return false;
621 }
622
623 bool tailOk = true;
624 bool labelHasEN = false;
625 bool labelHasAN = false;
626
627 while (iter.hasNext()) {
628 ch = iter.next();
629
630 switch (QChar::direction(ch)) {
631 case QChar::DirR:
632 case QChar::DirAL:
633 if (!labelIsRTL)
634 return false;
635 tailOk = true;
636 break;
637
638 case QChar::DirL:
639 if (labelIsRTL)
640 return false;
641 tailOk = true;
642 break;
643
644 case QChar::DirES:
645 case QChar::DirCS:
646 case QChar::DirET:
647 case QChar::DirON:
648 case QChar::DirBN:
649 tailOk = false;
650 break;
651
652 case QChar::DirNSM:
653 break;
654
655 case QChar::DirAN:
656 if (labelIsRTL) {
657 if (labelHasEN)
658 return false;
659 labelHasAN = true;
660 tailOk = true;
661 } else {
662 return false;
663 }
664 break;
665
666 case QChar::DirEN:
667 if (labelIsRTL) {
668 if (labelHasAN)
669 return false;
670 labelHasEN = true;
671 }
672 tailOk = true;
673 break;
674
675 default:
676 return false;
677 }
678 }
679
680 return tailOk;
681}
682
683/*
684 Check if the given label is valid according to UTS #46 validity criteria.
685
686 NFC check can be skipped if the label was transformed to NFC before calling
687 this function (as optimization).
688
689 The domain name is considered invalid if this function returns false at least
690 once.
691
692 1. The label must be in Unicode Normalization Form NFC.
693 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
694 in both the third and fourth positions.
695 3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character.
696 4. The label must not contain a U+002E ( . ) FULL STOP.
697 5. The label must not begin with a combining mark, that is: General_Category=Mark.
698 6. Each code point in the label must only have certain status values according to Section 5,
699 IDNA Mapping Table:
700 1. For Transitional Processing, each value must be valid.
701 2. For Nontransitional Processing, each value must be either valid or deviation.
702 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in The Unicode
703 Code Points and Internationalized Domain Names for Applications (IDNA).
704 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy
705 all six of the numbered conditions in RFC 5893, Section 2.
706
707 NOTE: Don't use QStringView for label, so that call to QString::normalized() can avoid
708 memory allocation when there is nothing to normalize.
709*/
710bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessingOptions options)
711{
712 if (label.isEmpty())
713 return true;
714
715 if (label != label.normalized(QString::NormalizationForm_C))
716 return false;
717
718 if (label.size() >= 4) {
719 // This assumes that the first two characters are in BMP, but that's ok
720 // because non-BMP characters are unlikely to be used for specifying
721 // future extensions.
722 if (label[2] == u'-' && label[3] == u'-')
723 return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label);
724 }
725
726 if (label.startsWith(u'-') || label.endsWith(u'-'))
727 return false;
728
729 if (label.contains(u'.'))
730 return false;
731
733 auto c = iter.next();
734
735 if (QChar::isMark(c))
736 return false;
737
738 // As optimization, CONTEXTJ rules check can be skipped if no
739 // ZWJ/ZWNJ characters were found during the first pass.
740 bool hasJoiners = false;
741
742 for (;;) {
743 hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
744
745 if (!ignoreBidiErrors && !domainNameIsBidi) {
746 switch (QChar::direction(c)) {
747 case QChar::DirR:
748 case QChar::DirAL:
749 case QChar::DirAN:
750 domainNameIsBidi = true;
751 if (hadBidiErrors)
752 return false;
753 break;
754 default:
755 break;
756 }
757 }
758
759 switch (QUnicodeTables::idnaStatus(c)) {
761 break;
763 if (options.testFlag(QUrl::AceTransitionalProcessing))
764 return false;
765 break;
766 default:
767 return false;
768 }
769
770 if (!iter.hasNext())
771 break;
772 c = iter.next();
773 }
774
775 if (hasJoiners && !checkContextJRules(label))
776 return false;
777
778 hadBidiErrors = hadBidiErrors || !checkBidiRules(label);
779
780 if (domainNameIsBidi && hadBidiErrors)
781 return false;
782
783 return true;
784}
785
787{
788 qsizetype lastIdx = 0;
789 QString aceForm; // this variable is here for caching
790 QString aceResult;
791
792 while (true) {
793 qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);
794 if (idx == -1)
795 idx = normalizedDomain.size();
796
797 const qsizetype labelLength = idx - lastIdx;
798 if (labelLength) {
799 const auto label = normalizedDomain.sliced(lastIdx, labelLength);
800 aceForm.clear();
801 qt_punycodeEncoder(label, &aceForm);
802 if (aceForm.isEmpty())
803 return {};
804
805 aceResult.append(aceForm);
806 }
807
808 if (idx == normalizedDomain.size())
809 break;
810
811 if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0))
812 return {}; // two delimiters in a row -- empty label not allowed
813
814 lastIdx = idx + 1;
815 aceResult += u'.';
816 }
817
818 return aceResult;
819}
820
821static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot,
822 bool *usesPunycode)
823{
824 qsizetype lastIdx = 0;
825 bool hasPunycode = false;
826 *usesPunycode = false;
827
828 while (lastIdx < normalizedDomain.size()) {
829 auto idx = normalizedDomain.indexOf(u'.', lastIdx);
830 if (idx == -1)
831 idx = normalizedDomain.size();
832
833 const auto labelLength = idx - lastIdx;
834 if (labelLength == 0) {
835 if (idx == normalizedDomain.size())
836 break;
837 if (dot == ForbidLeadingDot || idx > 0)
838 return false; // two delimiters in a row -- empty label not allowed
839 } else {
840 const auto label = normalizedDomain.sliced(lastIdx, labelLength);
842 return false;
843
844 hasPunycode = hasPunycode || label.startsWith("xn--"_L1);
845 }
846
847 lastIdx = idx + 1;
848 }
849
850 *usesPunycode = hasPunycode;
851 return true;
852}
853
854static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
855{
857 result.reserve(asciiDomain.size());
858 qsizetype lastIdx = 0;
859
860 DomainValidityChecker checker;
861
862 while (true) {
863 auto idx = asciiDomain.indexOf(u'.', lastIdx);
864 if (idx == -1)
865 idx = asciiDomain.size();
866
867 const auto labelLength = idx - lastIdx;
868 if (labelLength == 0) {
869 if (idx == asciiDomain.size())
870 break;
871 } else {
872 const auto label = asciiDomain.sliced(lastIdx, labelLength);
873 const auto unicodeLabel = qt_punycodeDecoder(label);
874
875 if (unicodeLabel.isEmpty())
876 return asciiDomain;
877
878 if (!checker.checkLabel(unicodeLabel, options))
879 return asciiDomain;
880
881 result.append(unicodeLabel);
882 }
883
884 if (idx == asciiDomain.size())
885 break;
886
887 lastIdx = idx + 1;
888 result += u'.';
889 }
890 return result;
891}
892
893static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options)
894{
895 qsizetype lastIdx = 0;
896
897 DomainValidityChecker checker(true);
898
899 while (true) {
900 qsizetype idx = domainName.indexOf(u'.', lastIdx);
901 if (idx == -1)
902 idx = domainName.size();
903
904 const qsizetype labelLength = idx - lastIdx;
905 if (labelLength) {
906 const auto label = domainName.sliced(lastIdx, labelLength);
907
908 if (!checker.checkLabel(label, options))
909 return false;
910 }
911
912 if (idx == domainName.size())
913 break;
914
915 lastIdx = idx + 1;
916 }
917 return true;
918}
919
921 QUrl::AceProcessingOptions options)
922{
923 if (domain.isEmpty())
924 return {};
925
926 bool mappedToAscii;
927 const QString mapped = mapDomainName(domain, options, &mappedToAscii);
928 const QString normalized =
929 mappedToAscii ? mapped : mapped.normalized(QString::NormalizationForm_C);
930
931 if (normalized.isEmpty())
932 return {};
933
934 if (!mappedToAscii && !checkUnicodeName(normalized, options))
935 return {};
936
937 bool needsConversionToUnicode;
938 const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
939 if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))
940 return {};
941
942 if (op == ToAceOnly || !needsConversionToUnicode
943 || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
944 return aceResult;
945 }
946
947 return convertToUnicode(aceResult, options);
948}
949
961{
963 return *user_idn_whitelist;
964 static const QStringList list = [] {
966 list.reserve(idn_whitelist.count());
967 int i = 0;
968 while (i < idn_whitelist.count()) {
970 ++i;
971 }
972 return list;
973 }();
974 return list;
975}
976
1004
\inmodule QtCore
void reserve(qsizetype size)
Definition qlist.h:753
\inmodule QtCore
\inmodule QtCore
Definition qstringview.h:78
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:129
std::u32string toStdU32String() const
Definition qstring.h:1470
qsizetype indexOf(QLatin1StringView s, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition qstring.cpp:4517
qsizetype lastIndexOf(QChar c, Qt::CaseSensitivity cs=Qt::CaseSensitive) const noexcept
Definition qstring.h:296
bool startsWith(const QString &s, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Returns true if the string starts with s; otherwise returns false.
Definition qstring.cpp:5455
QString mid(qsizetype position, qsizetype n=-1) const &
Definition qstring.cpp:5300
bool isEmpty() const noexcept
Returns true if the string has no characters; otherwise returns false.
Definition qstring.h:192
qsizetype size() const noexcept
Returns the number of characters in this string.
Definition qstring.h:186
const QChar at(qsizetype i) const
Returns the character at the given index position in the string.
Definition qstring.h:1226
static QString fromStdU32String(const std::u32string &s)
Definition qstring.h:1467
@ NormalizationForm_C
Definition qstring.h:619
@ IgnoreIDNWhitelist
Definition qurl.h:254
@ AceTransitionalProcessing
Definition qurl.h:255
static QStringList idnWhitelist()
Definition qurlidna.cpp:960
static void setIdnWhitelist(const QStringList &)
Definition qurlidna.cpp:998
Combined button and popup list for selecting options.
Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept
Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept
#define Q_LIKELY(x)
QList< QString > QStringList
Constructs a string list that contains the given string, str.
DBusConnection const char DBusError DBusBusType DBusError return DBusConnection DBusHandleMessageFunction void DBusFreeFunction return DBusConnection return DBusConnection return const char DBusError return DBusConnection DBusMessage dbus_uint32_t return DBusConnection dbus_bool_t DBusConnection DBusAddWatchFunction DBusRemoveWatchFunction DBusWatchToggledFunction void DBusFreeFunction return DBusConnection DBusDispatchStatusFunction void DBusFreeFunction DBusTimeout return DBusTimeout return DBusWatch return DBusWatch unsigned int return DBusError const DBusError return const DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessageIter * iter
#define qWarning
Definition qlogging.h:166
constexpr auto qOffsetStringArray(const char(&...strings)[Nx]) noexcept
GLboolean GLboolean GLboolean b
const GLfloat * m
GLfloat GLfloat GLfloat w
[0]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLboolean r
[2]
GLuint GLsizei const GLchar * label
[43]
GLuint start
GLfloat n
GLfloat GLfloat GLfloat GLfloat h
const GLubyte * c
GLfloat bias
GLdouble GLdouble t
Definition qopenglext.h:243
GLuint in
GLint GLenum GLboolean normalized
Definition qopenglext.h:752
GLuint64EXT * result
[6]
GLenum GLsizei len
static qreal dot(const QPointF &a, const QPointF &b)
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
#define Q_AUTOTEST_EXPORT
ptrdiff_t qsizetype
Definition qtypes.h:165
unsigned int uint
Definition qtypes.h:34
AceLeadingDot
Definition qurl_p.h:30
@ ForbidLeadingDot
Definition qurl_p.h:30
QString Q_CORE_EXPORT qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options={})
Definition qurlidna.cpp:920
Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
Definition qurlidna.cpp:171
Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
Definition qurlidna.cpp:67
AceOperation
Definition qurl_p.h:31
@ ToAceOnly
Definition qurl_p.h:31
static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot)
Definition qurlidna.cpp:786
static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot, bool *usesPunycode)
Definition qurlidna.cpp:821
static const uint damp
Definition qurlidna.cpp:24
static const uint initial_bias
Definition qurlidna.cpp:25
static const uint tmax
Definition qurlidna.cpp:22
static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:893
static bool isValidInNormalizedAsciiName(C c)
Definition qurlidna.cpp:385
static bool qt_is_idn_enabled(QStringView aceDomain)
Definition qurlidna.cpp:350
static const uint skew
Definition qurlidna.cpp:23
static constexpr auto idn_whitelist
Definition qurlidna.cpp:280
static constexpr qsizetype MaxDomainLabelLength
Definition qurlidna.cpp:28
static const uint base
Definition qurlidna.cpp:20
static bool validateAsciiLabel(QStringView label)
Definition qurlidna.cpp:474
static bool isValidInNormalizedAsciiLabel(C c)
Definition qurlidna.cpp:379
static void appendEncode(QString *output, uint delta, uint bias)
Definition qurlidna.cpp:47
static Q_CONSTINIT QStringList * user_idn_whitelist
Definition qurlidna.cpp:319
static bool lessThan(const QChar *a, int l, const char *c)
Definition qurlidna.cpp:321
static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions options, bool *resultIsAscii)
Definition qurlidna.cpp:397
static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:854
Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
Definition qurlidna.cpp:171
static const uint initial_n
Definition qurlidna.cpp:26
Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
Definition qurlidna.cpp:67
static bool equal(const QChar *a, int l, const char *b)
Definition qurlidna.cpp:338
static const uint tmin
Definition qurlidna.cpp:21
static uint encodeDigit(uint digit)
Definition qurlidna.cpp:30
static uint adapt(uint delta, uint numpoints, bool firsttime)
Definition qurlidna.cpp:35
QT_BEGIN_NAMESPACE typedef uchar * output
QList< int > list
[14]
\inmodule QtCore \reentrant
Definition qchar.h:18