Qt
Internal/Contributor docs for the Qt SDK. <b>Note:</b> These are NOT official API docs; those are found <a href='https://doc.qt.io/'>here</a>.
Loading...
Searching...
No Matches
qxmlutils.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include <qstring.h>
5
6#include "qxmlutils_p.h"
7
8#include <private/qtools_p.h>
9
11
12using namespace QtMiscUtils;
13
14/* TODO:
15 * - isNameChar() doesn't have to be public, it's only needed in
16 * qdom.cpp -- refactor fixedXmlName() to use isNCName()
17 * - A lot of functions can be inlined.
18 */
19
21{
22public:
25};
26typedef const QXmlCharRange *RangeIter;
27
32bool QXmlUtils::rangeContains(RangeIter begin, RangeIter end, const QChar c)
33{
34 const ushort cp(c.unicode());
35
36 // check the first two ranges "manually" as characters in that
37 // range are checked very often and we avoid the binary search below.
38
39 if (cp <= begin->max)
40 return cp >= begin->min;
41
42 ++begin;
43
44 if (begin == end)
45 return false;
46
47 if (cp <= begin->max)
48 return cp >= begin->min;
49
50 while (begin != end) {
51 qptrdiff delta = (end - begin) / 2;
52 RangeIter mid = begin + delta;
53
54 if (mid->min > cp)
55 end = mid;
56 else if (mid->max < cp)
57 begin = mid;
58 else
59 return true;
60
61 if (delta == 0)
62 break;
63 }
64
65 return false;
66}
67
68// [85] BaseChar ::= ...
69
71{
72 {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x00FF},
73 {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148}, {0x014A, 0x017E}, {0x0180, 0x01C3},
74 {0x01CD, 0x01F0}, {0x01F4, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8}, {0x02BB, 0x02C1},
75 {0x0386, 0x0386}, {0x0388, 0x038A}, {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03CE},
76 {0x03D0, 0x03D6}, {0x03DA, 0x03DA}, {0x03DC, 0x03DC}, {0x03DE, 0x03DE}, {0x03E0, 0x03E0},
77 {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481},
78 {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
79 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x0559}, {0x0561, 0x0586}, {0x05D0, 0x05EA},
80 {0x05F0, 0x05F2}, {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
81 {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6}, {0x0905, 0x0939},
82 {0x093D, 0x093D}, {0x0958, 0x0961}, {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
83 {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1},
84 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30},
85 {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E},
86 {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8D, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
87 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AE0, 0x0AE0},
88 {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
89 {0x0B36, 0x0B39}, {0x0B3D, 0x0B3D}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B85, 0x0B8A},
90 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F},
91 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C},
92 {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61},
93 {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9},
94 {0x0CDE, 0x0CDE}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28},
95 {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0E01, 0x0E2E}, {0x0E30, 0x0E30}, {0x0E32, 0x0E33},
96 {0x0E40, 0x0E45}, {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E87, 0x0E88}, {0x0E8A, 0x0E8A},
97 {0x0E8D, 0x0E8D}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EA5, 0x0EA5},
98 {0x0EA7, 0x0EA7}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB0}, {0x0EB2, 0x0EB3},
99 {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4}, {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5},
100 {0x10D0, 0x10F6}, {0x1100, 0x1100}, {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109},
101 {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x113C}, {0x113E, 0x113E}, {0x1140, 0x1140},
102 {0x114C, 0x114C}, {0x114E, 0x114E}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159},
103 {0x115F, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167}, {0x1169, 0x1169},
104 {0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x1175}, {0x119E, 0x119E}, {0x11A8, 0x11A8},
105 {0x11AB, 0x11AB}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8}, {0x11BA, 0x11BA}, {0x11BC, 0x11C2},
106 {0x11EB, 0x11EB}, {0x11F0, 0x11F0}, {0x11F9, 0x11F9}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9},
107 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57},
108 {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4},
109 {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3},
110 {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x2126},
111 {0x212A, 0x212B}, {0x212E, 0x212E}, {0x2180, 0x2182}, {0x3041, 0x3094}, {0x30A1, 0x30FA},
112 {0x3105, 0x312C}, {0xAC00, 0xD7A3}
113};
114static const RangeIter g_base_end = g_base_begin + sizeof(g_base_begin) / sizeof(QXmlCharRange);
115
117{
118 {0x3007, 0x3007}, {0x3021, 0x3029}, {0x4E00, 0x9FA5}
119};
121
122bool QXmlUtils::isIdeographic(const QChar c)
123{
124 return rangeContains(g_ideographic_begin, g_ideographic_end, c);
125}
126
128{
129 {0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
130 {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C4}, {0x064B, 0x0652},
131 {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8},
132 {0x06EA, 0x06ED}, {0x0901, 0x0903}, {0x093C, 0x093C}, {0x093E, 0x094C}, {0x094D, 0x094D},
133 {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x09BC}, {0x09BE, 0x09BE},
134 {0x09BF, 0x09BF}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x09D7},
135 {0x09E2, 0x09E3}, {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A3E}, {0x0A3F, 0x0A3F},
136 {0x0A40, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
137 {0x0ABC, 0x0ABC}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03},
138 {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57},
139 {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0BD7},
140 {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
141 {0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6},
142 {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
143 {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EB9},
144 {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
145 {0x0F39, 0x0F39}, {0x0F3E, 0x0F3E}, {0x0F3F, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B},
146 {0x0F90, 0x0F95}, {0x0F97, 0x0F97}, {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x0FB9, 0x0FB9},
147 {0x20D0, 0x20DC}, {0x20E1, 0x20E1}, {0x302A, 0x302F}, {0x3099, 0x3099}, {0x309A, 0x309A}
148};
150
151bool QXmlUtils::isCombiningChar(const QChar c)
152{
153 return rangeContains(g_combining_begin, g_combining_end, c);
154}
155
156// [88] Digit ::= ...
158{
159 {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF},
160 {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
161 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
162};
164
165bool QXmlUtils::isDigit(const QChar c)
166{
167 return rangeContains(g_digit_begin, g_digit_end, c);
168}
169
170// [89] Extender ::= ...
172{
173 {0x00B7, 0x00B7}, {0x02D0, 0x02D0}, {0x02D1, 0x02D1}, {0x0387, 0x0387}, {0x0640, 0x0640},
174 {0x0E46, 0x0E46}, {0x0EC6, 0x0EC6}, {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309D, 0x309E},
175 {0x30FC, 0x30FE}
176};
178
179bool QXmlUtils::isExtender(const QChar c)
180{
181 return rangeContains(g_extender_begin, g_extender_end, c);
182}
183
184bool QXmlUtils::isBaseChar(const QChar c)
185{
186 return rangeContains(g_base_begin, g_base_end, c);
187}
188
199{
200 // Valid encoding names are given by "[A-Za-z][A-Za-z0-9._\\-]*"
201 if (encName.isEmpty())
202 return false;
203 const auto first = encName.front().unicode();
205 return false;
206 for (QChar ch : encName.mid(1)) {
207 const auto cp = ch.unicode();
208 if (isAsciiLetterOrNumber(cp) || cp == '.' || cp == '_' || cp == '-')
209 continue;
210 return false;
211 }
212 return true;
213}
214
225{
226 return isBaseChar(c) || isIdeographic(c);
227}
228
238bool QXmlUtils::isChar(const char32_t c)
239{
240 // The valid range is defined by https://www.w3.org/TR/REC-xml/#NT-Char as following:
241 // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
242 return (c >= 0x0020 && c <= 0xD7FF)
243 || c == 0x0009
244 || c == 0x000A
245 || c == 0x000D
246 || (c >= 0xE000 && c <= 0xFFFD)
247 || (c >= 0x10000 && c <= 0x10FFFF);
248}
249
261{
262 return isBaseChar(c)
263 || isDigit(c)
264 || c.unicode() == '.'
265 || c.unicode() == '-'
266 || c.unicode() == '_'
267 || c.unicode() == ':'
268 || isCombiningChar(c)
269 || isIdeographic(c)
270 || isExtender(c);
271}
272
284{
285 for (QChar ch : candidate) {
286 const ushort cp = ch.unicode();
287
288 if (isAsciiLetterOrNumber(cp))
289 continue;
290
291 switch (cp)
292 {
293 /* Fallthrough all these. */
294 case 0x20:
295 case 0x0D:
296 case 0x0A:
297 case '-':
298 case '\'':
299 case '(':
300 case ')':
301 case '+':
302 case ',':
303 case '.':
304 case '/':
305 case ':':
306 case '=':
307 case '?':
308 case ';':
309 case '!':
310 case '*':
311 case '#':
312 case '@':
313 case '$':
314 case '_':
315 case '%':
316 continue;
317 default:
318 return false;
319 }
320 }
321
322 return true;
323}
324
336{
337 if (ncName.isEmpty())
338 return false;
339
340 const QChar first(ncName.at(0));
341
342 if (!QXmlUtils::isLetter(first) && first.unicode() != '_' && first.unicode() != ':')
343 return false;
344
345 for (QChar at : ncName) {
346 if (!QXmlUtils::isNameChar(at) || at == u':')
347 return false;
348 }
349
350 return true;
351}
352
\inmodule QtCore
\inmodule QtCore
Definition qstringview.h:78
static bool isChar(const char32_t c)
static bool isPublicID(QStringView candidate)
static bool isNCName(QStringView ncName)
static bool isEncName(QStringView encName)
static bool isLetter(const QChar c)
static bool isNameChar(const QChar c)
Combined button and popup list for selecting options.
constexpr bool isAsciiLower(char32_t c) noexcept
Definition qtools_p.h:77
constexpr bool isAsciiLetterOrNumber(char32_t c) noexcept
Definition qtools_p.h:82
constexpr bool isAsciiUpper(char32_t c) noexcept
Definition qtools_p.h:72
GLuint GLuint end
GLint first
const GLubyte * c
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
ptrdiff_t qptrdiff
Definition qtypes.h:164
unsigned short ushort
Definition qtypes.h:33
const QXmlCharRange * RangeIter
Definition qxmlutils.cpp:26
static const QXmlCharRange g_combining_begin[]
static const RangeIter g_base_end
static const QXmlCharRange g_base_begin[]
Definition qxmlutils.cpp:70
static const RangeIter g_ideographic_end
static const RangeIter g_extender_end
static const QXmlCharRange g_extender_begin[]
static const RangeIter g_combining_end
static const QXmlCharRange g_ideographic_begin[]
static const QXmlCharRange g_digit_begin[]
static const RangeIter g_digit_end
QAction * at