Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/.
Loading...
Searching...
No Matches
qunicodetools.cpp File Reference

(c26994ff1551aa5450383cc51bed9b4d39f973f7)

#include "qunicodetools_p.h"
#include "qunicodetables_p.h"
#include "qvarlengtharray.h"
#include <limits.h>
+ Include dependency graph for qunicodetools.cpp:

Go to the source code of this file.

Namespaces

namespace  QUnicodeTools
 
namespace  QUnicodeTools::GB
 
namespace  QUnicodeTools::WB
 
namespace  QUnicodeTools::SB
 
namespace  QUnicodeTools::LB
 
namespace  QUnicodeTools::LB::NS
 
namespace  QUnicodeTools::Tailored
 

Macros

#define FLAG(x)   (1 << (x))
 
#define IDEBUG   if constexpr (1) ; else qDebug
 
#define tibetan_form(c)    ((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther)
 
#define MMDEBUG   if (0) printf
 
#define KHDEBUG   if (0) printf
 

Typedefs

using QUnicodeTools::GB::GBTableEntryType = quint16
 
using QUnicodeTools::Tailored::CharAttributeFunction = void (*)(QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
typedef int QUnicodeTools::Tailored::MymrCharClass
 
typedef unsigned long QUnicodeTools::Tailored::KhmerCharClass
 

Enumerations

enum class  QUnicodeTools::GB::State : uchar { QUnicodeTools::GB::Normal , QUnicodeTools::GB::GB11_ExtPicExt , QUnicodeTools::GB::GB11_ExtPicExtZWJ , QUnicodeTools::GB::GB12_13_RI }
 
enum  QUnicodeTools::WB::Action { QUnicodeTools::WB::NoBreak , QUnicodeTools::WB::Break , QUnicodeTools::WB::Lookup , QUnicodeTools::WB::LookupW }
 
enum  QUnicodeTools::SB::State {
  QUnicodeTools::SB::Initial , QUnicodeTools::SB::Lower , QUnicodeTools::SB::Upper , QUnicodeTools::SB::LUATerm ,
  QUnicodeTools::SB::ATerm , QUnicodeTools::SB::ATermC , QUnicodeTools::SB::ACS , QUnicodeTools::SB::STerm ,
  QUnicodeTools::SB::STermC , QUnicodeTools::SB::SCS , QUnicodeTools::SB::BAfterC , QUnicodeTools::SB::BAfter ,
  QUnicodeTools::SB::Break , QUnicodeTools::SB::Lookup
}
 
enum  QUnicodeTools::LB::NS::Action { QUnicodeTools::LB::NS::None , QUnicodeTools::LB::NS::Start , QUnicodeTools::LB::NS::Continue , QUnicodeTools::LB::NS::Break }
 
enum  QUnicodeTools::LB::NS::Class {
  QUnicodeTools::LB::NS::XX , QUnicodeTools::LB::NS::PRPO , QUnicodeTools::LB::NS::OPHY , QUnicodeTools::LB::NS::NU ,
  QUnicodeTools::LB::NS::SYIS , QUnicodeTools::LB::NS::CLCP
}
 
enum  QUnicodeTools::LB::Action {
  QUnicodeTools::LB::ProhibitedBreak , QUnicodeTools::LB::PB = ProhibitedBreak , QUnicodeTools::LB::DirectBreak , QUnicodeTools::LB::DB = DirectBreak ,
  QUnicodeTools::LB::IndirectBreak , QUnicodeTools::LB::IB = IndirectBreak , QUnicodeTools::LB::CombiningIndirectBreak , QUnicodeTools::LB::CI = CombiningIndirectBreak ,
  QUnicodeTools::LB::CombiningProhibitedBreak , QUnicodeTools::LB::CP = CombiningProhibitedBreak , QUnicodeTools::LB::ProhibitedBreakAfterHebrewPlusHyphen , QUnicodeTools::LB::HH = ProhibitedBreakAfterHebrewPlusHyphen ,
  QUnicodeTools::LB::IndirectBreakIfNarrow , QUnicodeTools::LB::IN = IndirectBreakIfNarrow
}
 
enum  QUnicodeTools::Tailored::Form {
  QUnicodeTools::Tailored::Invalid = 0x0 , QUnicodeTools::Tailored::UnknownForm = Invalid , QUnicodeTools::Tailored::Consonant , QUnicodeTools::Tailored::Nukta ,
  QUnicodeTools::Tailored::Halant , QUnicodeTools::Tailored::Matra , QUnicodeTools::Tailored::VowelMark , QUnicodeTools::Tailored::StressMark ,
  QUnicodeTools::Tailored::IndependentVowel , QUnicodeTools::Tailored::LengthMark , QUnicodeTools::Tailored::Control , QUnicodeTools::Tailored::Other
}
 
enum  QUnicodeTools::Tailored::TibetanForm {
  QUnicodeTools::Tailored::TibetanOther , QUnicodeTools::Tailored::TibetanHeadConsonant , QUnicodeTools::Tailored::TibetanSubjoinedConsonant , QUnicodeTools::Tailored::TibetanSubjoinedVowel ,
  QUnicodeTools::Tailored::TibetanVowel
}
 
enum  QUnicodeTools::Tailored::MymrCharClassValues {
  QUnicodeTools::Tailored::Mymr_CC_RESERVED = 0 , QUnicodeTools::Tailored::Mymr_CC_CONSONANT = 1 , QUnicodeTools::Tailored::Mymr_CC_CONSONANT2 = 2 , QUnicodeTools::Tailored::Mymr_CC_NGA = 3 ,
  QUnicodeTools::Tailored::Mymr_CC_YA = 4 , QUnicodeTools::Tailored::Mymr_CC_RA = 5 , QUnicodeTools::Tailored::Mymr_CC_WA = 6 , QUnicodeTools::Tailored::Mymr_CC_HA = 7 ,
  QUnicodeTools::Tailored::Mymr_CC_IND_VOWEL = 8 , QUnicodeTools::Tailored::Mymr_CC_ZERO_WIDTH_NJ_MARK = 9 , QUnicodeTools::Tailored::Mymr_CC_VIRAMA = 10 , QUnicodeTools::Tailored::Mymr_CC_PRE_VOWEL = 11 ,
  QUnicodeTools::Tailored::Mymr_CC_BELOW_VOWEL = 12 , QUnicodeTools::Tailored::Mymr_CC_ABOVE_VOWEL = 13 , QUnicodeTools::Tailored::Mymr_CC_POST_VOWEL = 14 , QUnicodeTools::Tailored::Mymr_CC_SIGN_ABOVE = 15 ,
  QUnicodeTools::Tailored::Mymr_CC_SIGN_BELOW = 16 , QUnicodeTools::Tailored::Mymr_CC_SIGN_AFTER = 17 , QUnicodeTools::Tailored::Mymr_CC_ZERO_WIDTH_J_MARK = 18 , QUnicodeTools::Tailored::Mymr_CC_COUNT = 19
}
 
enum  QUnicodeTools::Tailored::MymrCharClassFlags {
  QUnicodeTools::Tailored::Mymr_CF_CLASS_MASK = 0x0000FFFF , QUnicodeTools::Tailored::Mymr_CF_CONSONANT = 0x01000000 , QUnicodeTools::Tailored::Mymr_CF_MEDIAL = 0x02000000 , QUnicodeTools::Tailored::Mymr_CF_IND_VOWEL = 0x04000000 ,
  QUnicodeTools::Tailored::Mymr_CF_DEP_VOWEL = 0x08000000 , QUnicodeTools::Tailored::Mymr_CF_DOTTED_CIRCLE = 0x10000000 , QUnicodeTools::Tailored::Mymr_CF_VIRAMA = 0x20000000 , QUnicodeTools::Tailored::Mymr_CF_POS_BEFORE = 0x00080000 ,
  QUnicodeTools::Tailored::Mymr_CF_POS_BELOW = 0x00040000 , QUnicodeTools::Tailored::Mymr_CF_POS_ABOVE = 0x00020000 , QUnicodeTools::Tailored::Mymr_CF_POS_AFTER = 0x00010000 , QUnicodeTools::Tailored::Mymr_CF_POS_MASK = 0x000f0000 ,
  QUnicodeTools::Tailored::Mymr_CF_AFTER_KINZI = 0x00100000
}
 
enum  QUnicodeTools::Tailored::MymrChar {
  QUnicodeTools::Tailored::Mymr_C_SIGN_ZWNJ = 0x200C , QUnicodeTools::Tailored::Mymr_C_SIGN_ZWJ = 0x200D , QUnicodeTools::Tailored::Mymr_C_DOTTED_CIRCLE = 0x25CC , QUnicodeTools::Tailored::Mymr_C_RA = 0x101B ,
  QUnicodeTools::Tailored::Mymr_C_YA = 0x101A , QUnicodeTools::Tailored::Mymr_C_NGA = 0x1004 , QUnicodeTools::Tailored::Mymr_C_VOWEL_E = 0x1031 , QUnicodeTools::Tailored::Mymr_C_VIRAMA = 0x1039
}
 
enum  {
  QUnicodeTools::Tailored::Mymr_xx = Mymr_CC_RESERVED , QUnicodeTools::Tailored::Mymr_c1 = Mymr_CC_CONSONANT | Mymr_CF_CONSONANT | Mymr_CF_POS_BELOW , QUnicodeTools::Tailored::Mymr_c2 = Mymr_CC_CONSONANT2 | Mymr_CF_CONSONANT , QUnicodeTools::Tailored::Mymr_ng = Mymr_CC_NGA | Mymr_CF_CONSONANT | Mymr_CF_POS_ABOVE ,
  QUnicodeTools::Tailored::Mymr_ya = Mymr_CC_YA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_AFTER | Mymr_CF_AFTER_KINZI , QUnicodeTools::Tailored::Mymr_ra = Mymr_CC_RA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BEFORE , QUnicodeTools::Tailored::Mymr_wa = Mymr_CC_WA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW , QUnicodeTools::Tailored::Mymr_ha = Mymr_CC_HA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW ,
  QUnicodeTools::Tailored::Mymr_id = Mymr_CC_IND_VOWEL | Mymr_CF_IND_VOWEL , QUnicodeTools::Tailored::Mymr_vi = Mymr_CC_VIRAMA | Mymr_CF_VIRAMA | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE , QUnicodeTools::Tailored::Mymr_dl = Mymr_CC_PRE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BEFORE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI , QUnicodeTools::Tailored::Mymr_db = Mymr_CC_BELOW_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI ,
  QUnicodeTools::Tailored::Mymr_da = Mymr_CC_ABOVE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI , QUnicodeTools::Tailored::Mymr_dr = Mymr_CC_POST_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI , QUnicodeTools::Tailored::Mymr_sa = Mymr_CC_SIGN_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_ABOVE | Mymr_CF_AFTER_KINZI , QUnicodeTools::Tailored::Mymr_sb = Mymr_CC_SIGN_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_BELOW | Mymr_CF_AFTER_KINZI ,
  QUnicodeTools::Tailored::Mymr_sp = Mymr_CC_SIGN_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI
}
 
enum  QUnicodeTools::Tailored::KhmerCharClassValues {
  QUnicodeTools::Tailored::CC_RESERVED = 0 , QUnicodeTools::Tailored::CC_CONSONANT = 1 , QUnicodeTools::Tailored::CC_CONSONANT2 = 2 , QUnicodeTools::Tailored::CC_CONSONANT3 = 3 ,
  QUnicodeTools::Tailored::CC_ZERO_WIDTH_NJ_MARK = 4 , QUnicodeTools::Tailored::CC_CONSONANT_SHIFTER = 5 , QUnicodeTools::Tailored::CC_ROBAT = 6 , QUnicodeTools::Tailored::CC_COENG = 7 ,
  QUnicodeTools::Tailored::CC_DEPENDENT_VOWEL = 8 , QUnicodeTools::Tailored::CC_SIGN_ABOVE = 9 , QUnicodeTools::Tailored::CC_SIGN_AFTER = 10 , QUnicodeTools::Tailored::CC_ZERO_WIDTH_J_MARK = 11 ,
  QUnicodeTools::Tailored::CC_COUNT = 12
}
 
enum  QUnicodeTools::Tailored::KhmerCharClassFlags {
  QUnicodeTools::Tailored::CF_CLASS_MASK = 0x0000FFFF , QUnicodeTools::Tailored::CF_CONSONANT = 0x01000000 , QUnicodeTools::Tailored::CF_SPLIT_VOWEL = 0x02000000 , QUnicodeTools::Tailored::CF_DOTTED_CIRCLE = 0x04000000 ,
  QUnicodeTools::Tailored::CF_COENG = 0x08000000 , QUnicodeTools::Tailored::CF_SHIFTER = 0x10000000 , QUnicodeTools::Tailored::CF_ABOVE_VOWEL = 0x20000000 , QUnicodeTools::Tailored::CF_POS_BEFORE = 0x00080000 ,
  QUnicodeTools::Tailored::CF_POS_BELOW = 0x00040000 , QUnicodeTools::Tailored::CF_POS_ABOVE = 0x00020000 , QUnicodeTools::Tailored::CF_POS_AFTER = 0x00010000 , QUnicodeTools::Tailored::CF_POS_MASK = 0x000f0000
}
 
enum  QUnicodeTools::Tailored::KhmerChar {
  QUnicodeTools::Tailored::C_SIGN_ZWNJ = 0x200C , QUnicodeTools::Tailored::C_SIGN_ZWJ = 0x200D , QUnicodeTools::Tailored::C_RO = 0x179A , QUnicodeTools::Tailored::C_VOWEL_AA = 0x17B6 ,
  QUnicodeTools::Tailored::C_SIGN_NIKAHIT = 0x17C6 , QUnicodeTools::Tailored::C_VOWEL_E = 0x17C1 , QUnicodeTools::Tailored::C_COENG = 0x17D2
}
 
enum  {
  QUnicodeTools::Tailored::_xx = CC_RESERVED , QUnicodeTools::Tailored::_sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE , QUnicodeTools::Tailored::_sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER , QUnicodeTools::Tailored::_c1 = CC_CONSONANT | CF_CONSONANT ,
  QUnicodeTools::Tailored::_c2 = CC_CONSONANT2 | CF_CONSONANT , QUnicodeTools::Tailored::_c3 = CC_CONSONANT3 | CF_CONSONANT , QUnicodeTools::Tailored::_rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE , QUnicodeTools::Tailored::_cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER ,
  QUnicodeTools::Tailored::_dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE , QUnicodeTools::Tailored::_db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE , QUnicodeTools::Tailored::_da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL , QUnicodeTools::Tailored::_dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE ,
  QUnicodeTools::Tailored::_co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE , QUnicodeTools::Tailored::_va = _da | CF_SPLIT_VOWEL , QUnicodeTools::Tailored::_vr = _dr | CF_SPLIT_VOWEL
}
 
enum  QUnicodeTools::Tailored::KhmerCharClassesRange { QUnicodeTools::Tailored::KhmerFirstChar = 0x1780 , QUnicodeTools::Tailored::KhmerLastChar = 0x17df }
 

Functions

static bool QUnicodeTools::GB::shouldBreakBetweenClasses (QUnicodeTables::GraphemeBreakClass first, QUnicodeTables::GraphemeBreakClass second)
 
static void QUnicodeTools::getGraphemeBreaks (const char16_t *string, qsizetype len, QCharAttributes *attributes)
 
static void QUnicodeTools::getWordBreaks (const char16_t *string, qsizetype len, QCharAttributes *attributes)
 
static void QUnicodeTools::getSentenceBreaks (const char16_t *string, qsizetype len, QCharAttributes *attributes)
 
Class QUnicodeTools::LB::NS::toClass (QUnicodeTables::LineBreakClass lbc, QChar::Category category)
 
static void QUnicodeTools::getLineBreaks (const char16_t *string, qsizetype len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options)
 
static void QUnicodeTools::getWhiteSpaces (const char16_t *string, qsizetype len, QCharAttributes *attributes)
 
static Form QUnicodeTools::Tailored::form (unsigned short uc)
 
static qsizetype QUnicodeTools::Tailored::indic_nextSyllableBoundary (QChar::Script script, const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
 
static void QUnicodeTools::Tailored::indicAttributes (QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
static void QUnicodeTools::Tailored::thaiAttributes (QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
static qsizetype QUnicodeTools::Tailored::tibetan_nextSyllableBoundary (const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
 
static void QUnicodeTools::Tailored::tibetanAttributes (QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
static MymrCharClass QUnicodeTools::Tailored::getMyanmarCharClass (ushort ch)
 
static qsizetype QUnicodeTools::Tailored::myanmar_nextSyllableBoundary (const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
 
static void QUnicodeTools::Tailored::myanmarAttributes (QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
static KhmerCharClass QUnicodeTools::Tailored::getKhmerCharClass (ushort uc)
 
static qsizetype QUnicodeTools::Tailored::khmer_nextSyllableBoundary (const char16_t *s, qsizetype start, qsizetype end, bool *invalid)
 
static void QUnicodeTools::Tailored::khmerAttributes (QChar::Script script, const char16_t *text, qsizetype from, qsizetype len, QCharAttributes *attributes)
 
static void QUnicodeTools::Tailored::getCharAttributes (const char16_t *string, qsizetype stringLength, const QUnicodeTools::ScriptItem *items, qsizetype numItems, QCharAttributes *attributes)
 
Q_CORE_EXPORT void QUnicodeTools::initCharAttributes (QStringView string, const ScriptItem *items, qsizetype numItems, QCharAttributes *attributes, CharAttributeOptions options)
 
Q_CORE_EXPORT void QUnicodeTools::initScripts (QStringView string, ScriptItemArray *scripts)
 

Variables

constexpr int qt_initcharattributes_default_algorithm_only = 0
 
static const GBTableEntryType QUnicodeTools::GB::Extend_SpacingMark_ZWJ
 
static const GBTableEntryType QUnicodeTools::GB::HardBreak = 0u
 
static const GBTableEntryType QUnicodeTools::GB::breakTable [QUnicodeTables::NumGraphemeBreakClasses]
 
static const uchar QUnicodeTools::WB::breakTable [QUnicodeTables::NumWordBreakClasses][QUnicodeTables::NumWordBreakClasses]
 
static const uchar QUnicodeTools::SB::breakTable [BAfter+1][QUnicodeTables::NumSentenceBreakClasses]
 
static const uchar QUnicodeTools::LB::NS::actionTable [CLCP+1][CLCP+1]
 
static const uchar QUnicodeTools::LB::breakTable [QUnicodeTables::LineBreak_ZWJ][QUnicodeTables::LineBreak_ZWJ]
 
static const unsigned char QUnicodeTools::Tailored::indicForms [0xe00-0x900]
 
static const unsigned char QUnicodeTools::Tailored::tibetanForm [0x80]
 
static const MymrCharClass QUnicodeTools::Tailored::mymrCharClasses []
 
static const signed char QUnicodeTools::Tailored::mymrStateTable [][Mymr_CC_COUNT]
 
static const KhmerCharClass QUnicodeTools::Tailored::khmerCharClasses []
 
static const signed char QUnicodeTools::Tailored::khmerStateTable [][CC_COUNT]
 
const CharAttributeFunction QUnicodeTools::Tailored::charAttributeFunction []
 

Macro Definition Documentation

◆ FLAG

#define FLAG ( x)    (1 << (x))

Definition at line 14 of file qunicodetools.cpp.

◆ IDEBUG

#define IDEBUG   if constexpr (1) ; else qDebug

◆ KHDEBUG

#define KHDEBUG
Value:
if (0) \
printf

Definition at line 2242 of file qunicodetools.cpp.

Referenced by QUnicodeTools::Tailored::khmer_nextSyllableBoundary().

◆ MMDEBUG

#define MMDEBUG
Value:
if (0) \
printf

Definition at line 1908 of file qunicodetools.cpp.

Referenced by QUnicodeTools::Tailored::myanmar_nextSyllableBoundary().

◆ tibetan_form

#define tibetan_form ( c)     ((c) >= 0x0f40 && (c) < 0x0fc0 ? (TibetanForm)tibetanForm[(c) - 0x0f40] : TibetanOther)

Variable Documentation

◆ qt_initcharattributes_default_algorithm_only

constexpr int qt_initcharattributes_default_algorithm_only = 0
constexpr