#pragma once #include "CoreTypes.h" #include "Templates/Optional.h" #include "TypeTraits/TypeTraits.h" #include "Miscellaneous/AssertionMacros.h" #include #include NAMESPACE_REDCRAFT_BEGIN NAMESPACE_MODULE_BEGIN(Redcraft) NAMESPACE_MODULE_BEGIN(Utility) template concept CCharType = CSameAs || CSameAs || CSameAs || CSameAs || CSameAs; NAMESPACE_PRIVATE_BEGIN template struct TLiteral; template <> struct TLiteral { NODISCARD FORCEINLINE static constexpr char Select(const char X, const wchar , const u8char , const u16char , const u32char ) { return X; } NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar*, const u8char*, const u16char*, const u32char*) { return X; } }; template <> struct TLiteral { NODISCARD FORCEINLINE static constexpr wchar Select(const char , const wchar X, const u8char , const u16char , const u32char ) { return X; } NODISCARD FORCEINLINE static constexpr const wchar* Select(const char*, const wchar* X, const u8char*, const u16char*, const u32char*) { return X; } }; template <> struct TLiteral { NODISCARD FORCEINLINE static constexpr u8char Select(const char , const wchar , const u8char X, const u16char , const u32char ) { return X; } NODISCARD FORCEINLINE static constexpr const u8char* Select(const char*, const wchar*, const u8char* X, const u16char*, const u32char*) { return X; } }; template <> struct TLiteral { NODISCARD FORCEINLINE static constexpr u16char Select(const char , const wchar , const u8char , const u16char X, const u32char ) { return X; } NODISCARD FORCEINLINE static constexpr const u16char* Select(const char*, const wchar*, const u8char*, const u16char* X, const u32char*) { return X; } }; template <> struct TLiteral { NODISCARD FORCEINLINE static constexpr u32char Select(const char , const wchar , const u8char , const u16char , const u32char X) { return X; } NODISCARD FORCEINLINE static constexpr const u32char* Select(const char*, const wchar*, const u8char*, const u16char*, const u32char* X) { return X; } }; NAMESPACE_PRIVATE_END /** Templated literal struct to allow selection of string literals based on the character type provided, and not on compiler switches. */ #define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral::Select(TEXT(StringLiteral), WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral)) /** Set of utility functions operating on a single character. Implemented based on user-preferred locale and ISO 30112 "i18n". */ template struct TChar { using CharType = T; /** The maximum number of code units required to represent a single character. if unknown, guess 1. */ static constexpr size_t MaxCodeUnitLength = CSameAs ? MB_LEN_MAX : CSameAs ? PLATFORM_WINDOWS ? 2 : PLATFORM_LINUX ? 1 : 1 : CSameAs ? 4 : CSameAs ? 2 : CSameAs ? 1 : 1; /** Whether the character type is fixed-length. */ static constexpr bool bIsFixedLength = MaxCodeUnitLength == 1; NODISCARD FORCEINLINE static constexpr bool IsValid(CharType InChar) { if constexpr (CSameAs) { if ((InChar & 0b10000000) == 0b00000000) return true; return false; } else if constexpr (CSameAs || CSameAs) { if (InChar >= 0xD800 && InChar <= 0xDBFF) return false; if (InChar >= 0xDC00 && InChar <= 0xDFFF) return false; return InChar <= 0x10FFFF; } // Windows uses UTF-16 encoding for wchar. else if constexpr (PLATFORM_WINDOWS && (CSameAs)) { return TChar::IsValid(static_cast(InChar)); } // Linux uses UTF-32 encoding for wchar. else if constexpr (PLATFORM_LINUX && (CSameAs)) { return TChar::IsValid(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsNonch(CharType InChar) { if constexpr (CSameAs) { return false; } else if constexpr (CSameAs) { if (InChar >= U16TEXT('\uFDD0') && InChar <= U16TEXT('\uFDEF')) return true; if (InChar == U16TEXT('\uFFFE')) return true; if (InChar == U16TEXT('\uFFFF')) return true; return false; } else if constexpr (CSameAs) { if (InChar >= U32TEXT('\uFDD0') && InChar <= U32TEXT('\uFDEF')) return true; if ((InChar & 0x0000FFFE) == 0x0000FFFE) return TChar::IsValid(InChar); return false; } // Windows uses UTF-16 encoding for wchar. else if constexpr (PLATFORM_WINDOWS && (CSameAs)) { return TChar::IsNonch(static_cast(InChar)); } // Linux uses UTF-32 encoding for wchar. else if constexpr (PLATFORM_LINUX && (CSameAs)) { return TChar::IsNonch(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsAlnum(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isalnum(InChar, Loc); } else { return TChar::IsAlpha(InChar) || TChar::IsDigit(InChar); } } NODISCARD FORCEINLINE static constexpr bool IsAlpha(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isalpha(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..;..; */ if ((InChar >= U8TEXT('\u0041') && InChar <= U8TEXT('\u005A')) || (InChar >= U8TEXT('\u0061') && InChar <= U8TEXT('\u007A'))) return true; return false; } else if constexpr (CSameAs || CSameAs) { checkf(InChar <= LITERAL(CharType, '\u007F'), TEXT("TChar::IsAlpha() only supports basic latin block.")); if (InChar > LITERAL(CharType, '\u007F')) return false; return TChar::IsAlpha(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsLower(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::islower(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..; */ if (InChar >= U8TEXT('\u0061') && InChar <= U8TEXT('\u007A')) return true; return false; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return TChar::IsLower(static_cast(InChar)); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return TChar::IsLower(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsUpper(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isupper(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..; */ if (InChar >= U8TEXT('\u0041') && InChar <= U8TEXT('\u005A')) return true; return false; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return TChar::IsUpper(static_cast(InChar)); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return TChar::IsUpper(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar) { /* ..; */ return (InChar >= LITERAL(CharType, '0') && InChar <= LITERAL(CharType, '9')); } NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar, int Base) { checkf(Base >= 2 && Base <= 36, TEXT("Base must be in the range [2, 36].")); /* ..;..;..; */ return (InChar >= LITERAL(CharType, '0') && InChar < LITERAL(CharType, '0') + Base ) || (InChar >= LITERAL(CharType, 'a') && InChar < LITERAL(CharType, 'a') + Base - 10) || (InChar >= LITERAL(CharType, 'A') && InChar < LITERAL(CharType, 'A') + Base - 10); } NODISCARD FORCEINLINE static constexpr bool IsCntrl(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::iscntrl(InChar, Loc); } else if constexpr (CSameAs) { /* ..;; */ return (InChar >= U8TEXT('\u0000') && InChar <= U8TEXT('\u001F')) || InChar == U8TEXT('\u007F'); } else if constexpr (CSameAs) { /* ..;..;;; */ return (InChar >= U16TEXT('\u0000') && InChar <= U16TEXT('\u001F')) || (InChar >= U16TEXT('\u007F') && InChar <= U16TEXT('\u009F')) || (InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029')); } else if constexpr (CSameAs) { /* ..;..;;; */ return (InChar >= U32TEXT('\u0000') && InChar <= U32TEXT('\u001F')) || (InChar >= U32TEXT('\u007F') && InChar <= U32TEXT('\u009F')) || (InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029')); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsGraph(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isgraph(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..; */ if (InChar >= U8TEXT('\u0021') && InChar <= U8TEXT('\u007E')) return true; return false; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return TChar::IsGraph(static_cast(InChar)); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return TChar::IsGraph(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsSpace(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isspace(InChar, Loc); } else if constexpr (CSameAs) { /* * ISO/IEC 6429 * ..; */ if (InChar >= U8TEXT('\u0009') && InChar <= U8TEXT('\u000D')) return true; /* * BASIC LATIN * ; */ if (InChar == U8TEXT('\u0020')) return true; return false; } else if constexpr (CSameAs) { /* * ISO/IEC 6429 * ..; */ if (InChar >= U16TEXT('\u0009') && InChar <= U16TEXT('\u000D')) return true; /* * BASIC LATIN * ; */ if (InChar == U16TEXT('\u0020')) return true; /* * OGHAM * ; */ if (InChar == U16TEXT('\u1680')) return true; /* * MONGOL * ; */ if (InChar == U16TEXT('\u180E')) return true; /* * GENERAL PUNCTUATION * ..;..;;;; */ if ((InChar >= U16TEXT('\u2000') && InChar <= U16TEXT('\u2006')) || (InChar >= U16TEXT('\u2008') && InChar <= U16TEXT('\u200A')) || (InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029')) || (InChar == U16TEXT('\u205F'))) return true; /* * CJK SYMBOLS AND PUNCTUATION, HIRAGANA * ; */ if (InChar == U16TEXT('\u3000')) return true; return false; } else if constexpr (CSameAs) { /* * ISO/IEC 6429 * ..; */ if (InChar >= U32TEXT('\u0009') && InChar <= U32TEXT('\u000D')) return true; /* * BASIC LATIN * ; */ if (InChar == U32TEXT('\u0020')) return true; /* * OGHAM * ; */ if (InChar == U32TEXT('\u1680')) return true; /* * MONGOL * ; */ if (InChar == U32TEXT('\u180E')) return true; /* * GENERAL PUNCTUATION * ..;..;;;; */ if ((InChar >= U32TEXT('\u2000') && InChar <= U32TEXT('\u2006')) || (InChar >= U32TEXT('\u2008') && InChar <= U32TEXT('\u200A')) || (InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029')) || (InChar == U32TEXT('\u205F'))) return true; /* * CJK SYMBOLS AND PUNCTUATION, HIRAGANA * ; */ if (InChar == U32TEXT('\u3000')) return true; return false; } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsBlank(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isblank(InChar, Loc); } else if constexpr (CSameAs) { /* ;; */ return InChar == U8TEXT('\u0009') || InChar == U8TEXT('\u0020'); } else if constexpr (CSameAs) { /* ;;;;..;..;;; */ return (InChar >= U16TEXT('\u2000') && InChar <= U16TEXT('\u2006')) || (InChar == U16TEXT('\u0009') || InChar == U16TEXT('\u0020')) || (InChar == U16TEXT('\u1680') || InChar == U16TEXT('\u180E')) || (InChar == U16TEXT('\u2008') || InChar == U16TEXT('\u200A')) || (InChar == U16TEXT('\u205F') || InChar == U16TEXT('\u3000')); } else if constexpr (CSameAs) { /* ;;;;..;..;;; */ return (InChar >= U32TEXT('\u2000') && InChar <= U32TEXT('\u2006')) || (InChar == U32TEXT('\u0009') || InChar == U32TEXT('\u0020')) || (InChar == U32TEXT('\u1680') || InChar == U32TEXT('\u180E')) || (InChar == U32TEXT('\u2008') || InChar == U32TEXT('\u200A')) || (InChar == U32TEXT('\u205F') || InChar == U32TEXT('\u3000')); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsPrint(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::isprint(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..; */ if (InChar >= U8TEXT('\u0020') && InChar <= U8TEXT('\u007E')) return true; return false; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return TChar::IsPrint(static_cast(InChar)); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return TChar::IsPrint(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr bool IsPunct(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return NAMESPACE_STD::ispunct(InChar, Loc); } else if constexpr (CSameAs) { /* * BASIC LATIN * ..;..;..;..; */ if ((InChar >= U8TEXT('\u0021') && InChar <= U8TEXT('\u002F')) || (InChar >= U8TEXT('\u003A') && InChar <= U8TEXT('\u0040')) || (InChar >= U8TEXT('\u005B') && InChar <= U8TEXT('\u0060')) || (InChar >= U8TEXT('\u007B') && InChar <= U8TEXT('\u007E'))) return true; return false; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return TChar::IsPunct(static_cast(InChar)); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return TChar::IsPunct(static_cast(InChar)); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return false; } NODISCARD FORCEINLINE static constexpr CharType ToLower(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return static_cast(NAMESPACE_STD::tolower(InChar, Loc)); } else if constexpr (CSameAs) { /* * BASIC LATIN * (,);(,);(,);(,); * (,);(,);(,);(,); * (,);(,);(,);(,); * (,);(,);(,);(,); * (,);(,);(,);(,); * (,);(,);(,);(,); * (,);(,); */ if (InChar >= U8TEXT('\u0041') && InChar <= U8TEXT('\u005A')) return InChar + U8TEXT('\u0020'); return InChar; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return static_cast(TChar::ToLower(static_cast(InChar))); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return static_cast(TChar::ToLower(static_cast(InChar))); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return InChar; } NODISCARD FORCEINLINE static constexpr CharType ToUpper(CharType InChar) { if constexpr (CSameAs || CSameAs) { NAMESPACE_STD::locale Loc(""); return static_cast(NAMESPACE_STD::toupper(InChar, Loc)); } else if constexpr (CSameAs) { /* * BASIC LATIN * (,);(,);(,);(,);/ * (,);(,);(,);(,);/ * (,);(,);(,);(,);/ * (,);(,);(,);(,);/ * (,);(,);(,);(,);/ * (,);(,);(,);(,);/ * (,);(,); */ if (InChar >= U8TEXT('\u0061') && InChar <= U8TEXT('\u007A')) return InChar - U8TEXT('\u0020'); return InChar; } else if constexpr (CSameAs) { checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block.")); if (InChar > U16TEXT('\u007F')) return false; return static_cast(TChar::ToUpper(static_cast(InChar))); } else if constexpr (CSameAs) { checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block.")); if (InChar > U32TEXT('\u007F')) return false; return static_cast(TChar::ToUpper(static_cast(InChar))); } else static_assert(sizeof(CharType) == -1, "Unsupported character type"); return InChar; } NODISCARD FORCEINLINE static constexpr TOptional ToDigit(CharType InChar) { switch (InChar) { case LITERAL(CharType, '0'): return 0; case LITERAL(CharType, '1'): return 1; case LITERAL(CharType, '2'): return 2; case LITERAL(CharType, '3'): return 3; case LITERAL(CharType, '4'): return 4; case LITERAL(CharType, '5'): return 5; case LITERAL(CharType, '6'): return 6; case LITERAL(CharType, '7'): return 7; case LITERAL(CharType, '8'): return 8; case LITERAL(CharType, '9'): return 9; case LITERAL(CharType, 'a'): return 10; case LITERAL(CharType, 'b'): return 11; case LITERAL(CharType, 'c'): return 12; case LITERAL(CharType, 'd'): return 13; case LITERAL(CharType, 'e'): return 14; case LITERAL(CharType, 'f'): return 15; case LITERAL(CharType, 'g'): return 16; case LITERAL(CharType, 'h'): return 17; case LITERAL(CharType, 'i'): return 18; case LITERAL(CharType, 'j'): return 19; case LITERAL(CharType, 'k'): return 20; case LITERAL(CharType, 'l'): return 21; case LITERAL(CharType, 'm'): return 22; case LITERAL(CharType, 'n'): return 23; case LITERAL(CharType, 'o'): return 24; case LITERAL(CharType, 'p'): return 25; case LITERAL(CharType, 'q'): return 26; case LITERAL(CharType, 'r'): return 27; case LITERAL(CharType, 's'): return 28; case LITERAL(CharType, 't'): return 29; case LITERAL(CharType, 'u'): return 30; case LITERAL(CharType, 'v'): return 31; case LITERAL(CharType, 'w'): return 32; case LITERAL(CharType, 'x'): return 33; case LITERAL(CharType, 'y'): return 34; case LITERAL(CharType, 'z'): return 35; case LITERAL(CharType, 'A'): return 10; case LITERAL(CharType, 'B'): return 11; case LITERAL(CharType, 'C'): return 12; case LITERAL(CharType, 'D'): return 13; case LITERAL(CharType, 'E'): return 14; case LITERAL(CharType, 'F'): return 15; case LITERAL(CharType, 'G'): return 16; case LITERAL(CharType, 'H'): return 17; case LITERAL(CharType, 'I'): return 18; case LITERAL(CharType, 'J'): return 19; case LITERAL(CharType, 'K'): return 20; case LITERAL(CharType, 'L'): return 21; case LITERAL(CharType, 'M'): return 22; case LITERAL(CharType, 'N'): return 23; case LITERAL(CharType, 'O'): return 24; case LITERAL(CharType, 'P'): return 25; case LITERAL(CharType, 'Q'): return 26; case LITERAL(CharType, 'R'): return 27; case LITERAL(CharType, 'S'): return 28; case LITERAL(CharType, 'T'): return 29; case LITERAL(CharType, 'U'): return 30; case LITERAL(CharType, 'V'): return 31; case LITERAL(CharType, 'W'): return 32; case LITERAL(CharType, 'X'): return 33; case LITERAL(CharType, 'Y'): return 34; case LITERAL(CharType, 'Z'): return 35; default: return Invalid; } } NODISCARD FORCEINLINE static constexpr TOptional FromDigit(int InDigit) { if (InDigit < 0 || InDigit > 35) return Invalid; return LITERAL(CharType, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[InDigit]; } }; using FChar = TChar; using FWChar = TChar; using FU8Char = TChar; using FU16Char = TChar; using FU32Char = TChar; using FUnicodeChar = TChar; static_assert(FUnicodeChar::bIsFixedLength); NAMESPACE_MODULE_END(Utility) NAMESPACE_MODULE_END(Redcraft) NAMESPACE_REDCRAFT_END