From 3df9122ef02ef09166553f3c9e9c53c441a8ad38 Mon Sep 17 00:00:00 2001 From: Redstone1024 <2824517378@qq.com> Date: Sat, 26 Oct 2024 21:15:27 +0800 Subject: [PATCH] refactor(string): refactor TChar to enhance support for unicode --- .../Source/Private/Testing/StringTesting.cpp | 108 ++-- Redcraft.Utility/Source/Public/String/Char.h | 507 +++++++++++++----- 2 files changed, 413 insertions(+), 202 deletions(-) diff --git a/Redcraft.Utility/Source/Private/Testing/StringTesting.cpp b/Redcraft.Utility/Source/Private/Testing/StringTesting.cpp index bd42aa0..2b7e77b 100644 --- a/Redcraft.Utility/Source/Private/Testing/StringTesting.cpp +++ b/Redcraft.Utility/Source/Private/Testing/StringTesting.cpp @@ -22,10 +22,11 @@ void TestChar() { always_check(!CCharType); always_check(CCharType); - always_check(CCharType); - always_check(CCharType); - always_check(CCharType); - always_check(CCharType); + always_check(CCharType); + always_check(CCharType); + always_check(CCharType); + always_check(CCharType); + always_check(CCharType); } { @@ -71,31 +72,31 @@ void TestChar() } { -// always_check(FU16Char::IsAlnum(U16TEXT('0'))); -// always_check(FU16Char::IsAlpha(U16TEXT('A'))); -// always_check(FU16Char::IsLower(U16TEXT('a'))); -// always_check(FU16Char::IsUpper(U16TEXT('A'))); + always_check(FU16Char::IsAlnum(U16TEXT('0'))); + always_check(FU16Char::IsAlpha(U16TEXT('A'))); + always_check(FU16Char::IsLower(U16TEXT('a'))); + always_check(FU16Char::IsUpper(U16TEXT('A'))); always_check(FU16Char::IsDigit(U16TEXT('0'))); always_check(FU16Char::IsCntrl(U16TEXT('\n'))); -// always_check(FU16Char::IsGraph(U16TEXT('!'))); + always_check(FU16Char::IsGraph(U16TEXT('!'))); always_check(FU16Char::IsSpace(U16TEXT('\t'))); always_check(FU16Char::IsBlank(U16TEXT(' '))); -// always_check(FU16Char::IsPrint(U16TEXT('#'))); -// always_check(FU16Char::IsPunct(U16TEXT('['))); + always_check(FU16Char::IsPrint(U16TEXT('#'))); + always_check(FU16Char::IsPunct(U16TEXT('['))); } { -// always_check(FU32Char::IsAlnum(U32TEXT('0'))); -// always_check(FU32Char::IsAlpha(U32TEXT('A'))); -// always_check(FU32Char::IsLower(U32TEXT('a'))); -// always_check(FU32Char::IsUpper(U32TEXT('A'))); + always_check(FU32Char::IsAlnum(U32TEXT('0'))); + always_check(FU32Char::IsAlpha(U32TEXT('A'))); + always_check(FU32Char::IsLower(U32TEXT('a'))); + always_check(FU32Char::IsUpper(U32TEXT('A'))); always_check(FU32Char::IsDigit(U32TEXT('0'))); always_check(FU32Char::IsCntrl(U32TEXT('\n'))); -// always_check(FU32Char::IsGraph(U32TEXT('!'))); + always_check(FU32Char::IsGraph(U32TEXT('!'))); always_check(FU32Char::IsSpace(U32TEXT('\t'))); always_check(FU32Char::IsBlank(U32TEXT(' '))); -// always_check(FU32Char::IsPrint(U32TEXT('#'))); -// always_check(FU32Char::IsPunct(U32TEXT('['))); + always_check(FU32Char::IsPrint(U32TEXT('#'))); + always_check(FU32Char::IsPunct(U32TEXT('['))); } { @@ -107,7 +108,7 @@ void TestChar() always_check(!FChar::IsCntrl(TEXT('_'))); always_check(!FChar::IsGraph(TEXT(' '))); always_check(!FChar::IsSpace(TEXT('='))); - always_check(!FChar::IsBlank(TEXT('\r'))); + always_check(!FChar::IsBlank(TEXT('+'))); always_check(!FChar::IsPrint(TEXT('\n'))); always_check(!FChar::IsPunct(TEXT('H'))); } @@ -121,7 +122,7 @@ void TestChar() always_check(!FWChar::IsCntrl(WTEXT('_'))); always_check(!FWChar::IsGraph(WTEXT(' '))); always_check(!FWChar::IsSpace(WTEXT('='))); - always_check(!FWChar::IsBlank(WTEXT('\r'))); + always_check(!FWChar::IsBlank(WTEXT('+'))); always_check(!FWChar::IsPrint(WTEXT('\n'))); always_check(!FWChar::IsPunct(WTEXT('H'))); } @@ -135,37 +136,37 @@ void TestChar() always_check(!FU8Char::IsCntrl(U8TEXT('_'))); always_check(!FU8Char::IsGraph(U8TEXT(' '))); always_check(!FU8Char::IsSpace(U8TEXT('='))); - always_check(!FU8Char::IsBlank(U8TEXT('\r'))); + always_check(!FU8Char::IsBlank(U8TEXT('+'))); always_check(!FU8Char::IsPrint(U8TEXT('\n'))); always_check(!FU8Char::IsPunct(U8TEXT('H'))); } { -// always_check(!FU16Char::IsAlnum(U16TEXT('$'))); -// always_check(!FU16Char::IsAlpha(U16TEXT('0'))); -// always_check(!FU16Char::IsLower(U16TEXT('A'))); -// always_check(!FU16Char::IsUpper(U16TEXT('a'))); + always_check(!FU16Char::IsAlnum(U16TEXT('$'))); + always_check(!FU16Char::IsAlpha(U16TEXT('0'))); + always_check(!FU16Char::IsLower(U16TEXT('A'))); + always_check(!FU16Char::IsUpper(U16TEXT('a'))); always_check(!FU16Char::IsDigit(U16TEXT('I'))); always_check(!FU16Char::IsCntrl(U16TEXT('_'))); -// always_check(!FU16Char::IsGraph(U16TEXT(' '))); + always_check(!FU16Char::IsGraph(U16TEXT(' '))); always_check(!FU16Char::IsSpace(U16TEXT('='))); - always_check(!FU16Char::IsBlank(U16TEXT('\r'))); -// always_check(!FU16Char::IsPrint(U16TEXT('\n'))); -// always_check(!FU16Char::IsPunct(U16TEXT('H'))); + always_check(!FU16Char::IsBlank(U16TEXT('+'))); + always_check(!FU16Char::IsPrint(U16TEXT('\n'))); + always_check(!FU16Char::IsPunct(U16TEXT('H'))); } { -// always_check(!FU32Char::IsAlnum(U32TEXT('$'))); -// always_check(!FU32Char::IsAlpha(U32TEXT('0'))); -// always_check(!FU32Char::IsLower(U32TEXT('A'))); -// always_check(!FU32Char::IsUpper(U32TEXT('a'))); + always_check(!FU32Char::IsAlnum(U32TEXT('$'))); + always_check(!FU32Char::IsAlpha(U32TEXT('0'))); + always_check(!FU32Char::IsLower(U32TEXT('A'))); + always_check(!FU32Char::IsUpper(U32TEXT('a'))); always_check(!FU32Char::IsDigit(U32TEXT('I'))); always_check(!FU32Char::IsCntrl(U32TEXT('_'))); -// always_check(!FU32Char::IsGraph(U32TEXT(' '))); + always_check(!FU32Char::IsGraph(U32TEXT(' '))); always_check(!FU32Char::IsSpace(U32TEXT('='))); - always_check(!FU32Char::IsBlank(U32TEXT('\r'))); -// always_check(!FU32Char::IsPrint(U32TEXT('\n'))); -// always_check(!FU32Char::IsPunct(U32TEXT('H'))); + always_check(!FU32Char::IsBlank(U32TEXT('+'))); + always_check(!FU32Char::IsPrint(U32TEXT('\n'))); + always_check(!FU32Char::IsPunct(U32TEXT('H'))); } { @@ -188,10 +189,10 @@ void TestChar() always_check(FWChar::ToLower(WTEXT('i')) == WTEXT('i')); always_check(FU8Char::ToLower(U8TEXT('i')) == U8TEXT('i')); always_check(FU8Char::ToUpper(U8TEXT('l')) == U8TEXT('L')); -// always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i')); -// always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L')); -// always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i')); -// always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L')); + always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i')); + always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L')); + always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i')); + always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L')); } { @@ -279,14 +280,14 @@ void TestCString() Memory::Memzero(StrD); always_check(TCString::Copy(StrD, &StrD[4], StrA, nullptr) == nullptr); - + always_check(TCString::Compare(StrC, EndC , StrD, EndD ) == 0); always_check(TCString::Compare(StrC, nullptr, StrD, nullptr) == 0); always_check(TCString::Copy(StrD, nullptr, StrA, &StrA[4]) != nullptr); always_check(TCString::Length(StrD, nullptr) == 4); - + always_check(TCString::Compare(StrA, &StrA[4], StrD, &StrD[4]) == 0); always_check(TCString::Compare(StrA, nullptr , StrD, nullptr ) > 0); @@ -304,7 +305,7 @@ void TestCString() always_check(TCString::Compare(StrD, nullptr, LITERAL(T, "Hello "), nullptr) == 0); Memory::Memzero(StrD); - + always_check(TCString::Cat(StrD, nullptr, StrA, nullptr) != nullptr); always_check(TCString::Cat(StrD, nullptr, LITERAL(T, " "), nullptr) != nullptr); always_check(TCString::Cat(StrD, nullptr, StrB, nullptr) != nullptr); @@ -323,13 +324,13 @@ void TestCString() always_check(TCString::Find(StrA, nullptr, [](T A) { return A == LITERAL(T, 'l'); }) != TCString::Find(StrA, nullptr, [](T A) { return A == LITERAL(T, 'l'); }, ESearchDirection::FromEnd)); - + always_check(TCString::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'o'); }) == TCString::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'o'); }, ESearchDirection::FromEnd)); always_check(TCString::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'l'); }) != TCString::Find(StrA, EndA, [](T A) { return A == LITERAL(T, 'l'); }, ESearchDirection::FromEnd)); - + always_check(TCString::Find(StrA, &StrA[4], [](T A) { return A == LITERAL(T, 'o'); }) == TCString::Find(StrA, &StrA[4], [](T A) { return A == LITERAL(T, 'o'); }, ESearchDirection::FromEnd)); @@ -346,13 +347,13 @@ void TestCString() always_check(TCString::FindChar(StrA, nullptr, LITERAL(T, 'l')) != TCString::FindChar(StrA, nullptr, LITERAL(T, 'l'), ESearchDirection::FromEnd)); - + always_check(TCString::FindChar(StrA, EndA, LITERAL(T, 'o')) == TCString::FindChar(StrA, EndA, LITERAL(T, 'o'), ESearchDirection::FromEnd)); always_check(TCString::FindChar(StrA, EndA, LITERAL(T, 'l')) != TCString::FindChar(StrA, EndA, LITERAL(T, 'l'), ESearchDirection::FromEnd)); - + always_check(TCString::FindChar(StrA, &StrA[4], LITERAL(T, 'o')) == TCString::FindChar(StrA, &StrA[4], LITERAL(T, 'o'), ESearchDirection::FromEnd)); @@ -370,7 +371,7 @@ void TestCString() always_check(TCString::FindNotChar(StrA, EndA , LITERAL(T, '\0')) == StrA); always_check(TCString::FindNotChar(StrA, nullptr , LITERAL(T, 'I')) == StrA); always_check(TCString::FindNotChar(StrA, &StrA[2], LITERAL(T, 'I')) == StrA); - + always_check(TCString::FindNotChar(StrA, nullptr , LITERAL(T, '\0'), ESearchDirection::FromEnd) == StrA + 2); always_check(TCString::FindNotChar(StrA, EndA , LITERAL(T, '\0'), ESearchDirection::FromEnd) == StrA + 2); always_check(TCString::FindNotChar(StrA, nullptr , LITERAL(T, 'I'), ESearchDirection::FromEnd) == StrA + 3); @@ -400,10 +401,11 @@ void TestCString() }; TestTCString(InPlaceType); - TestTCString(InPlaceType); - TestTCString(InPlaceType); - TestTCString(InPlaceType); - TestTCString(InPlaceType); + TestTCString(InPlaceType); + TestTCString(InPlaceType); + TestTCString(InPlaceType); + TestTCString(InPlaceType); + TestTCString(InPlaceType); } NAMESPACE_END(Testing) diff --git a/Redcraft.Utility/Source/Public/String/Char.h b/Redcraft.Utility/Source/Public/String/Char.h index 83a1bbf..260a8c6 100644 --- a/Redcraft.Utility/Source/Public/String/Char.h +++ b/Redcraft.Utility/Source/Public/String/Char.h @@ -1,18 +1,19 @@ #pragma once #include "CoreTypes.h" +#include "Templates/Optional.h" #include "TypeTraits/TypeTraits.h" #include "Miscellaneous/AssertionMacros.h" -#include -#include +#include +#include NAMESPACE_REDCRAFT_BEGIN NAMESPACE_MODULE_BEGIN(Redcraft) NAMESPACE_MODULE_BEGIN(Utility) template -concept CCharType = CSameAs || CSameAs || CSameAs || CSameAs || CSameAs; +concept CCharType = CSameAs || CSameAs || CSameAs || CSameAs || CSameAs; NAMESPACE_PRIVATE_BEGIN @@ -22,78 +23,161 @@ struct TLiteral; template <> struct TLiteral { - NODISCARD FORCEINLINE static constexpr const char Select(const char X, const wchar_t , const char8_t , const char16_t , const char32_t ) { return X; } - NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar_t*, const char8_t*, const char16_t*, const char32_t*) { return X; } + NODISCARD FORCEINLINE static constexpr char Select(const char X, const wchar , const u8char , const u16char , const u32char ) { return X; } + NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar*, const u8char*, const u16char*, const u32char*) { return X; } }; template <> -struct TLiteral +struct TLiteral { - NODISCARD FORCEINLINE static constexpr const wchar_t Select(const char , const wchar_t X, const char8_t , const char16_t , const char32_t ) { return X; } - NODISCARD FORCEINLINE static constexpr const wchar_t* Select(const char*, const wchar_t* X, const char8_t*, const char16_t*, const char32_t*) { return X; } + NODISCARD FORCEINLINE static constexpr wchar Select(const char , const wchar X, const u8char , const u16char , const u32char ) { return X; } + NODISCARD FORCEINLINE static constexpr const wchar* Select(const char*, const wchar* X, const u8char*, const u16char*, const u32char*) { return X; } }; template <> -struct TLiteral +struct TLiteral { - NODISCARD FORCEINLINE static constexpr const char8_t Select(const char , const wchar_t , const char8_t X, const char16_t , const char32_t ) { return X; } - NODISCARD FORCEINLINE static constexpr const char8_t* Select(const char*, const wchar_t*, const char8_t* X, const char16_t*, const char32_t*) { return X; } + NODISCARD FORCEINLINE static constexpr u8char Select(const char , const wchar , const u8char X, const u16char , const u32char ) { return X; } + NODISCARD FORCEINLINE static constexpr const u8char* Select(const char*, const wchar*, const u8char* X, const u16char*, const u32char*) { return X; } }; template <> -struct TLiteral +struct TLiteral { - NODISCARD FORCEINLINE static constexpr const char16_t Select(const char , const wchar_t , const char8_t , const char16_t X, const char32_t ) { return X; } - NODISCARD FORCEINLINE static constexpr const char16_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t* X, const char32_t*) { return X; } + NODISCARD FORCEINLINE static constexpr u16char Select(const char , const wchar , const u8char , const u16char X, const u32char ) { return X; } + NODISCARD FORCEINLINE static constexpr const u16char* Select(const char*, const wchar*, const u8char*, const u16char* X, const u32char*) { return X; } }; template <> -struct TLiteral +struct TLiteral { - NODISCARD FORCEINLINE static constexpr const char32_t Select(const char , const wchar_t , const char8_t , const char16_t , const char32_t X) { return X; } - NODISCARD FORCEINLINE static constexpr const char32_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t*, const char32_t* X) { return X; } + NODISCARD FORCEINLINE static constexpr u32char Select(const char , const wchar , const u8char , const u16char , const u32char X) { return X; } + NODISCARD FORCEINLINE static constexpr const u32char* Select(const char*, const wchar*, const u8char*, const u16char*, const u32char* X) { return X; } }; NAMESPACE_PRIVATE_END /** Templated literal struct to allow selection of string literals based on the character type provided, and not on compiler switches. */ -#define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral::Select(StringLiteral, WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral)) +#define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral::Select(TEXT(StringLiteral), WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral)) -/** Set of utility functions operating on a single character. Implemented based on ISO 30112 "i18n". */ +/** Set of utility functions operating on a single character. Implemented based on user-preferred locale and ISO 30112 "i18n". */ template struct TChar { using CharType = T; - inline static constexpr CharType NONE = CharType(-1); + /** The maximum number of code units required to represent a single character. if unknown, guess 1. */ + static constexpr size_t MaxCodeUnitLength = + CSameAs ? MB_LEN_MAX : + CSameAs ? + PLATFORM_WINDOWS ? 2 : + PLATFORM_LINUX ? 1 : 1 : + CSameAs ? 4 : + CSameAs ? 2 : + CSameAs ? 1 : 1; + + /** Whether the character type is fixed-length. */ + static constexpr bool bIsFixedLength = MaxCodeUnitLength == 1; + + NODISCARD FORCEINLINE static constexpr bool IsValid(CharType InChar) + { + if constexpr (CSameAs) + { + if ((InChar & 0b10000000) == 0b00000000) return true; + + return false; + } + + else if constexpr (CSameAs || CSameAs) + { + if (InChar >= 0xD800 && InChar <= 0xDBFF) return false; + if (InChar >= 0xDC00 && InChar <= 0xDFFF) return false; + + return InChar <= 0x10FFFF; + } + + // Windows uses UTF-16 encoding for wchar. + else if constexpr (PLATFORM_WINDOWS && (CSameAs)) + { + return TChar::IsValid(static_cast(InChar)); + } + + // Linux uses UTF-32 encoding for wchar. + else if constexpr (PLATFORM_LINUX && (CSameAs)) + { + return TChar::IsValid(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; + } + + NODISCARD FORCEINLINE static constexpr bool IsNonch(CharType InChar) + { + if constexpr (CSameAs) + { + return false; + } + + else if constexpr (CSameAs) + { + if (InChar >= U16TEXT('\uFDD0') && InChar <= U16TEXT('\uFDEF')) return true; + + if (InChar == U16TEXT('\uFFFE')) return true; + if (InChar == U16TEXT('\uFFFF')) return true; + + return false; + } + + else if constexpr (CSameAs) + { + if (InChar >= U32TEXT('\uFDD0') && InChar <= U32TEXT('\uFDEF')) return true; + + if ((InChar & 0x0000FFFE) == 0x0000FFFE) return TChar::IsValid(InChar); + + return false; + } + + // Windows uses UTF-16 encoding for wchar. + else if constexpr (PLATFORM_WINDOWS && (CSameAs)) + { + return TChar::IsNonch(static_cast(InChar)); + } + + // Linux uses UTF-32 encoding for wchar. + else if constexpr (PLATFORM_LINUX && (CSameAs)) + { + return TChar::IsNonch(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; + } NODISCARD FORCEINLINE static constexpr bool IsAlnum(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isalnum(static_cast(InChar)); - } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswalnum(InChar); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isalnum(InChar, Loc); } else { - return IsAlpha(InChar) || IsDigit(InChar); + return TChar::IsAlpha(InChar) || TChar::IsDigit(InChar); } } NODISCARD FORCEINLINE static constexpr bool IsAlpha(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isalpha(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isalpha(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswalpha(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -105,23 +189,30 @@ struct TChar return false; } - else + + else if constexpr (CSameAs || CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= LITERAL(CharType, '\u007F'), TEXT("TChar::IsAlpha() only supports basic latin block.")); + + if (InChar > LITERAL(CharType, '\u007F')) return false; + + return TChar::IsAlpha(static_cast(InChar)); } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsLower(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::islower(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::islower(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswlower(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -131,23 +222,39 @@ struct TChar return false; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return TChar::IsLower(static_cast(InChar)); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return TChar::IsLower(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsUpper(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isupper(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isupper(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswupper(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -157,12 +264,30 @@ struct TChar return false; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return TChar::IsUpper(static_cast(InChar)); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return TChar::IsUpper(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } - + NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar) { /* ..; */ @@ -182,20 +307,19 @@ struct TChar NODISCARD FORCEINLINE static constexpr bool IsCntrl(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::iscntrl(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::iscntrl(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswcntrl(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ..;; */ return (InChar >= U8TEXT('\u0000') && InChar <= U8TEXT('\u001F')) || InChar == U8TEXT('\u007F'); } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ..;..;;; */ return @@ -203,7 +327,8 @@ struct TChar (InChar >= U16TEXT('\u007F') && InChar <= U16TEXT('\u009F')) || (InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029')); } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ..;..;;; */ return @@ -211,23 +336,21 @@ struct TChar (InChar >= U32TEXT('\u007F') && InChar <= U32TEXT('\u009F')) || (InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029')); } - else - { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); - } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsGraph(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isgraph(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isgraph(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswgraph(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -237,23 +360,39 @@ struct TChar return false; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return TChar::IsGraph(static_cast(InChar)); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return TChar::IsGraph(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsSpace(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isspace(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isspace(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswspace(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * ISO/IEC 6429 @@ -269,7 +408,8 @@ struct TChar return false; } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * ISO/IEC 6429 @@ -313,7 +453,8 @@ struct TChar return false; } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * ISO/IEC 6429 @@ -357,28 +498,27 @@ struct TChar return false; } - else - { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); - } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsBlank(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isblank(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isblank(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswblank(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ;; */ return InChar == U8TEXT('\u0009') || InChar == U8TEXT('\u0020'); } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ;;;;..;..;;; */ return @@ -388,7 +528,8 @@ struct TChar (InChar == U16TEXT('\u2008') || InChar == U16TEXT('\u200A')) || (InChar == U16TEXT('\u205F') || InChar == U16TEXT('\u3000')); } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* ;;;;..;..;;; */ return @@ -398,23 +539,21 @@ struct TChar (InChar == U32TEXT('\u2008') || InChar == U32TEXT('\u200A')) || (InChar == U32TEXT('\u205F') || InChar == U32TEXT('\u3000')); } - else - { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); - } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsPrint(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::isprint(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::isprint(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswprint(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -424,23 +563,39 @@ struct TChar return false; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return TChar::IsPrint(static_cast(InChar)); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return TChar::IsPrint(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr bool IsPunct(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return NAMESPACE_STD::ispunct(static_cast(InChar)); + NAMESPACE_STD::locale Loc(""); + return NAMESPACE_STD::ispunct(InChar, Loc); } - else if constexpr (CSameAs) - { - return NAMESPACE_STD::iswpunct(InChar); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -454,23 +609,39 @@ struct TChar return false; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return TChar::IsPunct(static_cast(InChar)); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return TChar::IsPunct(static_cast(InChar)); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return false; } NODISCARD FORCEINLINE static constexpr CharType ToLower(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return static_cast(NAMESPACE_STD::tolower(static_cast(InChar))); + NAMESPACE_STD::locale Loc(""); + return static_cast(NAMESPACE_STD::tolower(InChar, Loc)); } - else if constexpr (CSameAs) - { - return static_cast(NAMESPACE_STD::towlower(InChar)); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -486,23 +657,39 @@ struct TChar return InChar; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return static_cast(TChar::ToLower(static_cast(InChar))); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return static_cast(TChar::ToLower(static_cast(InChar))); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return InChar; } NODISCARD FORCEINLINE static constexpr CharType ToUpper(CharType InChar) { - if constexpr (CSameAs) + if constexpr (CSameAs || CSameAs) { - return static_cast(NAMESPACE_STD::toupper(static_cast(InChar))); + NAMESPACE_STD::locale Loc(""); + return static_cast(NAMESPACE_STD::toupper(InChar, Loc)); } - else if constexpr (CSameAs) - { - return static_cast(NAMESPACE_STD::towupper(InChar)); - } - else if constexpr (CSameAs) + + else if constexpr (CSameAs) { /* * BASIC LATIN @@ -518,13 +705,31 @@ struct TChar return InChar; } - else + + else if constexpr (CSameAs) { - static_assert(sizeof(CharType) == -1, "Unsupported character type"); + checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block.")); + + if (InChar > U16TEXT('\u007F')) return false; + + return static_cast(TChar::ToUpper(static_cast(InChar))); } + + else if constexpr (CSameAs) + { + checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block.")); + + if (InChar > U32TEXT('\u007F')) return false; + + return static_cast(TChar::ToUpper(static_cast(InChar))); + } + + else static_assert(sizeof(CharType) == -1, "Unsupported character type"); + + return InChar; } - NODISCARD FORCEINLINE static constexpr int ToDigit(CharType InChar) + NODISCARD FORCEINLINE static constexpr TOptional ToDigit(CharType InChar) { switch (InChar) { @@ -564,7 +769,7 @@ struct TChar case LITERAL(CharType, 'x'): return 33; case LITERAL(CharType, 'y'): return 34; case LITERAL(CharType, 'z'): return 35; - case LITERAL(CharType, 'A'): return 10; + case LITERAL(CharType, 'A'): return 10; case LITERAL(CharType, 'B'): return 11; case LITERAL(CharType, 'C'): return 12; case LITERAL(CharType, 'D'): return 13; @@ -590,23 +795,27 @@ struct TChar case LITERAL(CharType, 'X'): return 33; case LITERAL(CharType, 'Y'): return 34; case LITERAL(CharType, 'Z'): return 35; - default: return -1; + default: return Invalid; } } - NODISCARD FORCEINLINE static constexpr CharType FromDigit(int InDigit) + NODISCARD FORCEINLINE static constexpr TOptional FromDigit(int InDigit) { - if (InDigit < 0 || InDigit >= 36) return NONE; + if (InDigit < 0 || InDigit > 35) return Invalid; return LITERAL(CharType, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[InDigit]; } + }; -using FChar = TChar; -using FWChar = TChar; -using FU8Char = TChar; -using FU16Char = TChar; -using FU32Char = TChar; +using FChar = TChar; +using FWChar = TChar; +using FU8Char = TChar; +using FU16Char = TChar; +using FU32Char = TChar; +using FUnicodeChar = TChar; + +static_assert(FUnicodeChar::bIsFixedLength); NAMESPACE_MODULE_END(Utility) NAMESPACE_MODULE_END(Redcraft)