refactor(string): refactor TChar to enhance support for unicode

This commit is contained in:
Redstone1024 2024-10-26 21:15:27 +08:00
parent 66dc21310f
commit 3df9122ef0
2 changed files with 413 additions and 202 deletions

View File

@ -22,10 +22,11 @@ void TestChar()
{ {
always_check(!CCharType<int>); always_check(!CCharType<int>);
always_check(CCharType<char>); always_check(CCharType<char>);
always_check(CCharType<wchar_t>); always_check(CCharType<wchar>);
always_check(CCharType<char8_t>); always_check(CCharType<u8char>);
always_check(CCharType<char16_t>); always_check(CCharType<u16char>);
always_check(CCharType<char32_t>); always_check(CCharType<u32char>);
always_check(CCharType<unicodechar>);
} }
{ {
@ -71,31 +72,31 @@ void TestChar()
} }
{ {
// always_check(FU16Char::IsAlnum(U16TEXT('0'))); always_check(FU16Char::IsAlnum(U16TEXT('0')));
// always_check(FU16Char::IsAlpha(U16TEXT('A'))); always_check(FU16Char::IsAlpha(U16TEXT('A')));
// always_check(FU16Char::IsLower(U16TEXT('a'))); always_check(FU16Char::IsLower(U16TEXT('a')));
// always_check(FU16Char::IsUpper(U16TEXT('A'))); always_check(FU16Char::IsUpper(U16TEXT('A')));
always_check(FU16Char::IsDigit(U16TEXT('0'))); always_check(FU16Char::IsDigit(U16TEXT('0')));
always_check(FU16Char::IsCntrl(U16TEXT('\n'))); always_check(FU16Char::IsCntrl(U16TEXT('\n')));
// always_check(FU16Char::IsGraph(U16TEXT('!'))); always_check(FU16Char::IsGraph(U16TEXT('!')));
always_check(FU16Char::IsSpace(U16TEXT('\t'))); always_check(FU16Char::IsSpace(U16TEXT('\t')));
always_check(FU16Char::IsBlank(U16TEXT(' '))); always_check(FU16Char::IsBlank(U16TEXT(' ')));
// always_check(FU16Char::IsPrint(U16TEXT('#'))); always_check(FU16Char::IsPrint(U16TEXT('#')));
// always_check(FU16Char::IsPunct(U16TEXT('['))); always_check(FU16Char::IsPunct(U16TEXT('[')));
} }
{ {
// always_check(FU32Char::IsAlnum(U32TEXT('0'))); always_check(FU32Char::IsAlnum(U32TEXT('0')));
// always_check(FU32Char::IsAlpha(U32TEXT('A'))); always_check(FU32Char::IsAlpha(U32TEXT('A')));
// always_check(FU32Char::IsLower(U32TEXT('a'))); always_check(FU32Char::IsLower(U32TEXT('a')));
// always_check(FU32Char::IsUpper(U32TEXT('A'))); always_check(FU32Char::IsUpper(U32TEXT('A')));
always_check(FU32Char::IsDigit(U32TEXT('0'))); always_check(FU32Char::IsDigit(U32TEXT('0')));
always_check(FU32Char::IsCntrl(U32TEXT('\n'))); always_check(FU32Char::IsCntrl(U32TEXT('\n')));
// always_check(FU32Char::IsGraph(U32TEXT('!'))); always_check(FU32Char::IsGraph(U32TEXT('!')));
always_check(FU32Char::IsSpace(U32TEXT('\t'))); always_check(FU32Char::IsSpace(U32TEXT('\t')));
always_check(FU32Char::IsBlank(U32TEXT(' '))); always_check(FU32Char::IsBlank(U32TEXT(' ')));
// always_check(FU32Char::IsPrint(U32TEXT('#'))); always_check(FU32Char::IsPrint(U32TEXT('#')));
// always_check(FU32Char::IsPunct(U32TEXT('['))); always_check(FU32Char::IsPunct(U32TEXT('[')));
} }
{ {
@ -107,7 +108,7 @@ void TestChar()
always_check(!FChar::IsCntrl(TEXT('_'))); always_check(!FChar::IsCntrl(TEXT('_')));
always_check(!FChar::IsGraph(TEXT(' '))); always_check(!FChar::IsGraph(TEXT(' ')));
always_check(!FChar::IsSpace(TEXT('='))); always_check(!FChar::IsSpace(TEXT('=')));
always_check(!FChar::IsBlank(TEXT('\r'))); always_check(!FChar::IsBlank(TEXT('+')));
always_check(!FChar::IsPrint(TEXT('\n'))); always_check(!FChar::IsPrint(TEXT('\n')));
always_check(!FChar::IsPunct(TEXT('H'))); always_check(!FChar::IsPunct(TEXT('H')));
} }
@ -121,7 +122,7 @@ void TestChar()
always_check(!FWChar::IsCntrl(WTEXT('_'))); always_check(!FWChar::IsCntrl(WTEXT('_')));
always_check(!FWChar::IsGraph(WTEXT(' '))); always_check(!FWChar::IsGraph(WTEXT(' ')));
always_check(!FWChar::IsSpace(WTEXT('='))); always_check(!FWChar::IsSpace(WTEXT('=')));
always_check(!FWChar::IsBlank(WTEXT('\r'))); always_check(!FWChar::IsBlank(WTEXT('+')));
always_check(!FWChar::IsPrint(WTEXT('\n'))); always_check(!FWChar::IsPrint(WTEXT('\n')));
always_check(!FWChar::IsPunct(WTEXT('H'))); always_check(!FWChar::IsPunct(WTEXT('H')));
} }
@ -135,37 +136,37 @@ void TestChar()
always_check(!FU8Char::IsCntrl(U8TEXT('_'))); always_check(!FU8Char::IsCntrl(U8TEXT('_')));
always_check(!FU8Char::IsGraph(U8TEXT(' '))); always_check(!FU8Char::IsGraph(U8TEXT(' ')));
always_check(!FU8Char::IsSpace(U8TEXT('='))); always_check(!FU8Char::IsSpace(U8TEXT('=')));
always_check(!FU8Char::IsBlank(U8TEXT('\r'))); always_check(!FU8Char::IsBlank(U8TEXT('+')));
always_check(!FU8Char::IsPrint(U8TEXT('\n'))); always_check(!FU8Char::IsPrint(U8TEXT('\n')));
always_check(!FU8Char::IsPunct(U8TEXT('H'))); always_check(!FU8Char::IsPunct(U8TEXT('H')));
} }
{ {
// always_check(!FU16Char::IsAlnum(U16TEXT('$'))); always_check(!FU16Char::IsAlnum(U16TEXT('$')));
// always_check(!FU16Char::IsAlpha(U16TEXT('0'))); always_check(!FU16Char::IsAlpha(U16TEXT('0')));
// always_check(!FU16Char::IsLower(U16TEXT('A'))); always_check(!FU16Char::IsLower(U16TEXT('A')));
// always_check(!FU16Char::IsUpper(U16TEXT('a'))); always_check(!FU16Char::IsUpper(U16TEXT('a')));
always_check(!FU16Char::IsDigit(U16TEXT('I'))); always_check(!FU16Char::IsDigit(U16TEXT('I')));
always_check(!FU16Char::IsCntrl(U16TEXT('_'))); always_check(!FU16Char::IsCntrl(U16TEXT('_')));
// always_check(!FU16Char::IsGraph(U16TEXT(' '))); always_check(!FU16Char::IsGraph(U16TEXT(' ')));
always_check(!FU16Char::IsSpace(U16TEXT('='))); always_check(!FU16Char::IsSpace(U16TEXT('=')));
always_check(!FU16Char::IsBlank(U16TEXT('\r'))); always_check(!FU16Char::IsBlank(U16TEXT('+')));
// always_check(!FU16Char::IsPrint(U16TEXT('\n'))); always_check(!FU16Char::IsPrint(U16TEXT('\n')));
// always_check(!FU16Char::IsPunct(U16TEXT('H'))); always_check(!FU16Char::IsPunct(U16TEXT('H')));
} }
{ {
// always_check(!FU32Char::IsAlnum(U32TEXT('$'))); always_check(!FU32Char::IsAlnum(U32TEXT('$')));
// always_check(!FU32Char::IsAlpha(U32TEXT('0'))); always_check(!FU32Char::IsAlpha(U32TEXT('0')));
// always_check(!FU32Char::IsLower(U32TEXT('A'))); always_check(!FU32Char::IsLower(U32TEXT('A')));
// always_check(!FU32Char::IsUpper(U32TEXT('a'))); always_check(!FU32Char::IsUpper(U32TEXT('a')));
always_check(!FU32Char::IsDigit(U32TEXT('I'))); always_check(!FU32Char::IsDigit(U32TEXT('I')));
always_check(!FU32Char::IsCntrl(U32TEXT('_'))); always_check(!FU32Char::IsCntrl(U32TEXT('_')));
// always_check(!FU32Char::IsGraph(U32TEXT(' '))); always_check(!FU32Char::IsGraph(U32TEXT(' ')));
always_check(!FU32Char::IsSpace(U32TEXT('='))); always_check(!FU32Char::IsSpace(U32TEXT('=')));
always_check(!FU32Char::IsBlank(U32TEXT('\r'))); always_check(!FU32Char::IsBlank(U32TEXT('+')));
// always_check(!FU32Char::IsPrint(U32TEXT('\n'))); always_check(!FU32Char::IsPrint(U32TEXT('\n')));
// always_check(!FU32Char::IsPunct(U32TEXT('H'))); always_check(!FU32Char::IsPunct(U32TEXT('H')));
} }
{ {
@ -188,10 +189,10 @@ void TestChar()
always_check(FWChar::ToLower(WTEXT('i')) == WTEXT('i')); always_check(FWChar::ToLower(WTEXT('i')) == WTEXT('i'));
always_check(FU8Char::ToLower(U8TEXT('i')) == U8TEXT('i')); always_check(FU8Char::ToLower(U8TEXT('i')) == U8TEXT('i'));
always_check(FU8Char::ToUpper(U8TEXT('l')) == U8TEXT('L')); always_check(FU8Char::ToUpper(U8TEXT('l')) == U8TEXT('L'));
// always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i')); always_check(FU16Char::ToLower(U16TEXT('i')) == U16TEXT('i'));
// always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L')); always_check(FU16Char::ToUpper(U16TEXT('l')) == U16TEXT('L'));
// always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i')); always_check(FU32Char::ToLower(U32TEXT('i')) == U32TEXT('i'));
// always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L')); always_check(FU32Char::ToUpper(U32TEXT('l')) == U32TEXT('L'));
} }
{ {
@ -400,10 +401,11 @@ void TestCString()
}; };
TestTCString(InPlaceType<char>); TestTCString(InPlaceType<char>);
TestTCString(InPlaceType<wchar_t>); TestTCString(InPlaceType<wchar>);
TestTCString(InPlaceType<char8_t>); TestTCString(InPlaceType<u8char>);
TestTCString(InPlaceType<char16_t>); TestTCString(InPlaceType<u16char>);
TestTCString(InPlaceType<char32_t>); TestTCString(InPlaceType<u32char>);
TestTCString(InPlaceType<unicodechar>);
} }
NAMESPACE_END(Testing) NAMESPACE_END(Testing)

View File

@ -1,18 +1,19 @@
#pragma once #pragma once
#include "CoreTypes.h" #include "CoreTypes.h"
#include "Templates/Optional.h"
#include "TypeTraits/TypeTraits.h" #include "TypeTraits/TypeTraits.h"
#include "Miscellaneous/AssertionMacros.h" #include "Miscellaneous/AssertionMacros.h"
#include <cctype> #include <locale>
#include <cwctype> #include <climits>
NAMESPACE_REDCRAFT_BEGIN NAMESPACE_REDCRAFT_BEGIN
NAMESPACE_MODULE_BEGIN(Redcraft) NAMESPACE_MODULE_BEGIN(Redcraft)
NAMESPACE_MODULE_BEGIN(Utility) NAMESPACE_MODULE_BEGIN(Utility)
template <typename T> template <typename T>
concept CCharType = CSameAs<T, char> || CSameAs<T, wchar_t> || CSameAs<T, char8_t> || CSameAs<T, char16_t> || CSameAs<T, char32_t>; concept CCharType = CSameAs<T, char> || CSameAs<T, wchar> || CSameAs<T, u8char> || CSameAs<T, u16char> || CSameAs<T, u32char>;
NAMESPACE_PRIVATE_BEGIN NAMESPACE_PRIVATE_BEGIN
@ -22,78 +23,161 @@ struct TLiteral;
template <> template <>
struct TLiteral<char> struct TLiteral<char>
{ {
NODISCARD FORCEINLINE static constexpr const char Select(const char X, const wchar_t , const char8_t , const char16_t , const char32_t ) { return X; } NODISCARD FORCEINLINE static constexpr char Select(const char X, const wchar , const u8char , const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar_t*, const char8_t*, const char16_t*, const char32_t*) { return X; } NODISCARD FORCEINLINE static constexpr const char* Select(const char* X, const wchar*, const u8char*, const u16char*, const u32char*) { return X; }
}; };
template <> template <>
struct TLiteral<wchar_t> struct TLiteral<wchar>
{ {
NODISCARD FORCEINLINE static constexpr const wchar_t Select(const char , const wchar_t X, const char8_t , const char16_t , const char32_t ) { return X; } NODISCARD FORCEINLINE static constexpr wchar Select(const char , const wchar X, const u8char , const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const wchar_t* Select(const char*, const wchar_t* X, const char8_t*, const char16_t*, const char32_t*) { return X; } NODISCARD FORCEINLINE static constexpr const wchar* Select(const char*, const wchar* X, const u8char*, const u16char*, const u32char*) { return X; }
}; };
template <> template <>
struct TLiteral<char8_t> struct TLiteral<u8char>
{ {
NODISCARD FORCEINLINE static constexpr const char8_t Select(const char , const wchar_t , const char8_t X, const char16_t , const char32_t ) { return X; } NODISCARD FORCEINLINE static constexpr u8char Select(const char , const wchar , const u8char X, const u16char , const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const char8_t* Select(const char*, const wchar_t*, const char8_t* X, const char16_t*, const char32_t*) { return X; } NODISCARD FORCEINLINE static constexpr const u8char* Select(const char*, const wchar*, const u8char* X, const u16char*, const u32char*) { return X; }
}; };
template <> template <>
struct TLiteral<char16_t> struct TLiteral<u16char>
{ {
NODISCARD FORCEINLINE static constexpr const char16_t Select(const char , const wchar_t , const char8_t , const char16_t X, const char32_t ) { return X; } NODISCARD FORCEINLINE static constexpr u16char Select(const char , const wchar , const u8char , const u16char X, const u32char ) { return X; }
NODISCARD FORCEINLINE static constexpr const char16_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t* X, const char32_t*) { return X; } NODISCARD FORCEINLINE static constexpr const u16char* Select(const char*, const wchar*, const u8char*, const u16char* X, const u32char*) { return X; }
}; };
template <> template <>
struct TLiteral<char32_t> struct TLiteral<u32char>
{ {
NODISCARD FORCEINLINE static constexpr const char32_t Select(const char , const wchar_t , const char8_t , const char16_t , const char32_t X) { return X; } NODISCARD FORCEINLINE static constexpr u32char Select(const char , const wchar , const u8char , const u16char , const u32char X) { return X; }
NODISCARD FORCEINLINE static constexpr const char32_t* Select(const char*, const wchar_t*, const char8_t*, const char16_t*, const char32_t* X) { return X; } NODISCARD FORCEINLINE static constexpr const u32char* Select(const char*, const wchar*, const u8char*, const u16char*, const u32char* X) { return X; }
}; };
NAMESPACE_PRIVATE_END NAMESPACE_PRIVATE_END
/** Templated literal struct to allow selection of string literals based on the character type provided, and not on compiler switches. */ /** Templated literal struct to allow selection of string literals based on the character type provided, and not on compiler switches. */
#define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral<CharType>::Select(StringLiteral, WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral)) #define LITERAL(CharType, StringLiteral) NAMESPACE_PRIVATE::TLiteral<CharType>::Select(TEXT(StringLiteral), WTEXT(StringLiteral), U8TEXT(StringLiteral), U16TEXT(StringLiteral), U32TEXT(StringLiteral))
/** Set of utility functions operating on a single character. Implemented based on ISO 30112 "i18n". */ /** Set of utility functions operating on a single character. Implemented based on user-preferred locale and ISO 30112 "i18n". */
template <CCharType T> template <CCharType T>
struct TChar struct TChar
{ {
using CharType = T; using CharType = T;
inline static constexpr CharType NONE = CharType(-1); /** The maximum number of code units required to represent a single character. if unknown, guess 1. */
static constexpr size_t MaxCodeUnitLength =
CSameAs<CharType, char> ? MB_LEN_MAX :
CSameAs<CharType, wchar> ?
PLATFORM_WINDOWS ? 2 :
PLATFORM_LINUX ? 1 : 1 :
CSameAs<CharType, u8char> ? 4 :
CSameAs<CharType, u16char> ? 2 :
CSameAs<CharType, u32char> ? 1 : 1;
/** Whether the character type is fixed-length. */
static constexpr bool bIsFixedLength = MaxCodeUnitLength == 1;
NODISCARD FORCEINLINE static constexpr bool IsValid(CharType InChar)
{
if constexpr (CSameAs<CharType, u8char>)
{
if ((InChar & 0b10000000) == 0b00000000) return true;
return false;
}
else if constexpr (CSameAs<CharType, u16char> || CSameAs<CharType, u32char>)
{
if (InChar >= 0xD800 && InChar <= 0xDBFF) return false;
if (InChar >= 0xDC00 && InChar <= 0xDFFF) return false;
return InChar <= 0x10FFFF;
}
// Windows uses UTF-16 encoding for wchar.
else if constexpr (PLATFORM_WINDOWS && (CSameAs<CharType, wchar>))
{
return TChar::IsValid(static_cast<u16char>(InChar));
}
// Linux uses UTF-32 encoding for wchar.
else if constexpr (PLATFORM_LINUX && (CSameAs<CharType, wchar>))
{
return TChar::IsValid(static_cast<u32char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsNonch(CharType InChar)
{
if constexpr (CSameAs<CharType, u8char>)
{
return false;
}
else if constexpr (CSameAs<CharType, u16char>)
{
if (InChar >= U16TEXT('\uFDD0') && InChar <= U16TEXT('\uFDEF')) return true;
if (InChar == U16TEXT('\uFFFE')) return true;
if (InChar == U16TEXT('\uFFFF')) return true;
return false;
}
else if constexpr (CSameAs<CharType, u32char>)
{
if (InChar >= U32TEXT('\uFDD0') && InChar <= U32TEXT('\uFDEF')) return true;
if ((InChar & 0x0000FFFE) == 0x0000FFFE) return TChar::IsValid(InChar);
return false;
}
// Windows uses UTF-16 encoding for wchar.
else if constexpr (PLATFORM_WINDOWS && (CSameAs<CharType, wchar>))
{
return TChar::IsNonch(static_cast<u16char>(InChar));
}
// Linux uses UTF-32 encoding for wchar.
else if constexpr (PLATFORM_LINUX && (CSameAs<CharType, wchar>))
{
return TChar::IsNonch(static_cast<u32char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
}
NODISCARD FORCEINLINE static constexpr bool IsAlnum(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsAlnum(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isalnum(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
} return NAMESPACE_STD::isalnum(InChar, Loc);
else if constexpr (CSameAs<CharType, wchar_t>)
{
return NAMESPACE_STD::iswalnum(InChar);
} }
else else
{ {
return IsAlpha(InChar) || IsDigit(InChar); return TChar::IsAlpha(InChar) || TChar::IsDigit(InChar);
} }
} }
NODISCARD FORCEINLINE static constexpr bool IsAlpha(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsAlpha(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isalpha(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isalpha(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswalpha(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -105,23 +189,30 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char> || CSameAs<CharType, u32char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= LITERAL(CharType, '\u007F'), TEXT("TChar::IsAlpha() only supports basic latin block."));
if (InChar > LITERAL(CharType, '\u007F')) return false;
return TChar<u8char>::IsAlpha(static_cast<u8char>(InChar));
} }
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsLower(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsLower(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::islower(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::islower(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswlower(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -131,23 +222,39 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsLower(static_cast<u8char>(InChar));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsLower() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsLower(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsUpper(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsUpper(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isupper(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isupper(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswupper(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -157,10 +264,28 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsUpper(static_cast<u8char>(InChar));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsUpper() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsUpper(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsDigit(CharType InChar)
@ -182,20 +307,19 @@ struct TChar
NODISCARD FORCEINLINE static constexpr bool IsCntrl(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsCntrl(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::iscntrl(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::iscntrl(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswcntrl(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* <U0000>..<U001F>;<U007F>; */ /* <U0000>..<U001F>;<U007F>; */
return (InChar >= U8TEXT('\u0000') && InChar <= U8TEXT('\u001F')) || InChar == U8TEXT('\u007F'); return (InChar >= U8TEXT('\u0000') && InChar <= U8TEXT('\u001F')) || InChar == U8TEXT('\u007F');
} }
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{ {
/* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */ /* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */
return return
@ -203,7 +327,8 @@ struct TChar
(InChar >= U16TEXT('\u007F') && InChar <= U16TEXT('\u009F')) || (InChar >= U16TEXT('\u007F') && InChar <= U16TEXT('\u009F')) ||
(InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029')); (InChar == U16TEXT('\u2028') || InChar == U16TEXT('\u2029'));
} }
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{ {
/* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */ /* <U0000>..<U001F>;<U007F>..<U009F>;<U2028>;<U2029>; */
return return
@ -211,23 +336,21 @@ struct TChar
(InChar >= U32TEXT('\u007F') && InChar <= U32TEXT('\u009F')) || (InChar >= U32TEXT('\u007F') && InChar <= U32TEXT('\u009F')) ||
(InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029')); (InChar == U32TEXT('\u2028') || InChar == U32TEXT('\u2029'));
} }
else
{ else static_assert(sizeof(CharType) == -1, "Unsupported character type");
static_assert(sizeof(CharType) == -1, "Unsupported character type");
} return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsGraph(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsGraph(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isgraph(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isgraph(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswgraph(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -237,23 +360,39 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsGraph(static_cast<u8char>(InChar));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsGraph() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsGraph(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsSpace(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsSpace(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isspace(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isspace(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswspace(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* ISO/IEC 6429 * ISO/IEC 6429
@ -269,7 +408,8 @@ struct TChar
return false; return false;
} }
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{ {
/* /*
* ISO/IEC 6429 * ISO/IEC 6429
@ -313,7 +453,8 @@ struct TChar
return false; return false;
} }
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{ {
/* /*
* ISO/IEC 6429 * ISO/IEC 6429
@ -357,28 +498,27 @@ struct TChar
return false; return false;
} }
else
{ else static_assert(sizeof(CharType) == -1, "Unsupported character type");
static_assert(sizeof(CharType) == -1, "Unsupported character type");
} return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsBlank(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsBlank(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isblank(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isblank(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswblank(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* <U0009>;<U0020>; */ /* <U0009>;<U0020>; */
return InChar == U8TEXT('\u0009') || InChar == U8TEXT('\u0020'); return InChar == U8TEXT('\u0009') || InChar == U8TEXT('\u0020');
} }
else if constexpr (CSameAs<CharType, char16_t>)
else if constexpr (CSameAs<CharType, u16char>)
{ {
/* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */ /* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */
return return
@ -388,7 +528,8 @@ struct TChar
(InChar == U16TEXT('\u2008') || InChar == U16TEXT('\u200A')) || (InChar == U16TEXT('\u2008') || InChar == U16TEXT('\u200A')) ||
(InChar == U16TEXT('\u205F') || InChar == U16TEXT('\u3000')); (InChar == U16TEXT('\u205F') || InChar == U16TEXT('\u3000'));
} }
else if constexpr (CSameAs<CharType, char32_t>)
else if constexpr (CSameAs<CharType, u32char>)
{ {
/* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */ /* <U0009>;<U0020>;<U1680>;<U180E>;<U2000>..<U2006>;<U2008>..<U200A>;<U205F>;<U3000>; */
return return
@ -398,23 +539,21 @@ struct TChar
(InChar == U32TEXT('\u2008') || InChar == U32TEXT('\u200A')) || (InChar == U32TEXT('\u2008') || InChar == U32TEXT('\u200A')) ||
(InChar == U32TEXT('\u205F') || InChar == U32TEXT('\u3000')); (InChar == U32TEXT('\u205F') || InChar == U32TEXT('\u3000'));
} }
else
{ else static_assert(sizeof(CharType) == -1, "Unsupported character type");
static_assert(sizeof(CharType) == -1, "Unsupported character type");
} return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsPrint(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsPrint(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::isprint(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::isprint(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswprint(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -424,23 +563,39 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsPrint(static_cast<u8char>(InChar));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPrint() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsPrint(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr bool IsPunct(CharType InChar) NODISCARD FORCEINLINE static constexpr bool IsPunct(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return NAMESPACE_STD::ispunct(static_cast<unsigned char>(InChar)); NAMESPACE_STD::locale Loc("");
return NAMESPACE_STD::ispunct(InChar, Loc);
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return NAMESPACE_STD::iswpunct(InChar);
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -454,23 +609,39 @@ struct TChar
return false; return false;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return TChar<u8char>::IsPunct(static_cast<u8char>(InChar));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::IsPunct() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return TChar<u8char>::IsPunct(static_cast<u8char>(InChar));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return false;
} }
NODISCARD FORCEINLINE static constexpr CharType ToLower(CharType InChar) NODISCARD FORCEINLINE static constexpr CharType ToLower(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return static_cast<CharType>(NAMESPACE_STD::tolower(static_cast<unsigned char>(InChar))); NAMESPACE_STD::locale Loc("");
return static_cast<CharType>(NAMESPACE_STD::tolower(InChar, Loc));
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return static_cast<CharType>(NAMESPACE_STD::towlower(InChar));
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -486,23 +657,39 @@ struct TChar
return InChar; return InChar;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block."));
if (InChar > U16TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToLower(static_cast<u8char>(InChar)));
} }
else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToLower() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToLower(static_cast<u8char>(InChar)));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return InChar;
} }
NODISCARD FORCEINLINE static constexpr CharType ToUpper(CharType InChar) NODISCARD FORCEINLINE static constexpr CharType ToUpper(CharType InChar)
{ {
if constexpr (CSameAs<CharType, char>) if constexpr (CSameAs<CharType, char> || CSameAs<CharType, wchar>)
{ {
return static_cast<CharType>(NAMESPACE_STD::toupper(static_cast<unsigned char>(InChar))); NAMESPACE_STD::locale Loc("");
return static_cast<CharType>(NAMESPACE_STD::toupper(InChar, Loc));
} }
else if constexpr (CSameAs<CharType, wchar_t>)
{ else if constexpr (CSameAs<CharType, u8char>)
return static_cast<CharType>(NAMESPACE_STD::towupper(InChar));
}
else if constexpr (CSameAs<CharType, char8_t>)
{ {
/* /*
* BASIC LATIN * BASIC LATIN
@ -518,13 +705,31 @@ struct TChar
return InChar; return InChar;
} }
else
else if constexpr (CSameAs<CharType, u16char>)
{ {
static_assert(sizeof(CharType) == -1, "Unsupported character type"); checkf(InChar <= U16TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block."));
}
if (InChar > U16TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToUpper(static_cast<u8char>(InChar)));
} }
NODISCARD FORCEINLINE static constexpr int ToDigit(CharType InChar) else if constexpr (CSameAs<CharType, u32char>)
{
checkf(InChar <= U32TEXT('\u007F'), TEXT("TChar::ToUpper() only supports basic latin block."));
if (InChar > U32TEXT('\u007F')) return false;
return static_cast<u16char>(TChar<u8char>::ToUpper(static_cast<u8char>(InChar)));
}
else static_assert(sizeof(CharType) == -1, "Unsupported character type");
return InChar;
}
NODISCARD FORCEINLINE static constexpr TOptional<unsigned> ToDigit(CharType InChar)
{ {
switch (InChar) switch (InChar)
{ {
@ -590,23 +795,27 @@ struct TChar
case LITERAL(CharType, 'X'): return 33; case LITERAL(CharType, 'X'): return 33;
case LITERAL(CharType, 'Y'): return 34; case LITERAL(CharType, 'Y'): return 34;
case LITERAL(CharType, 'Z'): return 35; case LITERAL(CharType, 'Z'): return 35;
default: return -1; default: return Invalid;
} }
} }
NODISCARD FORCEINLINE static constexpr CharType FromDigit(int InDigit) NODISCARD FORCEINLINE static constexpr TOptional<CharType> FromDigit(int InDigit)
{ {
if (InDigit < 0 || InDigit >= 36) return NONE; if (InDigit < 0 || InDigit > 35) return Invalid;
return LITERAL(CharType, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[InDigit]; return LITERAL(CharType, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[InDigit];
} }
}; };
using FChar = TChar<char>; using FChar = TChar<char>;
using FWChar = TChar<wchar_t>; using FWChar = TChar<wchar>;
using FU8Char = TChar<char8_t>; using FU8Char = TChar<u8char>;
using FU16Char = TChar<char16_t>; using FU16Char = TChar<u16char>;
using FU32Char = TChar<char32_t>; using FU32Char = TChar<u32char>;
using FUnicodeChar = TChar<unicodechar>;
static_assert(FUnicodeChar::bIsFixedLength);
NAMESPACE_MODULE_END(Utility) NAMESPACE_MODULE_END(Utility)
NAMESPACE_MODULE_END(Redcraft) NAMESPACE_MODULE_END(Redcraft)