From 86bfec58c7e282b4d3b8c4015f20becba8a86546 Mon Sep 17 00:00:00 2001 From: roker Date: Thu, 7 Oct 2021 20:35:39 +0200 Subject: [PATCH] single-char isNFC() removed. Makes no sense. Add freestanding operator+() for nfc_string --- src/nfc.cc | 27 ++++++++++++++------------- src/nfc.hh | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/src/nfc.cc b/src/nfc.cc index 3e6879c..c52f204 100644 --- a/src/nfc.cc +++ b/src/nfc.cc @@ -554,24 +554,25 @@ std::u32string createNFC(std::u32string nfd) } -template<> -IsNFC UTF<char>::isNFC(char c) +template<class CharT> +bool UTF<CharT>::is_safe_NFC_start(std::basic_string_view<CharT> s) { - if( c & 0x80 ) - throw illegal_utf("Single octet >0x80 is invalid UTF-8"); + if(s.empty() || (s[0] & 0x80)==0 ) // shortcut for empty string or starts with ASCII char + { + return true; + } + + const CharT* begin = s.data(); + const CharT* const end = s.data() + s.size(); - return IsNFC::Yes; // all ASCII characters are valid NFC. + const uint32_t u = parse(begin, end); + if(NFC_No.count(u)) return false; + if(NFC_Maybe.count(u)) return false; + + return true; } -template<> -IsNFC UTF<char16_t>::isNFC(char16_t c) -{ - if(NFC_No.count(c)) return IsNFC::No; - if(NFC_Maybe.count(c)) return IsNFC::Maybe; - return IsNFC::Yes; -} - template<class CharT> IsNFC UTF<CharT>::isNFC_quick_check(std::basic_string_view<CharT> s) diff --git a/src/nfc.hh b/src/nfc.hh index 3139c3f..8a8a0bc 100644 --- a/src/nfc.hh +++ b/src/nfc.hh @@ -47,9 +47,9 @@ public: void generate(const char32_t c, OutIter& out); - /// returns the NFC class of a single character + /// returns whether the sequence starts with IsNFC==Yes char static - IsNFC isNFC(CharT c); + bool is_safe_NFC_start(std::basic_string_view<CharT> s); /// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe" /// might throw illegal_utf exception @@ -205,6 +205,10 @@ public: return s.find( std::forward(args)... ); } + /// might throw illegal_utf, if a multi-char sequence is clipped. 
+ nfc_string substr(std::size_t pos=0, std::size_t count=npos) const; + + private: std::basic_string<CharT> s; @@ -214,6 +218,39 @@ public: }; }; +/// can be more efficient than the operator+() below. +template<class CharT> +typename +UTF<CharT>::nfc_string operator+( + typename UTF<CharT>::nfc_string left, + const typename UTF<CharT>::nfc_string& right); + +template<class CharT, class T> +inline +typename +UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right) +{ + return left+=right; +} + +template<class CharT, class T> +inline +typename +UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right) +{ + return left+=right; +} + + +template<class CharT, class T> +inline +typename +UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right) +{ + UTF<CharT> left_s{left}; + return left_s+=right; +} + /// convenient alias names: using UTF8 = UTF<char>; using UTF16 = UTF<char16_t>;