// This file is under GNU General Public License 3.0
// see LICENSE.txt
|
|
|
|
#ifndef LIBPEPDATATYPES_NFC_HH
|
|
#define LIBPEPDATATYPES_NFC_HH
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

#include <boost/operators.hpp>
#include <pEp/identity_list.h>
|
|
|
|
namespace pEp {
|
|
|
|
/// Tri-state return value of isNFC_quick_check()
enum class IsNFC
{
    No    = 0, //!< string contains a character that cannot occur in NFC
    Maybe = 1, //!< string contains a character that is only allowed in certain positions in NFC
    Yes   = 2  //!< string contains no invalid or partially valid character
};
|
|
|
|
/// Stream output operator for IsNFC values.
/// Only declared here; the textual representation is chosen by the implementation.
std::ostream& operator<<(std::ostream& o, IsNFC is_nfc);
|
|
|
|
|
|
/// Exception class thrown whenever a string is parsed that is not a valid
/// UTF-8 or UTF-16 sequence.
class illegal_utf : public std::runtime_error
{
public:
    /// Error in a UTF-8 input.
    /// The arguments are presumably the offending input, the offset of the bad
    /// code unit within it, and a human-readable reason -- TODO confirm against the implementation.
    illegal_utf(std::string_view, unsigned position, const std::string& reason);

    /// Same as above, for a UTF-16 input.
    illegal_utf(std::u16string_view, unsigned position, const std::string& reason);

    /// Error with a ready-made message; explicit, so a std::string never converts silently.
    explicit illegal_utf(const std::string& message);
};
|
|
|
|
|
|
/// Common class template to define the same functions for all 3 Unicode Transfer Formats.
|
|
template<class CharT>
|
|
class UTF
|
|
{
|
|
public:
|
|
/// parses a sequence of input code units into one Unicode code point and updates the input iterator c.
|
|
/// \todo change to iterator templates?
|
|
static
|
|
uint32_t parse(const CharT*& c, const CharT* end);
|
|
|
|
/// generates a UTF sequence from a given Unicode code point.
|
|
template<class OutIter>
|
|
static
|
|
void generate(const char32_t c, OutIter& out);
|
|
|
|
/// returns whether the sequence starts with IsNFC==Yes char
|
|
static
|
|
bool is_safe_NFC_start(std::basic_string_view<CharT> s);
|
|
|
|
/// returns No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe".
|
|
/// use isNFC() for a comprehensive NFC check.
|
|
/// Might throw illegal_utf exception
|
|
static
|
|
IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
|
|
|
|
/// runs first quick check and a deep test if quick check returns "Maybe".
|
|
static
|
|
bool isNFC(std::basic_string_view<CharT> s);
|
|
|
|
/// returns true if the sequence is valid UTF-8
|
|
static
|
|
bool isUtf(const CharT* begin, const CharT* end);
|
|
|
|
/// converts a C++ string (in UTF-8/-16) into NFC form
|
|
static
|
|
std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
|
|
|
|
/// calculates the number of "code units" in the target Unicode Transfer Format.
|
|
static
|
|
size_t utf_length(std::u32string_view s);
|
|
|
|
/// generates a whole u32string at once
|
|
static
|
|
std::basic_string<CharT> generate(const std::u32string& s);
|
|
|
|
/// creates an NFD u32string from UTF-8/UTF-16 input string s
|
|
static
|
|
std::u32string fromUtf_decompose(std::basic_string_view<CharT> s);
|
|
|
|
|
|
/// class holding a NFC-conform Unicode string.
|
|
/// content is mostly read-only, because arbitrary modifications might destroy NFC conformacy.
|
|
class nfc_string : public boost::totally_ordered2<nfc_string, std::basic_string_view<CharT>>
|
|
{
|
|
public:
|
|
typedef std::basic_string<CharT> String;
|
|
typedef std::basic_string_view<CharT> StringView;
|
|
|
|
/// only const_reference is supported.
|
|
typedef typename String::const_reference const_reference;
|
|
typedef typename String::const_pointer const_pointer;
|
|
|
|
/// only forward iterator. Does a backward_iterator make sense in UTF-encoded strings?
|
|
typedef typename String::const_iterator const_iterator;
|
|
|
|
static
|
|
constexpr size_t npos = String::npos;
|
|
|
|
explicit nfc_string(StringView src);
|
|
explicit nfc_string(String && src);
|
|
|
|
/// construct from a NUL-terminated src
|
|
explicit nfc_string(const CharT* src)
|
|
: nfc_string{ StringView{src} }
|
|
{}
|
|
|
|
nfc_string(const CharT* src, size_t length)
|
|
: nfc_string{ StringView{src, length} }
|
|
{}
|
|
|
|
nfc_string() = default;
|
|
nfc_string(const nfc_string& src) = default;
|
|
nfc_string( nfc_string&& src) = default;
|
|
|
|
nfc_string& operator=(const nfc_string& src) = default;
|
|
nfc_string& operator=( nfc_string&& src) = default;
|
|
|
|
nfc_string& assign(StringView src);
|
|
nfc_string& assign(String && src);
|
|
nfc_string& assign(const CharT* src) { return this->assign(StringView{src}); }
|
|
|
|
nfc_string& operator=(StringView src) { return this->assign(src); }
|
|
nfc_string& operator=(String && src) { return this->assign(std::move(src)); }
|
|
nfc_string& operator=(const CharT* src) { return this->assign(StringView{src}); }
|
|
|
|
/// read-only: shares representation
|
|
operator const String&() const noexcept { return s; }
|
|
|
|
/// read-only: shares representation
|
|
const String& get() const noexcept { return s;}
|
|
|
|
/// read write: copy content
|
|
operator String() const { return s; }
|
|
|
|
const CharT* c_str() const noexcept { return s.c_str(); }
|
|
const CharT* data() const noexcept { return s.data(); }
|
|
std::size_t size() const noexcept { return s.size(); }
|
|
bool empty() const noexcept { return s.empty(); }
|
|
|
|
std::size_t capacity() const noexcept { return s.capacity(); }
|
|
void reserve(std::size_t new_capacity) { s.reserve(new_capacity); }
|
|
void shrink_to_fit() { s.shrink_to_fit(); }
|
|
|
|
const_reference operator[](std::size_t ofs) const noexcept { return s[ofs]; }
|
|
const_reference at(std::size_t ofs) const { return s.at(ofs); }
|
|
const_reference front() const noexcept { return s.front(); }
|
|
const_reference back() const noexcept { return s.back(); }
|
|
operator StringView() const noexcept { return StringView{s}; }
|
|
|
|
const_iterator begin() const noexcept { return s.cbegin(); }
|
|
const_iterator cbegin() const noexcept { return s.cbegin(); } /// r/o access only
|
|
const_iterator end() const noexcept { return s.cend(); }
|
|
const_iterator cend() const noexcept { return s.cend(); } /// r/o access only
|
|
|
|
void clear() { s.clear(); }
|
|
|
|
/// I am lazy and delegate all the 10 different insert() overloads directly to s.
|
|
template<typename... Args>
|
|
nfc_string& insert(Args&& ...args)
|
|
{
|
|
s.insert( std::forward<Args>(args)... );
|
|
normalize();
|
|
return *this;
|
|
}
|
|
|
|
/// delegates all erase() overloads to s.
|
|
template<typename... Args>
|
|
nfc_string& erase(Args&& ...args)
|
|
{
|
|
s.erase( std::forward<Args>(args)... );
|
|
normalize();
|
|
return *this;
|
|
}
|
|
|
|
nfc_string& push_back(CharT c);
|
|
|
|
/// delegates all 9 append() overloads to s.
|
|
template<typename... Args>
|
|
nfc_string& append(Args&& ...args)
|
|
{
|
|
s.append( std::forward<Args>(args)... );
|
|
normalize();
|
|
return *this;
|
|
}
|
|
|
|
/// more expensive, because 's' might not be in NFC.
|
|
nfc_string& operator+=(StringView s);
|
|
|
|
/// optimization possible to avoid re-normalization in most cases.
|
|
nfc_string& operator+=(const nfc_string& s);
|
|
|
|
/// optimization possible to avoid re-normalization in most cases.
|
|
nfc_string& operator+=(CharT c) { push_back(c); return *this; }
|
|
|
|
/// delegates all 9 compare() overloads to s
|
|
template<typename... Args>
|
|
int compare(Args&& ...args) const
|
|
{
|
|
return s.compare( std::forward<Args>(args)... );
|
|
}
|
|
|
|
/// stolen from C++20
|
|
bool starts_with(StringView s) const noexcept;
|
|
|
|
/// stolen from C++20
|
|
bool ends_with(StringView s) const noexcept;
|
|
|
|
/// delegates all 5 find() overloads to s
|
|
template<typename... Args>
|
|
std::size_t find(Args&& ...args) const
|
|
{
|
|
return s.find( std::forward<Args>(args)... );
|
|
}
|
|
|
|
/// might throw illegal_utf, if a multi-char sequence is clipped.
|
|
nfc_string substr(std::size_t pos=0, std::size_t count=npos) const;
|
|
|
|
private:
|
|
std::basic_string<CharT> s;
|
|
|
|
/// (re-)normalize the content string s.
|
|
void normalize();
|
|
};
|
|
};
|
|
|
|
/// can be more efficient than the operator+() below.
|
|
template<class CharT>
|
|
typename
|
|
UTF<CharT>::nfc_string operator+(
|
|
typename UTF<CharT>::nfc_string left,
|
|
const typename UTF<CharT>::nfc_string& right);
|
|
|
|
template<class CharT, class T>
|
|
inline
|
|
typename
|
|
UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string left, const T& right)
|
|
{
|
|
return left+=right;
|
|
}
|
|
|
|
template<class CharT, class T>
|
|
inline
|
|
typename
|
|
UTF<CharT>::nfc_string operator+(typename UTF<CharT>::nfc_string&& left, const T& right)
|
|
{
|
|
return left+=right;
|
|
}
|
|
|
|
|
|
template<class CharT, class T>
|
|
inline
|
|
typename
|
|
UTF<CharT>::nfc_string operator+(const T& left, const typename UTF<CharT>::nfc_string& right)
|
|
{
|
|
typename UTF<CharT>::nfc_string left_s{left};
|
|
return left_s+=right;
|
|
}
|
|
|
|
template<class CharT>
|
|
inline
|
|
bool operator<(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
|
|
{
|
|
return left<right;
|
|
}
|
|
|
|
template<class CharT>
|
|
inline
|
|
bool operator==(const typename UTF<CharT>::nfc_string& left, std::basic_string_view<CharT> right)
|
|
{
|
|
return left==right;
|
|
}
|
|
|
|
|
|
/// convenient alias names:
|
|
using UTF8 = UTF<char>;
|
|
using UTF16 = UTF<char16_t>;
|
|
|
|
using nfc_string = UTF8::nfc_string;
|
|
using nfc_u16string = UTF16::nfc_string;
|
|
|
|
/// throws illegal_utf exception if s is not valid UTF-8
void assert_utf8(std::string_view s);
|
|
|
|
|
|
/// convert NFD to NFC
/// \param nfd_string  code points, taken by value; expected to be in NFD already
///                    (behaviour for other input is not visible here -- TODO confirm).
/// \return the composed (NFC) code point sequence
std::u32string createNFC(std::u32string nfd_string);
|
|
|
|
/*
|
|
// return No or Maybe, if at least one character with NFC_Quickcheck class is "No" or "Maybe"
|
|
// might throw illegal_utf exception
|
|
template<class CharT>
|
|
IsNFC isNFC_quick_check(std::basic_string_view<CharT> s);
|
|
|
|
// runs first quick check and a deep test if quick check returns "Maybe".
|
|
template<class CharT>
|
|
bool isNFC(std::basic_string_view<CharT> s);
|
|
|
|
// returns true if the sequence is valid UTF-8
|
|
bool isUtf8(const char* begin, const char* end);
|
|
|
|
// converts a C++ string (in UTF-8) into NFC form
|
|
// s is ''moved'' to the return value if possible so no copy is done here.
|
|
template<class CharT>
|
|
std::basic_string<CharT> toNFC(std::basic_string_view<CharT> s);
|
|
*/
|
|
|
|
/// creates a UTF-8-encoded NFC string from the UTF-16 input s
std::string toNFC_8(std::u16string_view s);
|
|
|
|
/// convenience functions to avoid ::strdup(pEp::toNFC<char>(text).c_str());
/// and unnecessary temporary std::string etc.
/// NOTE(review): the result is presumably allocated like ::strdup() and must be
/// released by the caller -- confirm against the implementation.
char* strdup_NFC(std::string_view s);
|
|
|
|
/// Duplicates a pEp_identity; presumably the text fields of the copy are
/// converted to NFC -- TODO confirm against the implementation.
/// NOTE(review): the return type is spelled `pEp_identity`, the parameter `::pEp_identity`.
pEp_identity *identity_dup_NFC(const ::pEp_identity* value);
|
|
/// Duplicates an identity_list; presumably every contained identity is
/// converted to NFC on the way -- TODO confirm against the implementation.
::identity_list* identity_list_dup_NFC(const ::identity_list* value);
|
|
|
|
|
|
} // end of namespace pEp
|
|
|
|
#endif // LIBPEPDATATYPES_NFC_HH
|
|
|