.. _program_listing_file_src_rdf4cpp_util_CharMatcher.hpp: Program Listing for File CharMatcher.hpp ======================================== |exhale_lsh| :ref:`Return to documentation for file ` (``src/rdf4cpp/util/CharMatcher.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #ifndef RDF4CPP_CHARMATCHER_HPP #define RDF4CPP_CHARMATCHER_HPP #include #include #include #include namespace rdf4cpp::util::char_matcher_detail { /* A first/last pair of chars describing one character range, e.g. {'0', '9'}; element type of the simd_ranges() arrays. */ struct CharRange { char first = '\0'; char last = '\0'; }; /* Interface every matcher must provide: match(c), the static members simd_range_num and fail_if_unicode, and the accessors simd_ranges() and simd_singles(). */ template concept CharMatcher = requires(T const a, int c) { { a.match(c) } -> std::convertible_to; { T::simd_range_num } -> std::convertible_to; { T::fail_if_unicode } -> std::convertible_to; { a.simd_ranges() } -> std::same_as>; { a.simd_singles() } -> std::convertible_to; }; /* Fast-path matcher returning an optional result. The generic template is deleted, so only the explicit specializations declared below can be called; their definitions are not part of this listing. NOTE(review): the template arguments of the specializations were lost in this listing - confirm against the real header. */ template std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString const &single) = delete; template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<1> const &single); template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<4> const &single); template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<18> const &single); template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<20> const &single); template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<21> const &single); template<> std::optional try_match_simd(std::string_view data, std::array const &ranges, datatypes::registry::util::ConstexprString<1> const &single); /* Same scheme as try_match_simd: deleted generic template plus one declared specialization (ConstexprString<5>) whose definition is not in this listing. */ template bool contains_any(std::string_view data, datatypes::registry::util::ConstexprString const &match) = delete; template<> 
bool contains_any(std::string_view data, datatypes::registry::util::ConstexprString<5> const &match); /* Matches any character contained in the stored pattern string. A code point that does not survive the cast round-trip in match() is rejected before the lookup. Contributes no ranges; the pattern itself is returned as the singles. */ template struct ASCIIPatternMatcher { datatypes::registry::util::ConstexprString pattern; explicit constexpr ASCIIPatternMatcher(char const (&str)[n]) noexcept : pattern(str) { } [[nodiscard]] constexpr bool match(int c) const noexcept { auto ch = static_cast(c); if (c != static_cast(ch)) // not ascii return false; return static_cast(pattern).find(ch) != std::string_view::npos; } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = true; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] consteval auto simd_singles() const noexcept { return pattern; } }; /* Matches the digits '0'..'9'; exposes that one range and an empty singles string. */ struct ASCIINumMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { auto ch = static_cast(c); if (c != static_cast(ch)) // not ascii return false; return c >= '0' && c <= '9'; } static constexpr size_t simd_range_num = 1; static constexpr bool fail_if_unicode = true; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return std::array{CharRange{'0', '9'}}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; /* Matches 'a'..'z' and 'A'..'Z'; exposes those two ranges and an empty singles string. */ struct ASCIIAlphaMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { auto ch = static_cast(c); if (c != static_cast(ch)) // not ascii return false; return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } static constexpr size_t simd_range_num = 2; static constexpr bool fail_if_unicode = true; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return std::array{ CharRange{'a', 'z'}, CharRange{'A', 'Z'}}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; /* Combination of two matchers: match() succeeds when either side matches; the range arrays are concatenated (A's entries first, then B's), the singles are joined with operator+, and fail_if_unicode is set only when both sides set it. */ template struct OrMatcher { A a; B b; constexpr OrMatcher(A a, B b) : a(a), b(b) {} constexpr OrMatcher() = default; [[nodiscard]] constexpr bool match(int c) const noexcept { 
return a.match(c) || b.match(c); } static constexpr size_t simd_range_num = A::simd_range_num + B::simd_range_num; static constexpr bool fail_if_unicode = A::fail_if_unicode && B::fail_if_unicode; [[nodiscard]] consteval std::array simd_ranges() const noexcept { std::array r{}; if constexpr (A::simd_range_num > 0) { auto aa = a.simd_ranges(); for (size_t s = 0; s < A::simd_range_num; ++s) { r[s] = aa[s]; } } if constexpr (B::simd_range_num > 0) { auto ba = b.simd_ranges(); for (size_t s = 0; s < B::simd_range_num; ++s) { r[s + A::simd_range_num] = ba[s]; } } return r; } [[nodiscard]] consteval auto simd_singles() const noexcept { return a.simd_singles() + b.simd_singles(); } }; /* Lets two matchers be combined as `a | b`, yielding an OrMatcher holding copies of both. */ template constexpr OrMatcher operator|(A a, B b) { return OrMatcher{a, b}; } constexpr auto ascii_alphanum_matcher = ASCIIAlphaMatcher{} | ASCIINumMatcher{}; /* Matches the listed code point ranges, all at or above 0xA0. Contributes no ranges or singles, and fail_if_unicode is false. NOTE(review): presumably the `ucschar` production of RFC 3987 - confirm. */ struct UCSCharMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xF900 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFEF) || (c >= 0x10000 && c <= 0x1FFFD) || (c >= 0x20000 && c <= 0x2FFFD) || (c >= 0x30000 && c <= 0x3FFFD) || (c >= 0x40000 && c <= 0x4FFFD) || (c >= 0x50000 && c <= 0x5FFFD) || (c >= 0x60000 && c <= 0x6FFFD) || (c >= 0x70000 && c <= 0x7FFFD) || (c >= 0x80000 && c <= 0x8FFFD) || (c >= 0x90000 && c <= 0x9FFFD) || (c >= 0xA0000 && c <= 0xAFFFD) || (c >= 0xB0000 && c <= 0xBFFFD) || (c >= 0xC0000 && c <= 0xCFFFD) || (c >= 0xD0000 && c <= 0xDFFFD) || (c >= 0xE0000 && c <= 0xEFFFD); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; constexpr auto i_unreserved_matcher = ascii_alphanum_matcher | ASCIIPatternMatcher{"-._~"} | UCSCharMatcher{}; constexpr auto sub_delims_matcher = ASCIIPatternMatcher{"!$&'()*+,;="}; /* Matches the three listed ranges 0xE000-0xF8FF, 0xF0000-0xFFFFD and 0x100000-0x10FFFD. NOTE(review): presumably the `iprivate` production of RFC 3987 - confirm. */ struct 
IPrivateMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return (c >= 0xE000 && c <= 0xF8FF) || (c >= 0xF0000 && c <= 0xFFFFD) || (c >= 0x100000 && c <= 0x10FFFD); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; /* Covers only code point ranges at or above 0xC0; it is combined with ASCIIAlphaMatcher below to form PNCharsBaseMatcher. */ struct PNCharsBase_UnicodePartMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xF6) || (c >= 0xF8 && c <= 0x02FF) || (c >= 0x0370 && c <= 0x037D) || (c >= 0x037F && c <= 0x1FFF) || (c >= 0x200C && c <= 0x200D) || (c >= 0x2070 && c <= 0x218F) || (c >= 0x2C00 && c <= 0x2FEF) || (c >= 0x3001 && c <= 0xD7FF) || (c >= 0xF900 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFFD) || (c >= 0x00010000 && c <= 0x000EFFFF); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; constexpr auto PNCharsBaseMatcher = ASCIIAlphaMatcher{} | PNCharsBase_UnicodePartMatcher{}; constexpr auto PNCharsUMatcher = ASCIIPatternMatcher{"_"} | PNCharsBaseMatcher; /* Matches 0xB7 and the ranges 0x0300-0x036F and 0x203F-0x2040; used as the last operand of PNCharsMatcher below. */ struct PNChars_UnicodePartMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return c == 0xB7 || (c >= 0x0300 && c <= 0x036F) || (c >= 0x203F && c <= 0x2040); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; constexpr auto PNCharsMatcher = ASCIINumMatcher{} | ASCIIPatternMatcher{"-"} | 
PNCharsUMatcher | PNChars_UnicodePartMatcher{}; /* Matchers for XML names. NOTE(review): the two structs list the same ranges as PNCharsBase_UnicodePartMatcher and PNChars_UnicodePartMatcher above - presumably they follow the NCName productions of the XML Namespaces spec; confirm. */ namespace xml { struct NCNameStartChar_UnicodePartMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xF6) || (c >= 0xF8 && c <= 0x2FF) || (c >= 0x370 && c <= 0x37D) || (c >= 0x37F && c <= 0x1FFF) || (c >= 0x200C && c <= 0x200D) || (c >= 0x2070 && c <= 0x218F) || (c >= 0x2C00 && c <= 0x2FEF) || (c >= 0x3001 && c <= 0xD7FF) || (c >= 0xF900 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFFD) || (c >= 0x10000 && c <= 0xEFFFF); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; struct NCNameChar_UnicodePartMatcher { [[nodiscard]] static constexpr bool match(int c) noexcept { return c == 0xB7 || (c >= 0x0300 && c <= 0x036F) || (c >= 0x203F && c <= 0x2040); } static constexpr size_t simd_range_num = 0; static constexpr bool fail_if_unicode = false; [[nodiscard]] static consteval std::array simd_ranges() noexcept { return {}; } [[nodiscard]] static consteval auto simd_singles() noexcept { return datatypes::registry::util::ConstexprString(""); } }; constexpr auto NCNameStartChar = ASCIIAlphaMatcher{} | ASCIIPatternMatcher{"_"} | NCNameStartChar_UnicodePartMatcher{}; constexpr auto NCNameChar = ASCIIAlphaMatcher{} | ASCIINumMatcher{} | ASCIIPatternMatcher{"_-."} | NCNameStartChar_UnicodePartMatcher{} | NCNameChar_UnicodePartMatcher{}; } /* Checks a whole string against matcher m. try_match_simd is asked first, and if it yields a value that value is returned unchanged. Otherwise the function returns false when m.fail_if_unicode is set; if it is not set, s is decoded through utf8_range_decoder and the function returns false at the first code point m.match rejects, true if none is rejected (so an empty sequence yields true). */ template bool match(std::string_view s) noexcept { auto ranges = m.simd_ranges(); static constexpr auto singles = m.simd_singles(); auto simd_r = try_match_simd(s, ranges, singles); if (simd_r.has_value()) { return *simd_r; } if constexpr (m.fail_if_unicode) { return false; } for (int c : s | utf8_range_decoder) { if (!m.match(c)) { return false; } } return true; } } // namespace 
rdf4cpp::util::char_matcher_detail #endif //RDF4CPP_CHARMATCHER_HPP