Program Listing for File CharMatcher.hpp
↰ Return to documentation for file (src/rdf4cpp/util/CharMatcher.hpp)
#ifndef RDF4CPP_CHARMATCHER_HPP
#define RDF4CPP_CHARMATCHER_HPP
#include <array>
#include <optional>
#include <string_view>
#include <rdf4cpp/datatypes/registry/util/ConstexprString.hpp>
namespace rdf4cpp::util::char_matcher_detail {
/**
 * A range of characters described by its two end points.
 *
 * Both members default to NUL. The matchers in this file fill it with
 * inclusive bounds, e.g. `{'0', '9'}` for the decimal digits.
 */
struct CharRange {
    char first{'\0'};
    char last{'\0'};
};
/**
 * Requirements a type T has to fulfil to be usable as a character matcher:
 *  - `match(int)` is callable on a const instance and yields something convertible to bool
 *  - `T::simd_range_num` is convertible to size_t
 *  - `T::fail_if_unicode` is convertible to bool
 *  - `simd_ranges()` returns exactly `std::array<CharRange, T::simd_range_num>`
 *  - `simd_singles()` returns something convertible to `std::string_view`
 */
template<typename T>
concept CharMatcher = requires(T const a, int c) {
    { a.match(c) } -> std::convertible_to<bool>;
    { T::simd_range_num } -> std::convertible_to<size_t>;
    { T::fail_if_unicode } -> std::convertible_to<bool>;
    { a.simd_ranges() } -> std::same_as<std::array<CharRange, T::simd_range_num>>;
    { a.simd_singles() } -> std::convertible_to<std::string_view>;
};
// Accelerated matching of a whole string against a set of character ranges plus a set of single characters.
// The primary template is deleted, so only the (number of ranges, ConstexprString size) combinations
// that are explicitly specialised below can be called; any other combination fails to compile.
// The specialisations are only declared here, their definitions are not part of this header.
// The caller `match` in this file treats an engaged optional as the final verdict and an empty optional
// as "no answer", in which case it continues with its own fallback logic.
// NOTE(review): the name suggests a SIMD implementation - confirm against the definitions.
template<size_t rn, size_t sn>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, rn> const &ranges, datatypes::registry::util::ConstexprString<sn> const &single) = delete;
// 3 ranges, ConstexprString<1>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 3> const &ranges, datatypes::registry::util::ConstexprString<1> const &single);
// 3 ranges, ConstexprString<4>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 3> const &ranges, datatypes::registry::util::ConstexprString<4> const &single);
// 3 ranges, ConstexprString<18>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 3> const &ranges, datatypes::registry::util::ConstexprString<18> const &single);
// 3 ranges, ConstexprString<20>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 3> const &ranges, datatypes::registry::util::ConstexprString<20> const &single);
// 3 ranges, ConstexprString<21>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 3> const &ranges, datatypes::registry::util::ConstexprString<21> const &single);
// 1 range, ConstexprString<1>
template<>
std::optional<bool> try_match_simd(std::string_view data, std::array<CharRange, 1> const &ranges, datatypes::registry::util::ConstexprString<1> const &single);
// Deleted primary template: only the explicitly specialised ConstexprString sizes below are callable.
// The specialisation is only declared here, its definition is not part of this header.
// NOTE(review): presumably reports whether `data` contains at least one of the characters in `match` -
// confirm against the definition.
template<size_t n>
bool contains_any(std::string_view data, datatypes::registry::util::ConstexprString<n> const &match) = delete;
// ConstexprString<5>
template<>
bool contains_any(std::string_view data, datatypes::registry::util::ConstexprString<5> const &match);
/**
 * Matches any single character that occurs in a fixed pattern of ASCII characters.
 *
 * @tparam n size of the character array the pattern is constructed from
 */
template<size_t n>
struct ASCIIPatternMatcher {
    datatypes::registry::util::ConstexprString<n> pattern;

    /**
     * @param str the set of accepted characters, given as a character array
     */
    explicit constexpr ASCIIPatternMatcher(char const (&str)[n]) noexcept
        : pattern(str) {
    }

    /**
     * @param c code point to test
     * @return true iff c is an ASCII character that is contained in the pattern
     */
    [[nodiscard]] constexpr bool match(int c) const noexcept {
        // Explicit range test: a round-trip through char would depend on the signedness of char
        // and let negative values (signed char) or 0x80..0xFF (unsigned char) slip through.
        if (c < 0 || c > 0x7F) {
            return false; // not ASCII
        }
        return static_cast<std::string_view>(pattern).find(static_cast<char>(c)) != std::string_view::npos;
    }

    // this matcher contributes no ranges, only single characters
    static constexpr size_t simd_range_num = 0;
    // `match(std::string_view)` returns false when try_match_simd gives no answer
    static constexpr bool fail_if_unicode = true;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return the pattern itself, every character in it is an accepted single character
     */
    [[nodiscard]] consteval auto simd_singles() const noexcept {
        return pattern;
    }
};
/**
 * Matches the ASCII decimal digits '0' to '9'.
 */
struct ASCIINumMatcher {
    /**
     * @param c code point to test
     * @return true iff c lies in ['0', '9']
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        // The comparison is done on the int value itself, so everything outside of the digit range
        // (including all non-ASCII values) is rejected without a separate ASCII check.
        return c >= '0' && c <= '9';
    }

    static constexpr size_t simd_range_num = 1;
    // `match(std::string_view)` returns false when try_match_simd gives no answer
    static constexpr bool fail_if_unicode = true;

    /**
     * @return the single range '0'..'9'
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return std::array<CharRange, simd_range_num>{CharRange{'0', '9'}};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
/**
 * Matches the ASCII letters 'a' to 'z' and 'A' to 'Z'.
 */
struct ASCIIAlphaMatcher {
    /**
     * @param c code point to test
     * @return true iff c lies in ['a', 'z'] or ['A', 'Z']
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        // The comparisons are done on the int value itself, so everything outside of the two letter
        // ranges (including all non-ASCII values) is rejected without a separate ASCII check.
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    static constexpr size_t simd_range_num = 2;
    // `match(std::string_view)` returns false when try_match_simd gives no answer
    static constexpr bool fail_if_unicode = true;

    /**
     * @return the two ranges 'a'..'z' and 'A'..'Z'
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return std::array<CharRange, simd_range_num>{
                CharRange{'a', 'z'},
                CharRange{'A', 'Z'}};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
/**
 * Combination of two matchers that accepts a character as soon as one of them accepts it.
 *
 * @tparam A type of the first matcher
 * @tparam B type of the second matcher
 */
template<CharMatcher A, CharMatcher B>
struct OrMatcher {
    A a;
    B b;

    constexpr OrMatcher(A a, B b) : a(a), b(b) {}
    constexpr OrMatcher() = default;

    /**
     * @param c code point to test
     * @return true iff `a` or `b` matches c; `b` is only consulted if `a` rejects
     */
    [[nodiscard]] constexpr bool match(int c) const noexcept {
        if (a.match(c)) {
            return true;
        }
        return b.match(c);
    }

    // the combined matcher carries the ranges of both operands
    static constexpr size_t simd_range_num = A::simd_range_num + B::simd_range_num;
    // only set if both operands set it
    static constexpr bool fail_if_unicode = A::fail_if_unicode && B::fail_if_unicode;

    /**
     * @return the ranges of `a` followed by the ranges of `b`
     */
    [[nodiscard]] consteval std::array<CharRange, simd_range_num> simd_ranges() const noexcept {
        std::array<CharRange, simd_range_num> merged{};
        size_t next = 0; // position in `merged` that is written next

        if constexpr (A::simd_range_num > 0) {
            for (auto const &range : a.simd_ranges()) {
                merged[next++] = range;
            }
        }
        if constexpr (B::simd_range_num > 0) {
            for (auto const &range : b.simd_ranges()) {
                merged[next++] = range;
            }
        }
        return merged;
    }

    /**
     * @return the single characters of `a` concatenated with the single characters of `b`
     */
    [[nodiscard]] consteval auto simd_singles() const noexcept {
        return a.simd_singles() + b.simd_singles();
    }
};
/**
 * Builds an OrMatcher out of two matchers, so that matchers can be chained as `x | y | z`.
 *
 * @param a first matcher
 * @param b second matcher
 * @return matcher that accepts what `a` or `b` accepts
 */
template<CharMatcher A, CharMatcher B>
constexpr OrMatcher<A, B> operator|(A a, B b) {
    return OrMatcher<A, B>{a, b};
}
// ASCII letters or ASCII digits.
// `inline` gives the object external linkage: there is one object for the whole program instead of
// one per translation unit, so using it as a reference template argument of `match` names the same
// specialisation everywhere.
inline constexpr auto ascii_alphanum_matcher = ASCIIAlphaMatcher{} | ASCIINumMatcher{};
/**
 * Matches the code points of the `ucschar` production of RFC 3987 (IRIs).
 * Only non-ASCII code points are accepted, hence there is nothing to contribute to the ASCII fast path.
 */
struct UCSCharMatcher {
    /**
     * @param c code point to test
     * @return true iff c is a `ucschar`
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        return (c >= 0xA0 && c <= 0xD7FF) ||
               (c >= 0xF900 && c <= 0xFDCF) ||
               (c >= 0xFDF0 && c <= 0xFFEF) ||
               (c >= 0x10000 && c <= 0x1FFFD) ||
               (c >= 0x20000 && c <= 0x2FFFD) ||
               (c >= 0x30000 && c <= 0x3FFFD) ||
               (c >= 0x40000 && c <= 0x4FFFD) ||
               (c >= 0x50000 && c <= 0x5FFFD) ||
               (c >= 0x60000 && c <= 0x6FFFD) ||
               (c >= 0x70000 && c <= 0x7FFFD) ||
               (c >= 0x80000 && c <= 0x8FFFD) ||
               (c >= 0x90000 && c <= 0x9FFFD) ||
               (c >= 0xA0000 && c <= 0xAFFFD) ||
               (c >= 0xB0000 && c <= 0xBFFFD) ||
               (c >= 0xC0000 && c <= 0xCFFFD) ||
               (c >= 0xD0000 && c <= 0xDFFFD) ||
               // unlike the other planes, RFC 3987 starts this one at E1000: E0000..E0FFF is not part of ucschar
               (c >= 0xE1000 && c <= 0xEFFFD);
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
// `inline` gives these objects external linkage: one object per program instead of one per
// translation unit, so using them as reference template arguments of `match` names the same
// specialisation everywhere.

// ASCII letters, ASCII digits, one of "-._~" or a UCSCharMatcher code point
inline constexpr auto i_unreserved_matcher = ascii_alphanum_matcher | ASCIIPatternMatcher{"-._~"} | UCSCharMatcher{};
// one of "!$&'()*+,;="
inline constexpr auto sub_delims_matcher = ASCIIPatternMatcher{"!$&'()*+,;="};
/**
 * Matches the private-use code point ranges E000..F8FF, F0000..FFFFD and 100000..10FFFD
 * (the ranges of the `iprivate` production of RFC 3987).
 */
struct IPrivateMatcher {
    /**
     * @param c code point to test
     * @return true iff c lies in one of the accepted ranges (bounds inclusive)
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        // {lower bound, upper bound}, both inclusive
        constexpr int bounds[][2] = {
                {0xE000, 0xF8FF},
                {0xF0000, 0xFFFFD},
                {0x100000, 0x10FFFD}};

        for (auto const &bound : bounds) {
            if (c >= bound[0] && c <= bound[1]) {
                return true;
            }
        }
        return false;
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
/**
 * Matches the non-ASCII code point ranges that are accepted in addition to the ASCII letters
 * by PNCharsBaseMatcher.
 */
struct PNCharsBase_UnicodePartMatcher {
    /**
     * @param c code point to test
     * @return true iff c lies in one of the accepted ranges (bounds inclusive)
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        // {lower bound, upper bound}, both inclusive
        constexpr int bounds[][2] = {
                {0xC0, 0xD6},
                {0xD8, 0xF6},
                {0xF8, 0x02FF},
                {0x0370, 0x037D},
                {0x037F, 0x1FFF},
                {0x200C, 0x200D},
                {0x2070, 0x218F},
                {0x2C00, 0x2FEF},
                {0x3001, 0xD7FF},
                {0xF900, 0xFDCF},
                {0xFDF0, 0xFFFD},
                {0x00010000, 0x000EFFFF}};

        for (auto const &bound : bounds) {
            if (c >= bound[0] && c <= bound[1]) {
                return true;
            }
        }
        return false;
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
// `inline` gives these objects external linkage: one object per program instead of one per
// translation unit, so using them as reference template arguments of `match` names the same
// specialisation everywhere.

// ASCII letters or a PNCharsBase_UnicodePartMatcher code point
inline constexpr auto PNCharsBaseMatcher = ASCIIAlphaMatcher{} | PNCharsBase_UnicodePartMatcher{};
// '_' or anything PNCharsBaseMatcher accepts
inline constexpr auto PNCharsUMatcher = ASCIIPatternMatcher{"_"} | PNCharsBaseMatcher;
/**
 * Matches the code point B7 and the ranges 0300..036F and 203F..2040.
 */
struct PNChars_UnicodePartMatcher {
    /**
     * @param c code point to test
     * @return true iff c is B7 or lies in one of the two ranges (bounds inclusive)
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        if (c == 0xB7) {
            return true;
        }
        if (c >= 0x0300 && c <= 0x036F) {
            return true;
        }
        return c >= 0x203F && c <= 0x2040;
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
// ASCII digits, '-', anything PNCharsUMatcher accepts, or a PNChars_UnicodePartMatcher code point.
// `inline` gives the object external linkage: one object per program instead of one per
// translation unit, so using it as a reference template argument of `match` names the same
// specialisation everywhere.
inline constexpr auto PNCharsMatcher = ASCIINumMatcher{} | ASCIIPatternMatcher{"-"} | PNCharsUMatcher | PNChars_UnicodePartMatcher{};
namespace xml {
/**
 * Matches the non-ASCII code point ranges that may start an XML NCName.
 */
struct NCNameStartChar_UnicodePartMatcher {
    /**
     * @param c code point to test
     * @return true iff c lies in one of the accepted ranges (bounds inclusive)
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        // {lower bound, upper bound}, both inclusive
        constexpr int bounds[][2] = {
                {0xC0, 0xD6},
                {0xD8, 0xF6},
                {0xF8, 0x2FF},
                {0x370, 0x37D},
                {0x37F, 0x1FFF},
                {0x200C, 0x200D},
                {0x2070, 0x218F},
                {0x2C00, 0x2FEF},
                {0x3001, 0xD7FF},
                {0xF900, 0xFDCF},
                {0xFDF0, 0xFFFD},
                {0x10000, 0xEFFFF}};

        for (auto const &bound : bounds) {
            if (c >= bound[0] && c <= bound[1]) {
                return true;
            }
        }
        return false;
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
/**
 * Matches the code point B7 and the ranges 0300..036F and 203F..2040.
 */
struct NCNameChar_UnicodePartMatcher {
    /**
     * @param c code point to test
     * @return true iff c is B7 or lies in one of the two ranges (bounds inclusive)
     */
    [[nodiscard]] static constexpr bool match(int c) noexcept {
        if (c == 0xB7) {
            return true;
        }
        if (c >= 0x0300 && c <= 0x036F) {
            return true;
        }
        return c >= 0x203F && c <= 0x2040;
    }

    static constexpr size_t simd_range_num = 0;
    // non-ASCII input can match, so `match(std::string_view)` must go on to the per code point check
    static constexpr bool fail_if_unicode = false;

    /**
     * @return an empty array, there are no ranges
     */
    [[nodiscard]] static consteval std::array<CharRange, simd_range_num> simd_ranges() noexcept {
        return {};
    }

    /**
     * @return an empty string, there are no single characters
     */
    [[nodiscard]] static consteval auto simd_singles() noexcept {
        return datatypes::registry::util::ConstexprString("");
    }
};
// `inline` gives these objects external linkage: one object per program instead of one per
// translation unit, so using them as reference template arguments of `match` names the same
// specialisation everywhere.

// ASCII letters, '_' or a NCNameStartChar_UnicodePartMatcher code point
inline constexpr auto NCNameStartChar = ASCIIAlphaMatcher{} | ASCIIPatternMatcher{"_"} | NCNameStartChar_UnicodePartMatcher{};
// ASCII letters, ASCII digits, one of "_-.", or a code point of either unicode part matcher
inline constexpr auto NCNameChar = ASCIIAlphaMatcher{} | ASCIINumMatcher{} | ASCIIPatternMatcher{"_-."} | NCNameStartChar_UnicodePartMatcher{} | NCNameChar_UnicodePartMatcher{};
}
/**
 * Checks whether every character of a string is accepted by the matcher `m`.
 *
 * First `try_match_simd` is asked with the ranges and single characters of `m`; if it gives an answer,
 * that answer is returned. Otherwise the result is false if `m.fail_if_unicode` is set, else the string
 * is piped through `utf8_range_decoder` and every produced value is checked with `m.match`.
 *
 * @tparam m the matcher to check against
 * @tparam utf8_range_decoder range adaptor applied as `s | utf8_range_decoder`, its elements are read as int
 *         (NOTE(review): presumably decodes UTF-8 into code points - confirm against the callers)
 * @param s the string to check
 * @return true iff the whole string is accepted
 */
template<auto const &m, auto utf8_range_decoder>
bool match(std::string_view s) noexcept {
    static constexpr auto singles = m.simd_singles();
    auto ranges = m.simd_ranges();

    if (auto const fast_result = try_match_simd(s, ranges, singles); fast_result.has_value()) {
        return *fast_result;
    }

    if constexpr (m.fail_if_unicode) {
        return false;
    }

    for (int const decoded : s | utf8_range_decoder) {
        if (!m.match(decoded)) {
            return false;
        }
    }
    return true;
}
} // namespace rdf4cpp::util::char_matcher_detail
#endif //RDF4CPP_CHARMATCHER_HPP