.. _program_listing_file_src_rdf4cpp_IRIFactory.cpp: Program Listing for File IRIFactory.cpp ======================================= |exhale_lsh| :ref:`Return to documentation for file ` (``src/rdf4cpp/IRIFactory.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "IRIFactory.hpp" #include #include #include namespace rdf4cpp { static std::string_view construct(std::string_view scheme, std::optional auth, std::string_view path, std::optional query, std::optional frag) noexcept { static thread_local std::string str; str.clear(); str.reserve(std::bit_ceil(scheme.size() + 1 + path.size())); str.append(scheme); str.push_back(':'); if (auth.has_value()) { str.append("//"); str.append(*auth); } if (!path.empty() && !path.starts_with('/') && auth.has_value()) { str.push_back('/'); } str.append(path); if (query.has_value()) { str.push_back('?'); str.append(*query); } if (frag.has_value()) { str.push_back('#'); str.append(*frag); } return str; } static std::string_view first_path_segment(std::string_view path) noexcept { size_t off = 0; if (path.starts_with('/')) off = 1; auto e = path.find('/', off); return path.substr(0, e); } static void remove_last_path_segment(std::string &path) noexcept { auto e = path.find_last_of('/'); if (e == std::string::npos) return; path.resize(e); } static std::string_view remove_dot_segments(std::string_view src) noexcept { // adapted from https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 thread_local static std::string buf; buf.clear(); buf.reserve(std::bit_ceil(src.size())); while (!src.empty()) { if (src.starts_with("./")) { // 2.A src.remove_prefix(2); continue; } if (src.starts_with("../")) { // 2.A src.remove_prefix(3); continue; } if (src.starts_with("/./")) { // 2.B src.remove_prefix(2); continue; } if (src == "/.") { // 2.B // '[..] begins with a prefix of [..] "/." where "." is a complete path segment // then replace that prefix with "/" in the input buffer [..]' (and continue) // => "." is a complete path segment if either a slash follows (previous branch) // or the path ends after it (this branch) // => the next iteration would just append the '/' to the output buffer // because none of the branches will be taken // => therefore appending the slash directly and breaking out of the loop is equivalent // to the given formulation from RFC 3986 buf.push_back('/'); break; } if (src.starts_with("/../")) { // 2.C src.remove_prefix(3); remove_last_path_segment(buf); continue; } if (src == "/..") { // 2.C // same reasoning as for 2.B remove_last_path_segment(buf); buf.push_back('/'); break; } if (src == ".." || src == ".") { // 2.D break; } // 2.E auto const seg = first_path_segment(src); buf.append(seg); src.remove_prefix(seg.size()); } return buf; } static std::string_view merge_path_with_base(IRIView::AllParts const &base, std::string_view path) noexcept { static thread_local std::string r; r.clear(); if (base.scheme.has_value() && base.path.empty()) { r.reserve(std::bit_ceil(path.size() + 1)); r.push_back('/'); r.append(path); return r; } r.reserve(std::bit_ceil(base.path.size() + path.size() + 1)); r.append(base.path); remove_last_path_segment(r); r.push_back('/'); r.append(path); return r; } template static std::string_view to_absolute(IRIView::AllParts const &base, std::string_view rel) noexcept { auto [r_scheme, r_auth, r_path, r_query, r_frag] = IRIView{rel}.all_parts(); if (r_scheme.has_value()) { if constexpr(always_remove_dots) { return construct(*r_scheme, r_auth, remove_dot_segments(r_path), r_query, r_frag); } else { return rel; } } auto const &[b_scheme, b_auth, b_path, b_query, _b_frag] = base; if (r_auth.has_value()) { return construct(*b_scheme, r_auth, remove_dot_segments(r_path), r_query, r_frag); } if (r_path.empty()) { return construct(*b_scheme, b_auth, b_path, r_query.has_value() ? r_query : b_query, r_frag); } if (r_path.starts_with('/')) { return construct(*b_scheme, b_auth, remove_dot_segments(r_path), r_query, r_frag); } auto const merged = merge_path_with_base(base, r_path); return construct(*b_scheme, b_auth, remove_dot_segments(merged), r_query, r_frag); } IRIFactory::IRIFactory(std::string_view base) { if (set_base(base) != IRIFactoryError::Ok) { throw std::invalid_argument{"invalid base"}; } } IRIFactory::IRIFactory(prefix_map_type &&prefixes, std::string_view base) : prefixes(std::move(prefixes)) { if (set_base(base) != IRIFactoryError::Ok) { throw std::invalid_argument{"invalid base"}; } } nonstd::expected IRIFactory::from_relative(std::string_view rel, storage::DynNodeStoragePtr node_storage) const noexcept { return create_and_validate(to_absolute(base_parts_cache, rel), node_storage); } nonstd::expected IRIFactory::from_maybe_relative(std::string_view rel, storage::DynNodeStoragePtr node_storage) const noexcept { return create_and_validate(to_absolute(base_parts_cache, rel), node_storage); } nonstd::expected IRIFactory::from_prefix(std::string_view prefix, std::string_view local, storage::DynNodeStoragePtr node_storage) const { auto i = prefixes.find(prefix); if (i == prefixes.end()) { return nonstd::make_unexpected(IRIFactoryError::UnknownPrefix); } static thread_local std::string deref; deref.clear(); deref.reserve(i->second.size() + local.size()); deref.append(i->second); deref.append(local); if (IRIView{deref}.is_relative()) { return from_relative(deref, node_storage); } return create_and_validate(deref, node_storage); } nonstd::expected IRIFactory::create_and_validate(std::string_view iri, storage::DynNodeStoragePtr node_storage) noexcept { if (!rdf4cpp::datatypes::registry::relaxed_parsing_mode) { if (auto const e = IRIView{iri}.quick_validate(); e != IRIFactoryError::Ok) { return nonstd::make_unexpected(e); } } return IRI::make_unchecked(iri, node_storage); } IRIFactoryError IRIFactory::assign_prefix(std::string_view prefix, std::string_view expanded) { using namespace util::char_matcher_detail; auto r = prefix | una::views::utf8; auto it = r.begin(); if (it != r.end()) { if (!PNCharsBaseMatcher.match(*it)) { return IRIFactoryError::InvalidPrefix; } auto lastchar = *it; ++it; static constexpr auto pn_matcher = PNCharsMatcher | ASCIIPatternMatcher{"."}; while (it != r.end()) { if (!pn_matcher.match(*it)) { return IRIFactoryError::InvalidPrefix; } lastchar = *it; ++it; } if (lastchar == '.') { return IRIFactoryError::InvalidPrefix; } } // checking expanded can only be done after the full IRI was created assign_prefix_unchecked(prefix, expanded); return IRIFactoryError::Ok; } void IRIFactory::assign_prefix_unchecked(std::string_view prefix, std::string_view expanded) { std::string pre{prefix}; prefixes[pre] = expanded; } void IRIFactory::clear_prefix(std::string_view prefix) { auto it = prefixes.find(prefix); if (it == prefixes.end()) [[unlikely]] { return; } prefixes.erase(it); } std::string_view IRIFactory::get_base() const noexcept { return base; } IRIFactoryError IRIFactory::set_base(std::string_view b) noexcept { if (!rdf4cpp::datatypes::registry::relaxed_parsing_mode) { if (auto const e = IRIView{b}.quick_validate(); e != IRIFactoryError::Ok) { return e; } } base = b; base_parts_cache = IRIView{base}.all_parts(); return IRIFactoryError::Ok; } void IRIFactory::set_base_unchecked(std::string_view b) noexcept { base = b; base_parts_cache = IRIView{base}.all_parts(); } } // namespace rdf4cpp