// Program Listing for File IRIFactory.cpp
// Return to documentation for file (src/rdf4cpp/IRIFactory.cpp)
#include "IRIFactory.hpp"
#include <rdf4cpp/datatypes/registry/DatatypeRegistry.hpp>
#include <rdf4cpp/util/CharMatcher.hpp>
#include <uni_algo/all.h>
namespace rdf4cpp {
/**
 * Assembles an IRI string from its individual components.
 *
 * Layout produced: scheme ":" [ "//" auth ] path [ "?" query ] [ "#" frag ].
 * If an authority is present and the (non-empty) path does not begin with '/',
 * a '/' is inserted between authority and path.
 *
 * @param scheme scheme without the trailing ':'
 * @param auth authority without the leading "//", if any
 * @param path path component
 * @param query query without the leading '?', if any
 * @param frag fragment without the leading '#', if any
 * @return view into a function-local thread_local buffer; the contents are
 *         overwritten by the next call to this function on the same thread
 */
static std::string_view construct(std::string_view scheme, std::optional<std::string_view> auth, std::string_view path,
                                  std::optional<std::string_view> query, std::optional<std::string_view> frag) noexcept {
    static thread_local std::string out;

    out.clear();
    out.reserve(std::bit_ceil(scheme.size() + 1 + path.size()));

    out += scheme;
    out += ':';

    if (auth.has_value()) {
        out += "//";
        out += *auth;

        // a rootless, non-empty path needs a separator after the authority
        if (!path.empty() && path.front() != '/') {
            out += '/';
        }
    }

    out += path;

    if (query.has_value()) {
        out += '?';
        out += *query;
    }

    if (frag.has_value()) {
        out += '#';
        out += *frag;
    }

    return out;
}
/**
 * Returns the first segment of a path: a leading '/' (if there is one)
 * plus everything up to, but not including, the next '/'.
 * If there is no further '/', the whole path is returned.
 *
 * @param path path to take the first segment from
 * @return prefix of path, viewing the same underlying characters
 */
static std::string_view first_path_segment(std::string_view path) noexcept {
    bool const has_leading_slash = !path.empty() && path.front() == '/';

    // skip the leading slash (if any) so that it becomes part of the segment
    size_t const search_from = has_leading_slash ? 1 : 0;

    // find yields npos when there is no further slash, substr then keeps everything
    return path.substr(0, path.find('/', search_from));
}
/**
 * Removes the last path segment and its preceding '/' (if any) from path,
 * as required by step 2.C of RFC 3986 section 5.2.4.
 *
 * Examples: "/a/b" -> "/a", "/a" -> "", "a" -> "", "" -> "".
 *
 * @param path buffer to truncate in place
 */
static void remove_last_path_segment(std::string &path) noexcept {
    auto const last_slash = path.find_last_of('/');
    if (last_slash == std::string::npos) {
        // no slash at all: the whole buffer is the last segment,
        // the "preceding '/'" is optional according to the RFC
        path.clear();
        return;
    }

    // cut at the slash, dropping the slash together with the segment after it
    path.resize(last_slash);
}
/**
 * Removes "." and ".." segments from a path.
 * Adapted from https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 * The step labels (2.A - 2.E) in the comments refer to that section.
 *
 * @param src path to normalize (serves as the "input buffer" of the algorithm)
 * @return view into a function-local thread_local buffer; the contents are
 *         overwritten by the next call to this function on the same thread
 */
static std::string_view remove_dot_segments(std::string_view src) noexcept {
    thread_local static std::string out;

    out.clear();
    out.reserve(std::bit_ceil(src.size()));

    while (!src.empty()) {
        if (src.starts_with("./")) {
            // 2.A: drop a leading "./"
            src.remove_prefix(2);
        } else if (src.starts_with("../")) {
            // 2.A: drop a leading "../"
            src.remove_prefix(3);
        } else if (src.starts_with("/./")) {
            // 2.B: "/./" collapses to "/", i.e. only "/." is dropped
            src.remove_prefix(2);
        } else if (src == "/.") {
            // 2.B: trailing "/." where "." is the complete last segment.
            // The RFC replaces it with "/" in the input; the following iteration
            // would match none of the special cases and simply move that "/" to the output.
            // Emitting the "/" right away and stopping gives the same result.
            out.push_back('/');
            break;
        } else if (src.starts_with("/../")) {
            // 2.C: "/../" collapses to "/" and cancels the most recent output segment
            remove_last_path_segment(out);
            src.remove_prefix(3);
        } else if (src == "/..") {
            // 2.C: trailing "/..", same shortcut as for the trailing "/." above
            remove_last_path_segment(out);
            out.push_back('/');
            break;
        } else if (src == ".." || src == ".") {
            // 2.D: nothing but a dot segment is left, it is discarded
            break;
        } else {
            // 2.E: move the first segment (with its leading '/', if any) to the output
            auto const segment = first_path_segment(src);
            out.append(segment);
            src.remove_prefix(segment.size());
        }
    }

    return out;
}
/**
 * Merges a relative-path reference with the path of the base IRI
 * (see https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3).
 *
 * - If the base has a scheme and an empty path, the result is "/" + path.
 * - Otherwise the result is path appended to everything of the base path up to and
 *   including its right-most '/'. If the base path contains no '/' at all, the
 *   entire base path is excluded and the result is just path.
 *
 * NOTE(review): RFC 3986 phrases the precondition of the first case as
 * "base has a defined authority component and an empty path"; this code checks
 * for the scheme instead - confirm that this is intended.
 *
 * @param base components of the base IRI
 * @param path relative path of the reference
 * @return view into a function-local thread_local buffer; the contents are
 *         overwritten by the next call to this function on the same thread
 */
static std::string_view merge_path_with_base(IRIView::AllParts const &base, std::string_view path) noexcept {
    static thread_local std::string r;
    r.clear();

    if (base.scheme.has_value() && base.path.empty()) {
        r.reserve(std::bit_ceil(path.size() + 1));
        r.push_back('/');
        r.append(path);
        return r;
    }

    std::string_view const base_path = base.path;
    r.reserve(std::bit_ceil(base_path.size() + path.size() + 1));

    // keep the base path up to and including its right-most '/';
    // without any '/' nothing of the base path is kept
    auto const last_slash = base_path.rfind('/');
    if (last_slash != std::string_view::npos) {
        r.append(base_path.substr(0, last_slash + 1));
    }

    r.append(path);
    return r;
}
/**
 * Resolves a (possibly relative) IRI reference against the components of a base IRI.
 * The case distinction follows the shape of the reference resolution
 * algorithm of RFC 3986 section 5.2.2.
 *
 * The scheme of base is dereferenced without a check whenever rel has no scheme of its own,
 * so base must have a scheme in that case.
 *
 * @tparam always_remove_dots if true, dot segments are also removed when rel already has a scheme;
 *         if false, such a rel is returned unchanged
 * @param base components of the base IRI
 * @param rel reference to resolve
 * @return either rel itself or a view into a thread_local buffer of one of the
 *         helper functions (overwritten by subsequent calls on the same thread)
 */
template<bool always_remove_dots>
static std::string_view to_absolute(IRIView::AllParts const &base, std::string_view rel) noexcept {
    auto const [rel_scheme, rel_auth, rel_path, rel_query, rel_frag] = IRIView{rel}.all_parts();

    // rel has its own scheme: the base does not contribute anything
    if (rel_scheme.has_value()) {
        if constexpr (!always_remove_dots) {
            return rel;
        } else {
            return construct(*rel_scheme, rel_auth, remove_dot_segments(rel_path), rel_query, rel_frag);
        }
    }

    auto const &[base_scheme, base_auth, base_path, base_query, _base_frag] = base;

    // rel has its own authority: only the scheme is taken from the base
    if (rel_auth.has_value()) {
        return construct(*base_scheme, rel_auth, remove_dot_segments(rel_path), rel_query, rel_frag);
    }

    // rel has no path: the base path is used as is, the query of rel wins if it has one
    if (rel_path.empty()) {
        auto const query = rel_query.has_value() ? rel_query : base_query;
        return construct(*base_scheme, base_auth, base_path, query, rel_frag);
    }

    // absolute paths replace the base path, relative ones are merged with it first
    std::string_view const unnormalized = rel_path.starts_with('/') ? rel_path
                                                                    : merge_path_with_base(base, rel_path);
    return construct(*base_scheme, base_auth, remove_dot_segments(unnormalized), rel_query, rel_frag);
}
/**
 * Creates a factory with the given base IRI.
 *
 * @param base base IRI, passed to set_base
 * @throws std::invalid_argument if set_base does not report IRIFactoryError::Ok
 */
IRIFactory::IRIFactory(std::string_view base) {
    auto const status = set_base(base);
    if (status == IRIFactoryError::Ok) {
        return;
    }

    throw std::invalid_argument{"invalid base"};
}
/**
 * Creates a factory with the given prefix map and base IRI.
 *
 * @param prefixes prefix map, moved into the factory
 * @param base base IRI, passed to set_base
 * @throws std::invalid_argument if set_base does not report IRIFactoryError::Ok
 */
IRIFactory::IRIFactory(prefix_map_type &&prefixes, std::string_view base) : prefixes(std::move(prefixes)) {
    auto const status = set_base(base);
    if (status == IRIFactoryError::Ok) {
        return;
    }

    throw std::invalid_argument{"invalid base"};
}
/**
 * Resolves rel against the cached parts of the current base and creates an IRI from the result.
 * Dot segments are removed even if rel already has a scheme (to_absolute<true>).
 *
 * @param rel IRI reference to resolve
 * @param node_storage node storage handed on to create_and_validate
 * @return the result of create_and_validate for the resolved IRI
 */
nonstd::expected<IRI, IRIFactoryError> IRIFactory::from_relative(std::string_view rel, storage::DynNodeStoragePtr node_storage) const noexcept {
    std::string_view const absolute = to_absolute<true>(base_parts_cache, rel);
    return create_and_validate(absolute, node_storage);
}
/**
 * Resolves rel against the cached parts of the current base and creates an IRI from the result.
 * If rel already has a scheme it is used unchanged (to_absolute<false>).
 *
 * @param rel IRI reference to resolve
 * @param node_storage node storage handed on to create_and_validate
 * @return the result of create_and_validate for the resolved IRI
 */
nonstd::expected<IRI, IRIFactoryError> IRIFactory::from_maybe_relative(std::string_view rel, storage::DynNodeStoragePtr node_storage) const noexcept {
    std::string_view const absolute = to_absolute<false>(base_parts_cache, rel);
    return create_and_validate(absolute, node_storage);
}
/**
 * Creates an IRI from a prefixed name by concatenating the expansion
 * registered for prefix with local.
 * If the concatenation is a relative IRI it is resolved via from_relative,
 * otherwise it goes directly to create_and_validate.
 *
 * @param prefix prefix to look up in the prefix map
 * @param local local part appended to the expansion of prefix
 * @param node_storage node storage handed on to the creating function
 * @return IRIFactoryError::UnknownPrefix if prefix is not registered,
 *         otherwise the result of from_relative / create_and_validate
 */
nonstd::expected<IRI, IRIFactoryError> IRIFactory::from_prefix(std::string_view prefix, std::string_view local, storage::DynNodeStoragePtr node_storage) const {
    auto const entry = prefixes.find(prefix);
    if (entry == prefixes.end()) {
        return nonstd::make_unexpected(IRIFactoryError::UnknownPrefix);
    }

    auto const &expansion = entry->second;

    // scratch buffer reused across calls on the same thread
    static thread_local std::string full;
    full.clear();
    full.reserve(expansion.size() + local.size());
    full.append(expansion);
    full.append(local);

    if (!IRIView{full}.is_relative()) {
        return create_and_validate(full, node_storage);
    }

    return from_relative(full, node_storage);
}
/**
 * Creates an IRI from iri, running IRIView::quick_validate on it first
 * unless datatypes::registry::relaxed_parsing_mode is set.
 *
 * @param iri IRI string to create the node from
 * @param node_storage node storage passed to IRI::make_unchecked
 * @return the error reported by quick_validate if it is not IRIFactoryError::Ok,
 *         otherwise the IRI created by IRI::make_unchecked
 */
nonstd::expected<IRI, IRIFactoryError> IRIFactory::create_and_validate(std::string_view iri, storage::DynNodeStoragePtr node_storage) noexcept {
    if (rdf4cpp::datatypes::registry::relaxed_parsing_mode) {
        // relaxed mode: no validation at all
        return IRI::make_unchecked(iri, node_storage);
    }

    auto const status = IRIView{iri}.quick_validate();
    if (status != IRIFactoryError::Ok) {
        return nonstd::make_unexpected(status);
    }

    return IRI::make_unchecked(iri, node_storage);
}
/**
 * Validates prefix and, if it is acceptable, registers expanded for it.
 *
 * Accepted prefixes:
 *  - the empty prefix
 *  - a first code point matched by PNCharsBaseMatcher, followed by any number of code points
 *    matched by PNCharsMatcher or equal to '.', where the last code point must not be '.'
 * (this appears to mirror the PN_PREFIX production of the Turtle grammar)
 *
 * NOTE(review): prefix is decoded with una::views::utf8; confirm how that view reports
 * ill-formed input and whether the substituted code point can pass the matchers.
 *
 * @param prefix prefix name (without the trailing ':')
 * @param expanded text the prefix expands to; not validated here
 * @return IRIFactoryError::InvalidPrefix if prefix is rejected (nothing is stored in that case),
 *         IRIFactoryError::Ok otherwise
 */
IRIFactoryError IRIFactory::assign_prefix(std::string_view prefix, std::string_view expanded) {
    using namespace util::char_matcher_detail;

    auto code_points = prefix | una::views::utf8;
    auto cursor = code_points.begin();

    if (cursor != code_points.end()) {
        // the first code point has a stricter character class than the rest
        auto last = *cursor;
        if (!PNCharsBaseMatcher.match(last)) {
            return IRIFactoryError::InvalidPrefix;
        }

        static constexpr auto tail_matcher = PNCharsMatcher | ASCIIPatternMatcher{"."};
        for (++cursor; cursor != code_points.end(); ++cursor) {
            last = *cursor;
            if (!tail_matcher.match(last)) {
                return IRIFactoryError::InvalidPrefix;
            }
        }

        // dots are fine in the middle but not at the end
        if (last == '.') {
            return IRIFactoryError::InvalidPrefix;
        }
    }

    // expanded can only be checked once the full IRI has been created
    assign_prefix_unchecked(prefix, expanded);
    return IRIFactoryError::Ok;
}
/**
 * Registers expanded as the expansion of prefix without validating either of them.
 * An existing entry for prefix is overwritten.
 *
 * @param prefix prefix name used as key of the prefix map
 * @param expanded text the prefix expands to
 */
void IRIFactory::assign_prefix_unchecked(std::string_view prefix, std::string_view expanded) {
    // the map is indexed with an owning copy of the prefix
    prefixes[std::string{prefix}] = expanded;
}
/**
 * Removes the entry for prefix from the prefix map.
 * Does nothing if there is no such entry.
 *
 * @param prefix prefix name to remove
 */
void IRIFactory::clear_prefix(std::string_view prefix) {
    if (auto const entry = prefixes.find(prefix); entry != prefixes.end()) [[likely]] {
        prefixes.erase(entry);
    }
}
std::string_view IRIFactory::get_base() const noexcept {
return base;
}
/**
 * Sets the base IRI, running IRIView::quick_validate on it first
 * unless datatypes::registry::relaxed_parsing_mode is set.
 * On a validation error the stored base is left untouched.
 *
 * @param b new base IRI
 * @return the error reported by quick_validate if it is not IRIFactoryError::Ok,
 *         IRIFactoryError::Ok otherwise
 */
IRIFactoryError IRIFactory::set_base(std::string_view b) noexcept {
    bool const must_validate = !rdf4cpp::datatypes::registry::relaxed_parsing_mode;
    if (must_validate) {
        auto const status = IRIView{b}.quick_validate();
        if (status != IRIFactoryError::Ok) {
            return status;
        }
    }

    // stores b and refreshes the cached parts of the base
    set_base_unchecked(b);
    return IRIFactoryError::Ok;
}
/**
 * Sets the base IRI without validating it and refreshes the cached parts of the base.
 *
 * @param b new base IRI
 */
void IRIFactory::set_base_unchecked(std::string_view b) noexcept {
    base.assign(b);

    // the cache is computed from the stored copy, not from the caller's view
    base_parts_cache = IRIView{base}.all_parts();
}
} // namespace rdf4cpp