Source code for calamus.utils

# -*- coding: utf-8 -*-
#
# Copyright 2017-2020- Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Calamus utilities."""

import types
import typing

from lazy_object_proxy.compat import with_metaclass
from lazy_object_proxy.slots import Proxy as LazyProxy
from lazy_object_proxy.slots import _ProxyMetaType

# JSON-LD keyword tokens (the ``@``-prefixed keys). ``validate_field_properties``
# skips any data key found in this list, so these keys are never looked up in
# the ontology and never reported as valid or invalid properties.
JSON_LD_SYNTAX_TOKENS = [
    "@id",
    "@type",
    "@base",
    "@container",
    "@context",
    "@direction",
    "@graph",
    "@import",
    "@included",
    "@index",
    "@json",
    "@language",
    "@list",
    "@nest",
    "@none",
    "@prefix",
    "@propagate",
    "@protected",
    "@reverse",
    "@set",
    "@value",
    "@version",
    "@vocab",
]


def _get_ontology_query():
    """Build the prepared SPARQL ``ask`` query used to check ontology properties.

    The query asks whether ``?property`` is typed as an OWL ``DatatypeProperty``
    or an OWL ``ObjectProperty``. ``prepareQuery`` is rather slow, so this
    factory is wrapped in a lazy proxy and only executed when needed.
    """
    from rdflib.plugins.sparql import prepareQuery

    ask_is_owl_property = (
        "ask { { ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> .} UNION { ?property rdf:type "
        "<http://www.w3.org/2002/07/owl#ObjectProperty> .} }"
    )
    return prepareQuery(ask_is_owl_property)


# Wrapped in a lazy proxy so the (slow) ``prepareQuery`` call inside
# ``_get_ontology_query`` is deferred until the query is actually needed,
# instead of running when this module is imported.
ONTOLOGY_QUERY = LazyProxy(_get_ontology_query)


def normalize_id(
    id_object: typing.Union[typing.Mapping[str, typing.Any], typing.Iterable[typing.Mapping[str, typing.Any]], str]
):
    """Turn a JsonLD id reference into its normalized form, a flat list of ids.

    * A string is returned as a one-element list.
    * A ``dict`` must contain ``"@id"``; its value is returned as a one-element list.
    * A ``list`` or generator is normalized element by element and flattened.
    * Anything else is converted with ``str()`` and returned as a one-element list.

    Raises:
        ValueError: If a ``dict`` is passed that has no ``"@id"`` key.
    """
    if isinstance(id_object, str):
        return [id_object]

    if isinstance(id_object, dict):
        if "@id" in id_object:
            return [id_object["@id"]]
        raise ValueError("No @id found in id object")

    if isinstance(id_object, (list, types.GeneratorType)):
        flattened = []
        for entry in id_object:
            # Each entry may itself be a string, an id object or a nested list.
            flattened.extend(normalize_id(entry))
        return flattened

    return [str(id_object)]
def normalize_type(type_data):
    """Normalize a JsonLD type reference into a list of strings.

    A ``list`` or generator is normalized element by element, flattened and
    returned sorted. A string is returned as a one-element list; any other
    value is converted with ``str()`` first.
    """
    if isinstance(type_data, (list, types.GeneratorType)):
        collected = []
        for element in type_data:
            collected += normalize_type(element)
        # Sorted so the result does not depend on the order types were listed in.
        collected.sort()
        return collected

    return [type_data if isinstance(type_data, str) else str(type_data)]
def normalize_value(value):
    """Normalize a JsonLD value object to a simple value.

    * A ``dict`` containing ``"@value"`` is replaced by that value.
    * A one-element ``list`` is unwrapped and its element normalized.
    * Any other ``list`` is normalized element by element.
    * Everything else is returned unchanged.
    """
    if not isinstance(value, list):
        is_value_object = isinstance(value, dict) and "@value" in value
        return value["@value"] if is_value_object else value

    if len(value) == 1:
        # single values can be single element lists in jsonld
        (only,) = value
        return normalize_value(only)

    return list(map(normalize_value, value))
def validate_field_properties(data, ontology, query=None, mem=None):
    """Validates if the field properties for data are present in the OWL ontology graph or not.

    Every key of ``data`` that is not a JSON-LD syntax token is checked against
    ``ontology`` with an ``ask`` query, unless ``mem`` already knows the answer.

    Args:
        data (dict): The data to validate.
        ontology (``rdflib.Graph``): The OWL ontology graph to validate against.
        query (``rdflib.plugins.sparql.sparql.prepareQuery``): Optional prepared query (for performance reasons).
            Defaults to the module-level ``ONTOLOGY_QUERY``.
        mem (dict): memoization for repeated calls. May hold the keys ``valid`` and ``invalid``, each a
            collection of property names whose result is already known. It is only read, never updated here;
            callers that want memoization must merge the returned result into it themselves.
            ``None`` (the default) or a missing key means "nothing known yet".

    Returns:
        dict: Key ``valid`` has all valid properties (excluding @id and @type), ``invalid`` has all invalid properties

    Raises:
        ValueError: If ``data`` is not a ``dict`` or ``ontology`` is not an ``rdflib.Graph``.
    """
    from rdflib.graph import Graph
    from rdflib.term import URIRef

    if query is None:
        query = ONTOLOGY_QUERY

    if not isinstance(data, dict):
        raise ValueError("`data` must be a dict.")

    if not isinstance(ontology, Graph):
        raise ValueError("`ontology` must be an `rdflib.Graph`")

    # A fresh empty memo per call replaces the former shared mutable default.
    if mem is None:
        mem = {}
    known_valid = mem.get("valid", ())
    known_invalid = mem.get("invalid", ())

    res = {"valid": set(), "invalid": set()}

    for prop in data:
        if prop in known_valid:
            res["valid"].add(prop)
            continue
        if prop in known_invalid:
            res["invalid"].add(prop)
            continue

        # JSON-LD keywords (``@id``, ``@type``, ...) are syntax, not ontology properties.
        if prop in JSON_LD_SYNTAX_TOKENS:
            continue

        qres = ontology.query(query, initBindings={"property": URIRef(prop)})
        # The first item of the result is taken as the answer; an empty result counts as invalid.
        if next(iter(qres), False):
            res["valid"].add(prop)
        else:
            res["invalid"].add(prop)

    return res
class Proxy(LazyProxy, with_metaclass(_ProxyMetaType)):
    """Proxy object to support lazy loading.

    The proxied object is produced by calling ``factory`` the first time
    ``__wrapped__`` is read and is then cached on the proxy. The schema and
    the original data passed to the constructor are stored on the proxy itself
    (``__proxy_schema__`` / ``__proxy_original_data__``), and
    ``__proxy_initialized__`` records whether the factory has run yet.
    """

    __slots__ = "__target__", "__factory__", "__proxy_initialized__", "__proxy_schema__", "__proxy_original_data__"

    def __init__(self, factory, schema, original_data):
        """Store the factory, schema and original data without resolving the target.

        Args:
            factory: Zero-argument callable that returns the proxied object.
            schema: Stored as ``__proxy_schema__``; not used inside this class.
            original_data: Stored as ``__proxy_original_data__``; not used inside this class.
        """
        # ``object.__setattr__`` is called directly so the values are written to
        # the proxy itself and never routed through this class's ``__setattr__``.
        object.__setattr__(self, "__factory__", factory)
        object.__setattr__(self, "__proxy_initialized__", False)
        object.__setattr__(self, "__proxy_schema__", schema)
        object.__setattr__(self, "__proxy_original_data__", original_data)

    @property
    def __wrapped__(
        self, __getattr__=object.__getattribute__, __setattr__=object.__setattr__, __delattr__=object.__delattr__
    ):
        """Return the proxied object, creating it with the factory on first access.

        The default arguments bind the plain ``object`` attribute functions to
        local names so this class's own ``__getattr__``/``__setattr__`` are
        bypassed. ``__delattr__`` is bound but not used in this body.

        Raises:
            ValueError: If ``__factory__`` was never set on the proxy.
        """
        try:
            # Already resolved: return the cached target.
            return __getattr__(self, "__target__")
        except AttributeError:
            try:
                factory = __getattr__(self, "__factory__")
            except AttributeError:
                raise ValueError("Proxy hasn't been initiated: __factory__ is missing.")
            target = factory()
            # Cache the result and record that the proxy has been resolved.
            __setattr__(self, "__target__", target)
            __setattr__(self, "__proxy_initialized__", True)
            return target

    def __setattr__(self, name, value, __setattr__=object.__setattr__):
        """Set ``name`` on the proxy if its type defines it, otherwise on the proxied object."""
        if hasattr(type(self), name):
            __setattr__(self, name, value)
        else:
            # Reading ``__wrapped__`` here resolves the target if it is not resolved yet.
            setattr(self.__wrapped__, name, value)

    def __getattr__(self, name):
        """Delegate attribute lookups that failed on the proxy to the proxied object.

        Raises:
            AttributeError: For the proxy's own bookkeeping names, which are never delegated.
        """
        # NOTE(review): presumably this guard keeps a missing bookkeeping attribute
        # from triggering resolution of the target (or recursing) - confirm.
        if name in (
            "__wrapped__",
            "__factory__",
            "__proxy_initialized__",
            "__proxy_schema__",
            "__proxy_original_data__",
        ):
            raise AttributeError(name)
        else:
            return getattr(self.__wrapped__, name)