/* Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@vu.nl WWW: http://www.swi-prolog.org Copyright (c) 2010-2018, University of Amsterdam All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ :- module(serql, [ serql_query/2, % +Query, -Result serql_query/3, % +Query, -Result, +Options serql_compile/3, % +Query, -Compiled, +Options serql_run/2 % +Compiled, -Reply ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdf_optimise)). :- use_module(library(lists)). :- use_module(library(option)). :- use_module(library(debug)). :- use_module(library(settings)). :- use_module(rdfql_util). :- include(entailment(load)). :- meta_predicate select_results(+,-,0). 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
A Prolog path expression is a conjunction of rdf/3 statements. Optional
parts are wrapped in optional/2 and a node may have the form set(List),
denoting a set of pairwise distinct values.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%!  serql_query(+Query, -Result) is nondet.
%!  serql_query(+Query, -Result, +Options) is nondet.
%
%   Run a SeRQL query. Query is either the query text or an already
%   parsed query. Similar to the ODBC interface, Result is a term
%   row(Col1, Col2, ...) for a SELECT statement or a term rdf(S,P,O)
%   for a CONSTRUCT statement. Rows or statements are returned
%   one-by-one on backtracking.
%
%   Options are passed to serql_compile/3. serql_query/2 uses
%   `[entailment(rdf)]`.

serql_query(QueryIn, Answer) :-
    serql_query(QueryIn, Answer, [entailment(rdf)]).

serql_query(QueryIn, Answer, Opts) :-
    serql_compile(QueryIn, Program, Opts),
    serql_run(Program, Answer).

%!  serql_compile(+Query, -Compiled, +Options)
%
%   Compile a SeRQL query into a term serql_query(Goal, ReplyTemplate,
%   Module) that can be executed using serql_run/2. Options:
%
%     * entailment(Entailment)
%     Entailment module to use.
%
%     * type(-Type)
%     Return one of select(VarNames) or construct
%
%   For compatibility, Options may also be a plain atom, which is then
%   taken to be the name of the entailment.

serql_compile(Source, Program, Entailment) :-
    atom(Entailment),
    Entailment \== [],                  % compatibility
    !,
    serql_compile(Source, Program, [entailment(Entailment)]).
serql_compile(Source, serql_query(Body, Template, EntailModule), Opts) :-
    serql_parse(Source, Parsed),
    compile(Parsed, Body, Template, EntailModule, Opts).
%!  compile(+Parsed, -Goal, -ReplyTemplate, -Module, +Options)
%
%   Translate a parsed and variable-expanded query into an executable
%   term. Parsed is one of
%
%     * select(Row0, VarNames, Path, Where, Distinct, Limit, Offset)
%     Goal becomes select(Final, Solutions) and ReplyTemplate is a
%     row term with the same name and arity as Row0.
%     * construct(RPath, Path, Where, Distinct, Limit, Offset)
%     Goal becomes construct(Final, Solutions) and ReplyTemplate is
%     the variable that serql_member_statement/2 relates to the
%     statements extracted from RPath.
%
%   Module is obtained from entailment_module/2 for the entailment/1
%   option (default `rdfs`). The compiled clause is printed on the
%   debug topic serql(compiled).

compile(select(Row0, VarNames, Path, Where, Distinct, Limit, Offset),
        select(Final, Solutions),
        Row,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    entailment_module(Entailment, Module),
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(select(VarNames), Options),
    % Annotate the query variables with what the WHERE clause tells
    % about them, compile the path using these annotations and remove
    % what is left of the annotations afterwards.
    where_constraints(Where, Annotations),
    serql_compile_path(Path, select, Goal),
    remove_annotations(Annotations, where),
    projection_functions(Row0, Row, Select),
    compile_optimise((Goal,Where,Select), Options, Optimised),
    serql_select_bind_null(Optimised, Final),
    debug(serql(compiled), '~@',
          [ portray_clause((q(Row) :- Final))
          ]).
compile(construct(RPath, Path, Where, Distinct, Limit, Offset),
        construct(Final, Solutions),
        RDF,
        Module,
        Options) :-
    option(entailment(Entailment), Options, rdfs),
    entailment_module(Entailment, Module),      % looked up once
    mk_solutions(Distinct, Limit, Offset, Solutions),
    set_type(construct, Options),
    where_constraints(Where, Annotations),
    serql_compile_path(Path, construct, Goal),
    remove_annotations(Annotations, where),
    statements(RPath, Statements),
    compile_optimise((Goal,Where), Options, Optimised),
    Final = (Optimised, serql_member_statement(RDF, Statements)),
    debug(serql(compiled), '~@',
          [ portray_clause((q(RDF) :- Final))
          ]).

%!  compile_optimise(+Goal0, +Options, -Goal)
%
%   Pass Goal0 through rdf_optimise/2 if the option optimise(true) is
%   given. The default for this option is the value of the setting
%   cliopatria:optimise_query. Otherwise Goal is Goal0.

compile_optimise(Goal0, Options, Goal) :-
    setting(cliopatria:optimise_query, Def),
    option(optimise(Opt), Options, Def),
    Opt == true,
    !,
    rdf_optimise(Goal0, Goal).
compile_optimise(Goal, _, Goal).

%!  mk_solutions(+Distinct, +Limit, +Offset, -Term)
%
%   Create a solutions-selecting term compatible to SPARQL. Term is
%   solutions(unsorted, Limit, Offset), wrapped in distinct/1 if
%   Distinct is the atom `distinct`.

mk_solutions(distinct, Limit, Offset,
             distinct(solutions(unsorted, Limit, Offset))) :- !.
mk_solutions(_, Limit, Offset, solutions(unsorted, Limit, Offset)).
%!  set_type(+Type, +Options) is det.
%
%   Fill option type(X). If Options contains type(T), T is unified
%   with Type. If this unification fails, a type_error is raised that
%   holds the name of T as the expected query type and the name of
%   Type as the culprit. Succeeds without further action if Options
%   has no type/1 term.

set_type(Type, Options) :-
    memberchk(type(T), Options),
    !,
    (   T = Type
    ->  true
    ;   functor(T, Expected, _),
        functor(Type, Found, _),
        throw(error(type_error(query_type(Expected), Found), _))
    ).
set_type(_, _).


%!  serql_run(+Term, -Result)
%
%   Run a query compiled by serql_compile/3. Term is a term
%   serql_query(Parsed, Reply, Module); Result is unified with Reply
%   for each solution.

serql_run(serql_query(Parsed, Reply, Module), Reply) :-
    serql_run(Parsed, Reply, Module).

%   serql_run(+Compiled, -Reply, +Module)
%
%   Both query types run their goal in Module through
%   select_results/3.

serql_run(select(Goal, Solutions), Reply, Module) :-
    select_results(Solutions, Reply, Module:Goal).
serql_run(construct(Goal, Solutions), Reply, Module) :-
    select_results(Solutions, Reply, Module:Goal).

%!  select_results(+Spec, -Reply, :Goal)
%
%   Apply ordering and limits on result-set. Spec is the term created
%   by mk_solutions/4. The work is delegated to select_results/6,
%   which is not defined in this file, passing `distinct` or `all` as
%   its first argument.

select_results(distinct(solutions(Order, Limit, Offset)), Reply, Goal) :-
    !,
    select_results(distinct, Offset, Limit, Order, Reply, Goal).
select_results(solutions(Order, Limit, Offset), Reply, Goal) :-
    select_results(all, Offset, Limit, Order, Reply, Goal).


                 /*******************************
                 *           COMPILER           *
                 *******************************/

%!  serql_compile_path(+PathExpr, +Type, -PrologGoal)
%
%   Compile a Serql path expression into a plain Prolog goal.  Type
%   is one of 'select' or 'construct'. The clauses are tried in this
%   order:
%
%     1. rdf/3 with a set(List) subject or object is expanded into one
%        goal per member, guarded by `\==` tests between the members.
%     2. rdf/3 with a reified statement as subject or object is
%        compiled with a fresh statement identifier, followed by the
%        goals that describe the reified statement.
%     3. Conjunctions are compiled recursively.
%     4. optional(Id, Path) becomes a soft-cut. For `construct`, Id is
%        unified with `true` or `false` at runtime, telling whether
%        the optional part matched.
%     5. A plain rdf/3 is combined with the goals derived from the
%        `where` annotations on its subject, predicate and object.
%     6. Anything else is passed unmodified.

serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(S, Set),
    !,
    make_set_subj_conj(Set, [], P, O, Type, Conj).
serql_compile_path(rdf(S,P,O), Type, Conj) :-
    set(O, Set),
    !,
    make_set_obj_conj(Set, [], S, P, Type, Conj).
serql_compile_path(rdf(S0, P, O), Type, Goal) :-
    reified(S0, S, GS),
    !,
    serql_compile_path(rdf(S, P, O), Type, G0),
    Goal = (G0, GS).
serql_compile_path(rdf(S, P, O0), Type, Goal) :-
    reified(O0, O, GS),
    !,
    serql_compile_path(rdf(S, P, O), Type, G0),
    Goal = (G0, GS).
serql_compile_path((A0,B0), Type, (A,B)) :-
    !,
    serql_compile_path(A0, Type, A),
    serql_compile_path(B0, Type, B).
serql_compile_path(optional(Id, A0), construct, (A *-> Id=true ; Id=false)) :-
    !,
    serql_compile_path(A0, construct, A).
serql_compile_path(optional(_, A0), select, (A *-> true ; true)) :-
    !,
    serql_compile_path(A0, select, A).
serql_compile_path(rdf(S,P,O0), _, Goal) :-
    !,
    resource_annotations(S, GS),
    resource_annotations(P, GP),
    object_annotations(O0, O, GO),
    clean_conj((GS, GP, rdf(S,P,O), GO), Goal).
serql_compile_path(G, _, G).

%   reified(+Node, -StatementId, -Goal)
%
%   True if Node is a term rdf(S,P,O). Goal is the conjunction of four
%   rdf/3 goals that relate StatementId to rdf:Statement, S, P and O.
%
%   The first clause rejects unbound nodes: an unbound first argument
%   unifies with 0, after which the clause cuts and fails.

reified(0, _, _) :-                     % catch variables
    !, fail.
reified(rdf(S,P,O), StatementId,
        (   rdf(StatementId, Type, Statement),
            rdf(StatementId, Subject, S),
            rdf(StatementId, Predicate, P),
            rdf(StatementId, Object, O)
        )) :-
    rdf_equal(Type, rdf:type),
    rdf_equal(Subject, rdf:subject),
    rdf_equal(Predicate, rdf:predicate),
    rdf_equal(Object, rdf:object),
    rdf_equal(Statement, rdf:'Statement').

%   make_set_subj_conj(+Set, +Seen, +P, +O, +Type, -Goal)
%
%   Compile rdf(H,P,O) for each member H of Set. Seen holds the
%   members handled so far; make_diff/3 adds a `\==` test between the
%   current member and each of them.

make_set_subj_conj([], _, _, _, _, true).       % should not happen
make_set_subj_conj([Last], [], P, O, Type, Goal) :-
    !,
    serql_compile_path(rdf(Last, P, O), Type, Goal).
make_set_subj_conj([Last], Diff, P, O, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(Last, P, O), Type, Goal),
    make_diff(Diff, Last, Diffs).
make_set_subj_conj([H|T], Diff, P, O, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(H, P, O), Type, Goal),
    make_diff(Diff, H, Diffs),
    make_set_subj_conj(T, [H|Diff], P, O, Type, More).

%   make_set_obj_conj(+Set, +Seen, +S, +P, +Type, -Goal)
%
%   As make_set_subj_conj/6, but the set members are used as object.

make_set_obj_conj([], _, _, _, _, true).        % should not happen
make_set_obj_conj([Last], [], S, P, Type, Goal) :-
    !,
    serql_compile_path(rdf(S, P, Last), Type, Goal).
make_set_obj_conj([Last], Diff, S, P, Type, (Goal, Diffs)) :-
    !,
    serql_compile_path(rdf(S, P, Last), Type, Goal),
    make_diff(Diff, Last, Diffs).
make_set_obj_conj([H|T], Diff, S, P, Type, (Goal, Diffs, More)) :-
    !,
    serql_compile_path(rdf(S, P, H), Type, Goal),
    make_diff(Diff, H, Diffs),
    make_set_obj_conj(T, [H|Diff], S, P, Type, More).

%   make_diff(+Others, +To, -Goal)
%
%   Goal is a conjunction of `X \== To` for each X in Others, or
%   `true` if Others is empty.

make_diff([], _, true).
make_diff([Last], To, (Last \== To)) :- !.
make_diff([H|T], To, (H \== To, More)) :-
    make_diff(T, To, More).


%!  statements(+Graph, -ListOfTriples)
%
%   Extract a plain list of triples from a CONSTRUCT path-expression.
%   Optional parts of the tree are represented as
%   optional(Bool, ListOfTriples). Using CONSTRUCT * (i.e. when the
%   executed path is the result path) the goal generated by the
%   compiler will unify Bool with true or false. See also
%   member_statement/2.

statements(Graph, Statements) :-
    phrase(statements(Graph), Statements).

%   statements(+Graph)//
%
%   Emit the triples of Graph. A set(List) subject is expanded by
%   subj_statements//3. The predicate of other triples is replaced
%   using map_builtin/2 if it is bound and has a mapping. Terms that
%   are not rdf/3, a conjunction or optional/2 emit nothing.

statements(rdf(S,P,O)) -->
    { set(S, Set) },
    !,
    subj_statements(Set, P, O).
statements(rdf(S,P0,O)) -->
    !,
    {   nonvar(P0),
        map_builtin(P0, P)
    ->  true
    ;   P = P0
    },
    [ rdf(S,P,O) ].
statements((A,B)) -->
    !,
    statements(A),
    statements(B).
statements(optional(Id, A)) -->
    !,
    { phrase(statements(A), OptionalStatements) },
    [ optional(Id, OptionalStatements) ].
statements(_) -->
    [].

% Rewrite the map_builtin/2 facts below at load time: both arguments
% are passed through rdf_global_id/2.

term_expansion(map_builtin(B0, P0), map_builtin(B, P)) :-
    rdf_global_id(B0, B),
    rdf_global_id(P0, P).

%   map_builtin(?SeRQLProperty, ?Property)
%
%   Map the SeRQL `direct` properties onto the RDF/RDFS property that
%   is used in constructed triples.

map_builtin(serql:directSubClassOf,    rdfs:subClassOf).
map_builtin(serql:directSubPropertyOf, rdfs:subPropertyOf).
map_builtin(serql:directType,          rdf:type).

%   subj_statements(+Subjects, +P, +O)//
%
%   Emit rdf(H,P,O) for each H in Subjects. If O is a set(List) as
%   well, emit a triple for each combination.

subj_statements([], _, _) -->
    [].
subj_statements([H|T], P, O) -->
    (   { set(O, Set) }
    ->  obj_statements(Set, H, P)
    ;   [ rdf(H, P, O) ]
    ),
    subj_statements(T, P, O).

%   obj_statements(+Objects, +S, +P)//
%
%   Emit rdf(S,P,H) for each H in Objects.

obj_statements([], _, _) -->
    [].
obj_statements([H|T], S, P) -->
    [ rdf(S, P, H) ],
    obj_statements(T, S, P).

%   set(+Node, -Set)
%
%   True if Node is bound to a term set(Set). The nonvar/1 test avoids
%   binding an unbound Node.

set(Node, Set) :-
    nonvar(Node),
    Node = set(Set).


                 /*******************************
                 *       SELECT FUNCTIONS       *
                 *******************************/

%   projection_functions(+Row0, -Row, -Map)
%
%   Row is a term with the same name and arity as Row0. Arguments of
%   Row0 that are unbound are shared with Row. For each other
%   argument A0, Row gets a fresh variable A and serql_eval(A0, A) is
%   added to the conjunction Map. Map is `true` if there are no such
%   arguments.

projection_functions(Row0, Row, Map) :-
    functor(Row0, Functor, Arity),
    functor(Row, Functor, Arity),
    projection_functions(0, Arity, Row0, Row, true, Map).

%   projection_functions(+I0, +Arity, +Row0, +Row, +Map0, -Map)
%
%   Process the arguments I0+1..Arity, extending Map0 into Map.

projection_functions(Arity, Arity, _, _, Map, Map) :- !.
projection_functions(I0, Arity, Row0, Row, Map0, Map) :-
    I is I0 + 1,
    arg(I, Row0, A0),
    (   var(A0)
    ->  arg(I, Row, A0),
        projection_functions(I, Arity, Row0, Row, Map0, Map)
    ;   arg(I, Row, A),
        add_conj(Map0, serql_eval(A0, A), Map1),
        projection_functions(I, Arity, Row0, Row, Map1, Map)
    ).

%   add_conj(+Conj0, +Goal, -Conj)
%
%   Append Goal to Conj0, where `true` acts as the empty conjunction.

add_conj(true, X, X) :- !.
add_conj(C0, G, (C0,G)).
/******************************* * WHERE CONSTRAINTS * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The idea of this step is to turn where clauses into constraints on variables. Supported annotations (in standard order of terms): any literal resource eq(Value) like(Pattern) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ %! where_constraints(+Goal, -Annotations) % % Each annotation is either a plain annotation or a term % or(ListOfAlternatives). The latter is used if different paths % through the control-structure yields different annotations. where_constraints(Goal, Annotations) :- bagof(Annot, where_constraint_list(Goal, Annot), AltAnnots), sort_lol(AltAnnots, AltAnnots1), join_alt_annots(AltAnnots1, Annotations). %! where_constraint_list(+Goal, -Annotations) % % Interpret Goal, making annotations on the variables. % Backtracking yields alternative annotations due to choicepoints % in Goal. where_constraint_list(Goal, Annotations) :- where_constraints(Goal, AttrVars, []), attrs_to_terms(AttrVars, Annotations). where_constraints((A,B)) --> !, where_constraints(A), where_constraints(B). where_constraints((A;B)) --> !, ( where_constraints(A) ; where_constraints(B) ). where_constraints(serql_compare(like, Var, Pattern)) --> !, constrain(Var, like(Pattern)). where_constraints(serql_compare(=, Var, Value)) --> !, constrain(Var, eq(Value)). where_constraints(rdf_is_literal(V)) --> !, constrain(V, literal). where_constraints(rdf_is_resource(V)) --> !, constrain(V, resource). where_constraints(rdf(S,P,_)) --> !, constrain(S, resource), constrain(P, resource). where_constraints(_) --> []. constrain(Var, Cond) --> { var(Var) }, !, ( { get_attr(Var, where, C0) } -> { put_attr(Var, where, (Cond, C0)) }, [] ; { put_attr(Var, where, Cond) }, [ Var ] ). constrain(label(X), Cond) --> !, constrain(X, (literal, Cond)). constrain(lang(X), Cond) --> !, constrain(X, (literal, Cond)). 
constrain(datatype(X), Cond) --> !, constrain(X, (literal, Cond)). constrain(_, _) --> []. %! join_alt_annots(+ListOfAnnotLists, -AnnotatedVars) % % ListOfAnnotLists is a list of alternative annotations due to % choicepoints. Each annotation list represents annotations in the % form Var = Annotation. AnnotatedVars is a list of variables with % attributes representing their annotations. join_alt_annots(LoL, Annotated) :- smallest_var(LoL, Var), !, var_annotations(Var, LoL, LoL1, Annotations0), sort(Annotations0, Annotations), % remove duplicates ( empty_annotations(Annotations) -> join_alt_annots(LoL1, Annotated) ; put_annotations(Annotations, Var), Annotated = [Var|T], join_alt_annots(LoL1, T) ). join_alt_annots(LoL, []) :- assertion(maplist(=([]), LoL)). %! normalise_annotation(+A0, -A) % % Create a normalised version of an annotation for easy % processing. Currently only deals with annotations that are a % conjunction. normalise_annotation(A0, A) :- conj_to_list(A0, L0, []), sort(L0, L), list_do_conj(L, A). conj_to_list((A,B)) --> !, conj_to_list(A), conj_to_list(B). conj_to_list(A) --> [A]. list_do_conj([], any). list_do_conj([H], H) :- !. list_do_conj([H|T0], (H,T)) :- list_do_conj(T0, T). %! empty_annotations(+List) % % True if there is no sensible conclusion we can draw using the % annotations found. This is often the case if multiple paths in a % disjunction do not deal with all variables. Note that this is % not necessarily the end of the story. We could rewrite % % A,(C1;C2) into (A,C1);(A,C2) % % And apply optimisation on both branches. empty_annotations([]) :- !. empty_annotations(List) :- memberchk(any, List). put_annotations([], _). put_annotations([One], Var) :- !, put_attr(Var, where, One). put_annotations(More, Var) :- put_attr(Var, where, or(More)). %! smallest_var(+ListOfList, -Smallest) % % Get the smallest (in standard order of terms) annotated % variable. smallest_var([[S0=_|_]|T], Smallest) :- smallest_var(T, S0, Smallest). 
smallest_var([[]|T], Smallest) :- smallest_var(T, Smallest). smallest_var([], S, S). smallest_var([[S1=_|_]|T], S0, S) :- !, smallest(S1, S0, S2), smallest_var(T, S2, S). smallest_var([[]|T], S0, S) :- smallest_var(T, S0, S). smallest(A, B, S) :- ( A @< B -> S = A ; S = B ). %! var_annotations(+Var, +LoL0, -LoL, -Annotations) % % Get all Annotation for Var. Note that the annotation is either % the head of the list or not in the list. var_annotations(_, [], [], []) :- !. var_annotations(Var, [[Var=A|TA0]|TL0], LoL, [A|TA]) :- !, ( TA0 == [] -> LoL = TL ; LoL = [TA0|TL] ), var_annotations(Var, TL0, TL, TA). var_annotations(Var, [A0|TL0], [A0|TL], [any|A]) :- var_annotations(Var, TL0, TL, A). where:attr_unify_hook(_,_) :- fail. where:attr_portray_hook(Val, _Var) :- print(Val). %! attrs_to_terms(AttrsVars, List) % % Convert X{where=A} into X=A terms. Without this we cannot use % bagof/3 and maintain the variables. Not sure this is a bug in % bagof or not. attrs_to_terms([], []). attrs_to_terms([H|T0], [H=A|T]) :- get_attr(H, where, A0), del_attr(H, where), normalise_annotation(A0, A), attrs_to_terms(T0, T). %! sort_lol(+ListOfList, ListOfSortedLists) sort_lol([], []). sort_lol([H0|T0], [H|T]) :- sort(H0, H), sort_lol(T0, T). %! remove_annotations(+List, +Attr) remove_annotations([], _). remove_annotations([H|T], A) :- del_attr(H, A), remove_annotations(T, A). %! object_annotations(+In, -Out, -Goal) object_annotations(O0, O, G) :- get_attr(O0, where, Annotations), object_annot(Annotations, O0, O, G), !. object_annotations(O, O, true). object_annot((literal, like(Pattern)), O, literal(like(Pattern), L), O = literal(L)). %! resource_annotations(R, G) resource_annotations(R, Goal) :- get_attr(R, where, Annotations), resource_annot(Annotations, R, Goal), !. resource_annotations(_, true). resource_annot(eq(R1), R, true) :- % where A = B var(R1), !, del_attr(R, where), R = R1. resource_annot(eq(query(String)), R, true) :- !, del_attr(R, where), R = String. 
resource_annot(or(List), R, Goal) :- eq_list(List, Resources), !, Goal = member(R, Resources). eq_list([], []). eq_list([eq(query(R))|T0], [R|T]) :- eq_list(T0, T). %! clean_conj(+Goal0, -Goal) % % Remove redundant true statements from a conjunction clean_conj((true, G0), G) :- !, clean_conj(G0, G). clean_conj((G0, true), G) :- !, clean_conj(G0, G). clean_conj(G, G). /******************************* * PARSER * *******************************/ %! serql_parse(+Input, -ParseTree) % % Parse the SeRQL statement Input into a Prolog representation. serql_parse(Codes, Query) :- is_list(Codes), !, ( phrase(tokens(Tokens), Codes), phrase(query(Query0, NameSpaces), Tokens), expand_vars(Query0, Query1), expand_uris(Query1, NameSpaces, Query) -> true ; syntax_error(unknown) ). serql_parse(Atomic, Query) :- atomic(Atomic), !, atom_codes(Atomic, Codes), serql_parse(Codes, Query). serql_parse(Input, _) :- throw(error(type_error(text, Input), _)). /******************************* * ERRORS * *******************************/ syntax_error(What) :- throw(error(syntax_error(What), context(_, 'in SeRQL query'))). /******************************* * NAMESPACES * *******************************/ expand_uris(Var, _, Var) :- var(Var), !. expand_uris(uri(URI), _, URI) :- !. % expand_uris(uri(NS, URI0), Map, URI) :- % foo:bar !, ( memberchk(NS=Prefix, Map) -> true ; ns(NS, Prefix) -> true ; throw(error(existence_error(namespace, NS), _)) ), atom_concat(Prefix, URI0, URI). expand_uris(old_uri(NS, URI0), Map, URI) :- % !, ( ( memberchk(NS=Prefix, Map) ; ns(NS, Prefix) ) -> atom_concat(Prefix, URI0, URI) ; concat_atom([NS, :, URI0], URI) ). expand_uris(Q0, Map, Q) :- compound(Q0), !, functor(Q0, Name, Arity), functor(Q, Name, Arity), expand_uris(0, Arity, Q0, Map, Q). expand_uris(Q, _, Q). expand_uris(Arity, Arity, _, _, _) :- !. expand_uris(I0, Arity, Q0, Map, Q) :- I is I0 + 1, arg(I, Q0, A0), arg(I, Q, A), expand_uris(A0, Map, A), expand_uris(I, Arity, Q0, Map, Q). %! 
%!  ns(?Id, ?URI)
%
%   Translate between namespace id and URI. If the setting
%   cliopatria:rdf_db_namespaces is true, we share the namespace
%   declarations with the SeRQL store. Otherwise the table serql_ns/2
%   is used.

ns(NS, URI) :-
    setting(cliopatria:rdf_db_namespaces, true),
    !,
    rdf_db:ns(NS, URI).
ns(NS, URI) :-
    serql_ns(NS, URI).

%   serql_ns(?Id, ?URI)
%
%   Predefined namespaces.

serql_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
serql_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
serql_ns(owl,  'http://www.w3.org/2002/07/owl#').
serql_ns(xsd,  'http://www.w3.org/2001/XMLSchema#'). % Wrong in SeRQL docs!
serql_ns(serql,'http://rdf4j.org/schema/serql#').


                 /*******************************
                 *           VARIABLES          *
                 *******************************/

%   expand_vars(+Query0, -Query)
%
%   Replace the var(Name) terms of the parsed query by Prolog
%   variables, where the same name maps to the same variable.
%
%     * For a SELECT query, the projection becomes a term row(...)
%       and a term names(...) is added that holds the name of each
%       column. Columns that are not a variable are named `-`.
%       `SELECT *` selects all variables of the path and where
%       expressions.
%     * For `CONSTRUCT *`, the path expression is also used as the
%       expression to construct.
%
%   TBD: Check that projection variables actually appear in the
%   query!

expand_vars(select(*, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(Path0-Where0, Path-Where, VarNames),
    vars(VarNames, Vars, Names),
    Row =.. [row | Vars],
    VNames =.. [names|Names].
expand_vars(select(Projection, Path0, Where0, Distinct, Limit, Offset),
            select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Projection,Path0,Where0), x(Vars,Path,Where), _VarNames),
    Row =.. [row | Vars],
    proj_names(Projection, Names),
    VNames =.. [names|Names].
expand_vars(construct(*, Path0, Where0, Distinct, Limit, Offset),
            construct(Path, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Path0,Where0), x(Path,Where), _VarNames).
expand_vars(construct(Ret0, Path0, Where0, Distinct, Limit, Offset),
            construct(Ret, Path, Where, Distinct, Limit, Offset)) :-
    !,
    var_names(x(Ret0,Path0,Where0), x(Ret,Path,Where), _VarNames).

%   var_names(+Term0, -Term, ?Map)
%
%   Term is a copy of Term0 in which each var(Name) is replaced by a
%   variable. Map is a partial list of Name=Var that is extended if a
%   new name is encountered.

var_names(var(-(V)), V, _) :- !.        % bnodes, mapped from {}
var_names(var(Name), Var, Map) :-
    member(Name=Var, Map),
    !.
var_names(Q0, Q, Map) :-
    compound(Q0),
    !,
    functor(Q0, Name, Arity),
    functor(Q, Name, Arity),
    var_names(0, Arity, Q0, Q, Map).
var_names(Q, Q, _).

%   var_names(+I0, +Arity, +Q0, +Q, ?Map)
%
%   Process the arguments I0+1..Arity of Q0 into those of Q.

var_names(Arity, Arity, _, _, _) :- !.
var_names(I0, Arity, Q0, Q, Map) :-
    I is I0 + 1,
    arg(I, Q0, A0),
    arg(I, Q, A),
    var_names(A0, A, Map),
    var_names(I, Arity, Q0, Q, Map).

%   vars(?Map, -Vars, -Names)
%
%   Split the Name=Var pairs of Map into two lists.

vars([], [], []) :- !.                  % also closes list!
vars([Name=Var|T0], [Var|TV], [Name|TN]) :-
    vars(T0, TV, TN).

%   proj_names(+Projection, -Names)
%
%   Names holds the name for each var(Name) in Projection and `-` for
%   each other element.

proj_names([], []).
proj_names([var(Var)|T0], [Var|T]) :-
    !,
    proj_names(T0, T).
proj_names([_|T0], [-|T]) :-
    proj_names(T0, T).


                 /*******************************
                 *        ERROR LOCATIONS       *
                 *******************************/

%   syntax_error(+What)//
%
%   Raise a syntax error from the grammar. The context holds the
%   tokens that have not been processed.

syntax_error(What, In, []) :-
    throw(error(syntax_error(What),
                context(_, left(In)))).

%   add_error_location(+Error, +Tokens)
%
%   Re-throw Error. If Error is a syntax error raised by
%   syntax_error//1, its context is replaced by serql_parse/2 and an
%   atom that shows at most five tokens before and after the error
%   location, which is marked using `**here**`. Any other exception is
%   re-thrown unmodified. This predicate never succeeds: succeeding or
%   failing would make catch/3 in query//2 hide the exception.

add_error_location(error(syntax_error(What),
                         context(_, left(After))),
                   Tokens) :-
    append(Before, After, Tokens),
    length(Before, BL),
    (   BL =< 5
    ->  BC = Before
    ;   length(BC0, 5),
        append(_, BC0, Before),
        BC = ['...'|BC0]
    ),
    length(After, AL),
    (   AL =< 5
    ->  AC = After
    ;   length(AC0, 5),
        append(AC0, _, After),
        append(AC0, ['...'], AC)
    ),
    append(BC, ['**here**'|AC], ContextTokens0),
    maplist(token_to_atom, ContextTokens0, ContextTokens),
    atomic_list_concat(ContextTokens, ' ', Context),
    throw(error(syntax_error(What),
                context(serql_parse/2, Context))).
add_error_location(Error, _) :-
    throw(Error).

%   token_to_atom(+Token, -Atom)
%
%   Atom is the text used for Token in error messages.

token_to_atom(Token, Token) :-
    atom(Token),
    !.
token_to_atom(id(X), X) :- !.
token_to_atom(string(X), X) :- !.
token_to_atom(uri(URI), X) :-
    !,
    atomic_list_concat(['<', URI, '>'], X).
token_to_atom(uri(NS,Local), X) :-
    !,
    atomic_list_concat([NS, Local], ':', X).
token_to_atom(old_uri(NS,Local), X) :-
    !,
    atomic_list_concat(['<', NS, ':', Local, '>'], X).
token_to_atom(cmp(X), X) :- !.
token_to_atom(rest(X), X) :- !.
token_to_atom(Token, Atom) :-
    term_to_atom(Token, Atom).

%   query(-Query, -NameSpaces)//
%
%   Parse the token list, adding the location to syntax errors.

query(Query, NameSpaces, In, Out) :-
    catch(compilation_unit(Query, NameSpaces, In, Out),
          E,
          add_error_location(E, In)).

%   must_see(+Token)//
%   must_see(+Token, +UserName)//
%
%   Demand Token to be the next token. Otherwise raise a syntax error
%   expected(Token) or expected(UserName).

must_see(Token) -->
    [Token],
    !.
must_see(Token) -->
    syntax_error(expected(Token)).

must_see(Token, _) -->
    [Token],
    !.
must_see(_, UserName) -->
    syntax_error(expected(UserName)).


                 /*******************************
                 *       HIGH LEVEL PARSER      *
                 *******************************/

%   compilation_unit(-Query, -NameSpaces)//
%
%   A query, optionally followed by namespace declarations.

compilation_unit(Query, NameSpaces) -->
    query(Query),
    namespace_list(NameSpaces).
%!  namespace_list(-NSList:list)// is det.
%
%   Parse the optional `USING NAMESPACE` clause at the end of a query.
%
%   @param NSList   List of Prefix=URI for each defined namespace

namespace_list([H|T]) -->
    [ using ],
    !,
    must_see(namespace),
    !,
    must_see_namespace(H),
    namespaces(T).
namespace_list([]) -->
    [].

must_see_namespace(Decl) -->
    namespace(Decl),
    !.
must_see_namespace(_) -->
    syntax_error(expected(namespace_declaration)).

%   namespace(-Decl)//
%
%   Parse `Prefix = URI` into Prefix=URI.

namespace(NS=URI) -->
    must_see(id(NS), identifier),
    must_see(cmp(=), =),
    namespace_uri(URI).

%   namespace_uri(-URI)//
%
%   Parse the URI of a namespace declaration into an atom. An
%   old_uri(Protocol, Local) token is taken to be an absolute URI that
%   is written as Protocol:Local.

namespace_uri(URI) -->
    [ uri(URI) ],
    !.
namespace_uri(URI) -->
    [ old_uri(Protocol, Local) ],   % New style
    !,
    { atomic_list_concat([Protocol, ':', Local], URI) }.
namespace_uri(_) -->
    syntax_error(expected(absolute_uri)).

%   namespaces(-List)//
%
%   Parse the remaining, comma separated, namespace declarations.

namespaces([H|T]) -->
    [ ',' ],
    !,
    must_see_namespace(H),
    namespaces(T).
namespaces([]) -->
    [].

%   query(-Query)//
%
%   Parse a SELECT or CONSTRUCT query into one of
%
%     * select(Projection, Path, Where, Distinct, Limit, Offset)
%     * construct(Construct, Path, Where, Distinct, Limit, Offset)

query(select(Projection, Path, Where, Distinct, Limit, Offset)) -->
    [ select ],
    !,
    distinct(Distinct),
    projection(Projection),
    must_see(from),
    path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(construct(Construct, Path, Where, Distinct, Limit, Offset)) -->
    [ construct ],
    !,
    distinct(Distinct),
    construct_clause(Construct),
    must_see(from),
    path_expr_list(Path),
    query_tail(Where, Limit, Offset).
query(_) -->
    syntax_error(no_select_or_construct).

%   distinct(-Distinct)//
%
%   Distinct is `distinct` if the keyword is present and `false`
%   otherwise.

distinct(distinct) -->
    [ distinct ],
    !.
distinct(false) -->
    [].

%   query_tail(-Where, -Limit, -Offset)//
%
%   Parse the optional WHERE, LIMIT and OFFSET clauses, which must
%   appear in this order. The defaults are `true`, `inf` and 0.

query_tail(Where, Limit, Offset) -->
    (   [ where ]
    ->  (   boolean_query(Where)
        ->  []
        ;   syntax_error(illegal_where_clause)
        )
    ;   {Where = true}
    ),
    (   [ limit ]
    ->  (   pos_int(Limit)
        ->  []
        ;   syntax_error(illegal_limit)
        )
    ;   {Limit = inf}
    ),
    (   [ offset ]
    ->  (   pos_int(Offset)
        ->  []
        ;   syntax_error(illegal_offset)
        )
    ;   {Offset = 0}
    ).

%   projection(-Projection)//
%
%   Projection is `*` or a non-empty list of variables and values.

projection(*) -->
    [ * ],
    !.
projection([H|T]) -->
    var_or_value(H),
    !,
    var_or_value_list(T).
projection(_) -->
    syntax_error(expected(projection)).

%   construct_clause(-Construct)//
%
%   Construct is `*` or a path expression.

construct_clause(*) -->
    [ * ],
    !.
construct_clause(Path) -->
    path_expr_list(Path),
    !.
construct_clause(_) -->
    syntax_error(expected(construct_clause)).
%   path_expr_list(-Expr)//
%
%   Parse one or more comma separated path expressions into a
%   right-nested conjunction.

path_expr_list(Expr) -->
    must_see_path_expr(E0),
    (   [ ',' ]
    ->  path_expr_list(Es),
        { Expr = (E0, Es) }
    ;   { Expr = E0 }
    ).

must_see_path_expr(E) -->
    path_expr(E),
    !.
must_see_path_expr(_) -->
    syntax_error(expected(path_expression)).

%   path_expr(-Expr)//
%
%   A path expression is either a bracketed list of path expressions,
%   which yields optional(_, Path), or a plain path.

path_expr(optional(_, Path)) -->
    [ '[' ],
    !,
    path_expr_list(Path),
    must_see(']').
path_expr(Expr) -->
    path_expr0(Expr).

%   path_expr0(-Expr)//
%
%   A `Node Edge Node` triple, optionally followed by a tail. If the
%   tail is introduced by `;`, it continues from the subject of the
%   triple; otherwise it continues from the object.

path_expr0(Expr) -->
    path_expr_head(Head),
    (   (   [ ';' ]
        ->  { arg(1, Head, H) }
        ;   { arg(3, Head, H) }
        ),
        path_expr_tail(H, Tail)
    ->  { Expr = (Head, Tail) }
    ;   { Expr = Head }
    ).

path_expr_head(rdf(S, P, O)) -->
    must_see_node(S),
    must_see_edge(P),
    must_see_node(O).

%   path_expr_tail(+S, -Expr)//
%
%   Parse the continuation of a path that starts at node S. A tail
%   between brackets yields optional(_, Tail) and may be followed by
%   `;` and another tail that starts at S.

path_expr_tail(S, Expr) -->
    [ '[' ],
    path_expr_tail0(S, Expr1),
    [ ']' ],
    { Expr0 = optional(_, Expr1) },
    (   [ ';' ]
    ->  path_expr_tail(S, Tail),
        { Expr = (Expr0, Tail) }
    ;   { Expr = Expr0 }
    ).
path_expr_tail(S, Expr) -->
    path_expr_tail0(S, Expr).

% path_expr_tail0 <=> Edge Node ((";")? Path_expr_tail)?
%
% After `;` the next tail shares subject S; without it the next tail
% starts at the object O of this statement.

path_expr_tail0(S, Expr) -->
    edge(P),
    must_see_node(O),
    { Statement = rdf(S, P, O) },
    (   (   [ ';' ]
        ->  path_expr_tail(S, Tail)
        ;   path_expr_tail(O, Tail)
        )
    ->  { Expr = (Statement, Tail) }
    ;   { Expr = Statement }
    ).

must_see_edge(Edge) -->
    edge(Edge),
    !.
must_see_edge(_) -->
    syntax_error(expected(edge)).

%   edge(-Edge)//
%
%   An edge is a variable or one of the three URI tokens.

edge(var(Var)) -->
    [ id(Var) ],
    !.
edge(uri(URI)) -->
    [ uri(URI) ],                   % absolute URI token
    !.
edge(uri(NS, URI)) -->
    [ uri(NS, URI) ],               % foo:bar
    !.
edge(old_uri(NS, URI)) -->
    [ old_uri(NS, URI) ],           % prefix and local part between < and >
    !.

must_see_node(Node) -->
    node(Node),
    !.
must_see_node(_) -->
    syntax_error(expected(node)).

%   node(-Node)//
%
%   A node is written between curly brackets. A single element yields
%   that element, multiple comma separated elements yield set(List)
%   and an empty node yields var(-(_)).

node(Node) -->
    [ '{' ],
    node_elem(E0),
    !,
    node_elem_list(Es),
    [ '}' ],
    (   {Es == []}
    ->  {Node = E0}
    ;   {Node = set([E0|Es])}
    ).
node(var(-(_))) -->                     % the _ is the variable that will
    [ '{', '}' ].                       % be shared

node_elem_list([H|T]) -->
    [ ',' ],
    !,
    must_see_node_elem(H),
    node_elem_list(T).
node_elem_list([]) -->
    [].

must_see_node_elem(Elem) -->
    node_elem(Elem),
    !.
must_see_node_elem(_) -->
    syntax_error(expected(node_element)).

%   node_elem(-Elem)//
%
%   A node element is a variable, URI, literal or reified statement.
%   The first alternative that parses is used.

node_elem(Elem) -->
    (   var(Elem)
    ;   uri(Elem)
    ;   literal(Elem)
    ;   reified_stat(Elem)
    ),
    !.
%   reified_stat(-Statement)//
%
%   A reified statement is written as `Node Edge Node` inside a node.

reified_stat(rdf(Subj,Pred,Obj)) -->
    node(Subj),
    must_see_edge(Pred),
    must_see_node(Obj).


                 /*******************************
                 *          WHERE ...           *
                 *******************************/

%   boolean_query(-Query)//
%
%   Parse the condition of a WHERE clause into a Prolog goal. `OR`
%   binds weaker than `AND`; both associate to the right.

boolean_query(Cond) -->
    and_expr(Left),
    (   [ or ],
        boolean_query(Right)
    ->  {Cond = (Left ; Right)}
    ;   {Cond = Left}
    ).

and_expr(Cond) -->
    boolean_query0(First),
    (   [ and ],
        and_expr(Rest)
    ->  {Cond = (First, Rest)}
    ;   {Cond = First}
    ).

%   boolean_query0(-Query)//
%
%   Parse a single test: a condition between parenthesis, `true`,
%   `false`, a negation, a comparison, a `LIKE` test or one of the
%   type tests isLiteral() and isResource().

boolean_query0(Cond) -->
    [ '(' ],
    !,
    boolean_query(Cond),
    must_see(')').
boolean_query0(true) -->
    [ true ],
    !.
boolean_query0(fail) -->
    [ false ],
    !.
boolean_query0(\+(Negated)) -->
    [ not ],
    !,
    boolean_query0(Negated).
boolean_query0(serql_compare(Op, Left, Right)) -->
    var_or_query_value(Left),
    [ cmp(Op) ],
    !,
    var_or_query_value(Right).
boolean_query0(serql_compare(like, Subject, Pattern)) -->
    var_or_value(Subject),          % must be var?
    [ like ],
    !,
    must_see_string(Pattern).
boolean_query0(rdf_is_literal(Arg)) -->
    [ isliteral, '(' ],
    !,
    var(Arg),
    must_see(')').
boolean_query0(rdf_is_resource(Arg)) -->
    [ isresource, '(' ],
    !,
    var(Arg),
    must_see(')').
boolean_query0(_) -->
    syntax_error(expected(boolean_test)).

must_see_string(Text) -->
    [ string(Text) ],
    !.
must_see_string(_) -->
    syntax_error(expected(string)).

%   var_or_value_list(-List)//
%
%   Parse zero or more `, VarOrValue` continuations.

var_or_value_list([First|More]) -->
    [ ',' ],
    !,
    must_see_var_or_value(First),
    var_or_value_list(More).
var_or_value_list([]) -->
    [].

must_see_var_or_value(Item) -->
    var_or_value(Item),
    !.
must_see_var_or_value(_) -->
    syntax_error(expected(var_or_value)).

var_or_value(Item) -->
    var(Item),
    !.
var_or_value(Item) -->
    value(Item).

%   var_or_query_value(-X)//
%
%   As var_or_value//1, but a literal is returned as query(Value).

var_or_query_value(Item) -->
    (   literal_value(Lit)
    ->  { Item = query(Lit) }
    ;   var_or_value(Item)
    ).

%   var(-Var)//
%
%   An identifier token is a variable, represented as var(Name).

var(var(Name)) -->
    [ id(Name) ],
    !.

%   value(-Value)//
%
%   A value is a URI, `null`, a literal or one of the functions
%   datatype(), lang() and label() applied to a variable.

value(Resource) -->
    uri(Resource).
value('$null$') -->
    [ null ].
value(Lit) -->
    literal(Lit),
    !.
value(datatype(var(Name))) -->
    [ datatype, '(', id(Name), ')' ].
value(lang(var(Name))) -->
    [ lang, '(', id(Name), ')' ].
value(label(var(Name))) -->
    [ label, '(', id(Name), ')' ].

%   uri(-URI)//
%
%   Any of the three URI tokens, returned unmodified.

uri(uri(Abs)) -->
    [uri(Abs)].
uri(uri(Prefix, Local)) -->
    [uri(Prefix, Local)].
uri(old_uri(Prefix, Local)) -->
    [old_uri(Prefix, Local)].
%   literal(-Literal)//
%
%   Parse a literal into literal(Value). See literal_value//1.

literal(Literal) -->
    literal_value(Value),
    { Literal = literal(Value) }.

%   literal_value(-Value)//
%
%   Value is String, lang(Lang, String) for `"..."@Lang` or
%   type(URI, String) for `"..."^^URI`, where URI is the token that
%   follows `^^`.

literal_value(Lit) -->
    [ string(String) ],
    (   [@, id(Lang)]
    ->  { Lit = lang(Lang, String) }
    ;   [^^, URI]
    ->  { Lit = type(URI, String) }
    ;   { Lit = String }
    ).

pos_int(I) -->
    [ int(I) ],
    { I >= 0 }.             % bit weird not to have >0, but this
                            % is the Sesame spec


                 /*******************************
                 *           TOKENISER          *
                 *******************************/

%   tokens(-Tokens)//
%
%   Translate a list of character codes into a list of tokens. White
%   space separates tokens and is skipped. Tokens are
%
%     * uri(URI), old_uri(NS, Local) and uri(NS, Local)
%     * string(Atom), int(Integer) and id(Name)
%     * cmp(Operator)
%     * An atom for keywords (in lowercase), `^^` and the single
%       character tokens defined by single/1.
%     * rest(Atom), holding all remaining input if no token can be
%       read.

tokens([H|T]) -->
    blank,
    token(H),
    !,
    tokens(T).
tokens([]) -->
    blank.

token(uri(URI)) -->                     % Old style absolute URI
    "<!", uri_codes(Codes), ">",
    { atom_codes(URI, Codes) }.
token(old_uri(NS, Local)) -->           % Old style local, new style absolute
    "<", identifier(NS), ":", uri_codes(Codes), ">",
    { atom_codes(Local, Codes) }.
token(string(String)) -->
    "\"", string_codes(Codes), "\"",
    { atom_codes(String, Codes) }.
token(Token) -->
    identifier(Id),
    !,
    (   ":", identifier(Local)          % new style URI
    ->  { Token = uri(Id, Local) }
    ;   {   downcase_atom(Id, Keyword),
            serql_keyword(Keyword)
        ->  Token = Keyword
        ;   Token = id(Id)
        }
    ).
token(int(Int)) -->
    digit(D0),
    !,
    digits(Digits),
    { number_codes(Int, [D0|Digits]) }.
token(cmp(Cmp)) -->
    cmp(Cmp),
    !.
token(^^) -->
    "^^",
    !.
token(Char) -->
    [C],
    { single(C),
      char_code(Char, C)
    }.
token(rest(Rest), In, []) :-            % catch syntax errors.
    In \== [],
    atom_codes(Rest, In).

%   single(?Code)
%
%   Characters that form a token on their own.

single(0'*).
single(0'=).
single(0'().
single(0')).
single(0'{).
single(0'}).
single(0'[).
single(0']).
single(0'@).
single(0',).
single(0';).

%   cmp//1
%
%   Returns Prolog comparison operators from the SeRQL ones. The two
%   character operators must be tried first.

cmp(=<) --> "<=".
cmp(\=) --> "!=".
cmp(>=) --> ">=".
cmp(=)  --> "=".
cmp(<)  --> "<".
cmp(>)  --> ">".


%!  uri_codes(-Codes)
%
%   Get a URI string.  Does not check for otherwise valid syntax.
%   This could be done using library(url).

uri_codes([C0|Cs]) -->
    [C0],
    { uri_code(C0) },
    !,
    uri_codes(Cs).
uri_codes([]) -->
    [].

uri_code(C) :-
    code_type(C, csym),
    !.
uri_code(0'$).
uri_code(0'-).
uri_code(0'@).
uri_code(0'&).
uri_code(0'+).
uri_code(0'.).
uri_code(0'/).
uri_code(0'?).
uri_code(0'#).
uri_code(0'=).
uri_code(0':).
uri_code(0'~).                          % officially not
uri_code(0';).
uri_code(0'{).
uri_code(0'}).


%!  string_codes(-Codes)
%
%   Chars between "...", Can contain \" and \\. The backslash of
%   these two escape sequences is removed. The closing quote is not
%   consumed.

string_codes([C0|Cs]) -->
    "\\", [C0],
    { C0 == 0'\\ ; C0 = 0'" },
    !,
    string_codes(Cs).
string_codes([]) -->
    peek(0'").
string_codes([C0|Cs]) -->
    [C0],
    string_codes(Cs).


%!  identifier(-Id)
%
%   A SeRQL identifier must start with a letter or an underscore
%   ('_') and can be followed by zero or more letters, numbers,
%   underscores, dashes ('-') or dots ('.').

identifier(Id) -->
    [C0],
    { code_type(C0, csymf) },
    id_chars(Cs),
    { atom_codes(Id, [C0|Cs]) }.

id_chars([C0|Cs]) -->
    [C0],
    {   code_type(C0, csym)
    ;   C0 == 0'.
    ;   C0 == 0'-
    },
    !,
    id_chars(Cs).
id_chars([]) -->
    [].

digit(D) -->
    [D],
    { code_type(D, digit) }.

digits([D0|Ds]) -->
    digit(D0),
    !,
    digits(Ds).
digits([]) -->
    [].

%   blank//
%
%   Skip white space.

blank -->
    [C],
    { code_type(C, space) },
    !,
    blank.
blank -->
    [].

%!  serql_keyword(?Keyword)
%
%   True if Keyword is the lowercase version of a keyword

serql_keyword(select).
serql_keyword(construct).
serql_keyword(from).
serql_keyword(where).
serql_keyword(using).
serql_keyword(namespace).
serql_keyword(true).
serql_keyword(false).
serql_keyword(not).
serql_keyword(and).
serql_keyword(or).
serql_keyword(like).
serql_keyword(label).
serql_keyword(lang).
serql_keyword(datatype).
serql_keyword(null).
serql_keyword(isresource).
serql_keyword(isliteral).
serql_keyword(sort).
serql_keyword(in).
serql_keyword(union).
serql_keyword(intersect).
serql_keyword(minus).
serql_keyword(exists).
serql_keyword(forall).
serql_keyword(distinct).                % SPEC: not in grammar
serql_keyword(limit).                   % SPEC: not in grammar
serql_keyword(offset).                  % SPEC: not in grammar


                 /*******************************
                 *          DCG BASICS          *
                 *******************************/

%   peek(?Code)//
%
%   True if Code is the next input element. Does not consume input.

peek(C, L, L) :-
    L = [C|_].


                 /*******************************
                 *    HUMAN READABLE MESSAGES   *
                 *******************************/

:- multifile
    prolog:message/3.
%   prolog:message(+Term)//
%
%   Translate the syntax errors of the SeRQL parser that carry a
%   location. Location is an atom that holds the tokens around the
%   error, where `**here**` marks the error position.

prolog:message(error(syntax_error(What),
                     context(serql_parse/2, Location))) -->
    [ 'Syntax error in SeRQL query: ' ],
    explain(What),
    [ ' at **here** in', nl, nl],
    ['~w'-[Location] ].

%   explain(+What)//
%
%   Describe the syntax error What. The final clause prints unknown
%   error terms as they are, such that the message hook cannot fail
%   on an error that has no specific description.

explain(expected(X)) -->
    !,
    [ '"~w" expected'-[X] ].
explain(no_select_or_construct) -->
    !,
    [ 'SELECT or CONSTRUCT expected' ].
explain(illegal_where_clause) -->
    !,
    [ 'Illegal WHERE clause' ].
explain(illegal_limit) -->
    !,
    [ 'Illegal LIMIT (non-negative integer expected)' ].
explain(illegal_offset) -->
    !,
    [ 'Illegal OFFSET (non-negative integer expected)' ].
explain(What) -->
    [ '~p'-[What] ].