/* Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@vu.nl WWW: http://www.swi-prolog.org Copyright (c) 2010-2018, University of Amsterdam VU University Amsterdam All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ :- module(rdfql_util, [ select_results/6, % +Distinct, +Offset, +Limit, % :SortBy, -Result, :Goal select_results/9, % +Distinct, +Group, +Having, +Agg, % +Offset, +Limit, % :SortBy, -Result, :Goal entailment_module/2 % +Entailment, -Module ]). :- use_module(library(nb_set)). :- use_module(library(semweb/rdf_db)). :- use_module(library(lists)). :- use_module(library(pairs)). :- use_module(library(apply)). :- use_module(library(xsdp_types)). :- use_module(sparql_runtime). :- meta_predicate select_results(+, +, 0, +, +, +, +, -, 0), select_results(+, +, +, +, -, 0), select_results(+, +, +, -, 0). %! select_results(+Distinct, +Offset, +Limit, +SortBy, -Result, :Goal) % % Calls select_results/8 using Group=[] and Having=true. select_results(Distinct, Offset, Limit, SortBy, Result, Goal) :- select_results(Distinct, [], true, [], Offset, Limit, SortBy, Result, Goal). %! select_results(+Distinct, +Group, +Having, +Aggregates, %! +Offset, +Limit, +SortBy, -Result, :Goal) is nondet. % % Select results for the template Result on backtracking over % Goal. % % @param Distinct % Iff 'distinct', only consider distinct solutions % % @param Group % is a list of variables on which to group the results. These % are the only variables that can be used in the HAVING filter % and final projection. % % @param Having % is a constraint (similar to =FILTER=) to filter grouped % results. % % @param Aggregates % List of aggregate(Function, Var) % % @param Offset % Skip the first Offset results. Offset is applied after % Distinct and SortBy % % @param Limit % Only return the first Limit results. Limit is applied % after Distinct, SortBy and Offset. The value 'inf' % returns all values. % % @param SortBy % Either 'unsorted' or a term order_by(Cols), where each Col % in Cols is a term ascending(Expr) or descending(Expr). % % @tbd Group, Having and Aggregate are currently ignored. :- discontiguous select_results/9. /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1. ORDERED RESULTS WITHOUT AGGREGATES - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ select_results(Distinct, [], _:true, [], Offset, Limit, order_by(Cols), Result, Goal) :- exclude(ground, Cols, SortCols), SortCols \== [], !, group_order(SortCols, GroupedCols), reverse(GroupedCols, RevGroupedCols), maplist(sort_key_goal, RevGroupedCols, GroupedKeys, KeyGenList), list_conj(KeyGenList, KeyGenGoal), sort_template(GroupedKeys, Result, Template), findall(Template, (Goal,rdfql_util:KeyGenGoal), Results0), ( Distinct == distinct -> sort(Results0, Results1) ; Results1 = Results0 ), order_by(RevGroupedCols, Results1, Results2), apply_offset(Offset, Results2, Results3), apply_limit(Limit, Results3, Results), member(Result, Results). %! group_order(+Cols, -GroupedCols) is det. % % Group ordering expressions by the same ordering direction. E.g., % [ascending(X), ascending(Y), descending(Z)] becomes % [ascending([X,Y]), descending([Z])] group_order([], []). group_order([Col|CT0], [Grouped|GT]) :- Col =.. [Dir,Expr], Grouped =.. [Dir,[Expr|Exprs]], same_order(Dir, CT0, CT, Exprs), group_order(CT, GT). same_order(Dir, [Col|CT0], CT, [E0|ET]) :- Col =.. [Dir,E0], !, same_order(Dir, CT0, CT, ET). same_order(_, CT, CT, []). list_conj([], true) :- !. list_conj([G], G) :- !. list_conj([G|T0], (G,T)) :- list_conj(T0, T). %! order_by(+Cols, +Results0, -Results) is det. % % Order the results. Cols is a list of ascending(Var) or % descending(Var). Note that the sorting is done with the least % importing (right most) order declaration first and relies on the % fact that keysort/2 is stable wrt to ordering the values. % % @tbd For DESC sorting, we need to reverse, but not the % order inside the groups. We'd need a reverse keysort/2. order_by([], Results, Results). order_by([ascending(_)|T], Results0, Results) :- !, keysort(Results0, Results1), pairs_values(Results1, Results2), order_by(T, Results2, Results). order_by([descending(_)|T], Results0, Results) :- keysort(Results0, AscSorted), group_pairs_by_key(AscSorted, Grouped), reverse(Grouped, DescSortedKeyedGroups), pairs_values(DescSortedKeyedGroups, DescSortedGroups), append(DescSortedGroups, DescSorted), order_by(T, DescSorted, Results). %! sort_key_goal(+ColGroup, -KeyGroup, -Translate) sort_key_goal(ColGroup, KeyGroup, Translate) :- arg(1, ColGroup, Exprs), sort_expr_goal(Exprs, KeyGroup, Translate). sort_expr_goal([], [], true). sort_expr_goal([V], [K], sort_key(V,K)) :- !. sort_expr_goal([V|TV], [K|TK], (sort_key(V,K),G)) :- sort_expr_goal(TV, TK, G). sort_template([], Result, Result). sort_template([H0|T], Result, H-Template) :- simplify_sort_key(H0, H), sort_template(T, Result, Template). simplify_sort_key([One], One) :- !. simplify_sort_key(List, Term) :- Term =.. [v|List]. %! sort_key(+Result, -Key) is det. % % Determine the sort-key from a result according to the SPARQL % standard: % % 1. undefined/null % 2. blank nodes % 3. IRIs % 4. RDF Literals % a. Numbers are mapped to their value space % b. Other literals are mapped to their plain literal. % Note that this works fine for some types: % - xsd:date and variants % - xsd:boolean % % @bug This is not good enough. Literals must be compared in their % value-space. This requires some study. % @bug Result is a SPARQL expression. :- public sort_key/2. % Goal created by sort_key_goal/3. sort_key(Var, Var) :- var(Var), !. sort_key(literal(L), sk(4, V)) :- !, simple_literal(L, V). sort_key(IRI, sk(N, IRI)) :- ( rdf_is_bnode(IRI) -> N = 2 ; N = 3 ). simple_literal(lang(_Lang, SL), SL) :- !. simple_literal(type(Type, SL), Number) :- xsdp_numeric_uri(Type, _), atom_number(SL, Number), !. simple_literal(type(_Type, SL), SL) :- !. simple_literal(SL, SL). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 2. UNORDERED RESULTS WITHOUT AGGREGATES - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ select_results(Distinct, [], _:true, [], Offset, Limit, _Unsorted, Result, Goal) :- !, select_results(Distinct, Offset, Limit, Result, Goal). %! select_results(+Distinct, +Offset, +Limit, -Result, :Goal) % % Unsorted version. In this case we can avoid first collecting all % results. select_results(distinct, Offset, Limit, Result, Goal) :- !, term_variables(Result, Vars), V =.. [v|Vars], empty_nb_set(Set), Counter = count(0), Goal, add_nb_set(V, Set, New), New == true, apply_limit(Counter, Offset, Limit, Last), ( Last == true -> ! ; true ). select_results(_, 0, inf, _, G) :- !, G. select_results(_, Offset, Limit, _, G) :- !, Counter = count(0), G, apply_limit(Counter, Offset, Limit, Last), ( Last == true -> ! ; true ). apply_limit(_, 0, inf, false) :- !. apply_limit(Counter, Offset, Limit, Last) :- arg(1, Counter, N0), N is N0 + 1, nb_setarg(1, Counter, N), N > Offset, ( Limit \== inf, N =:= Offset+Limit -> Last = true ; Last = false ). %! apply_offset(+Offset, +AllSolutions, -OffsetSolutions) is det. apply_offset(N, [_|T], List) :- N > 0, !, N2 is N - 1, apply_offset(N2, T, List). apply_offset(_, List, List). %! apply_limit(+Limit, +AllSolutions, -LimitSolutions) is det. apply_limit(inf, List, List) :- !. apply_limit(Limit, All, List) :- limit(Limit, All, List). limit(N, [H|T0], [H|T]) :- N > 0, !, N2 is N - 1, limit(N2, T0, T). limit(_, _, []). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 3. AGGREGATE SUPPORT To support aggregation, we have to collect results and group them into sets that have equal values for the variables that appear in Group. For each group, we must: 1. Evaluate the Aggregate functions 2. Evaluate the Having constraing and drop sets for which Having fails. Note that the projection (=Result) and Having constraints can only use variables Group and aggregate functions. This implies that we need to track the grouped variables as well as variables that are needed to compute the aggregates, but _no_ more. Q: Can we call select_results/9 recursively with the iteration over the groups to get OrderBy, Offset and Limit working? Q: When do we need to apply Distinct? Before or after grouping? Note that we do not need to do anything with the result term because the output variables are already shared with it. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ select_results(_Distinct, [], Having, AggregateEval, _Offset, _Limit, _Order, _Result, Goal) :- !, aggregate_vars(AggregateEval, Aggregates, AggVars, Eval), AV =.. [a|AggVars], findall(AV, Goal, Group), aggregate(Group, Aggregates), call(Eval), call(Having). select_results(_Distinct, Group, Having, AggregateEval, Offset, Limit, _Order, _Result, Goal) :- aggregate_vars(AggregateEval, Aggregates, AggVars, Eval), GV =.. [v|Group], AV =.. [a|AggVars], findall(GV-AV, Goal, Pairs), keysort(Pairs, SortedPairs), group_pairs_by_key(SortedPairs, Groups0), apply_offset(Offset, Groups0, Groups1), apply_limit(Limit, Groups1, Groups), member(GV-G, Groups), aggregate(G, Aggregates), call(Eval), call(Having). %! aggregate(+Group, +Aggregates) aggregate([AVT|Group], Aggregates) :- !, AVT =.. [a|AV0], maplist(aggregate_setup, AV0, State0), aggregate_steps(Group, State0, State), maplist(aggregate_bind, Aggregates, State). aggregate([], Aggregates) :- maplist(empty_aggregate, Aggregates). aggregate_setup(count(X), Count) :- aggregate_step(count(X), 0, Count). aggregate_setup(distinct(X, _Op), Set) :- ( is_null(X) -> Set = [] ; Set = [X] ). aggregate_setup(sum(X0), X) :- sparql_eval_raw(X0, X). aggregate_setup(min(X0), X) :- sparql_eval_raw(X0, X). aggregate_setup(max(X0), X) :- sparql_eval_raw(X0, X). aggregate_setup(avg(X0), X-1) :- sparql_eval_raw(X0, X). aggregate_setup(sample(X), X). aggregate_setup(group_concat(Expr,_), [X]) :- group_concat_value(Expr, X). aggregate_steps([], State, State). aggregate_steps([HT|T], State0, State) :- HT =.. [a|H], maplist(aggregate_step, H, State0, State1), aggregate_steps(T, State1, State). aggregate_step(count(X), Count0, Count) :- ( is_null(X) -> Count = Count0 ; Count is Count0 + 1 ). aggregate_step(distinct(X, _Op), S0, S) :- ( is_null(X) -> S = S0 ; S = [X|S0] ). aggregate_step(sum(X), Sum0, Sum) :- sparql_eval_raw(X+Sum0, Sum). aggregate_step(min(X), Min0, Min) :- sparql_eval_raw(min(X, Min0), Min). aggregate_step(max(X), Min0, Min) :- sparql_eval_raw(max(X, Min0), Min). aggregate_step(avg(X), Sum0-Count0, Sum-Count) :- sparql_eval_raw(X+Sum0, Sum), Count is Count0+1. aggregate_step(sample(X), S0, S) :- ( is_null(S0) -> S = X ; S = S0 ). aggregate_step(group_concat(Expr, _), S0, [X|S0]) :- group_concat_value(Expr, X). %! aggregate_bind(+Aggregation, +State) is det. % % @tbd: bind to error if the function does not evaluate? aggregate_bind(aggregate(Func, Var), State) :- aggregate_bind(Func, Var, State). aggregate_bind(count(_), Count, Count0) :- rdf_equal(xsd:integer, XSDInt), sparql_eval(numeric(XSDInt, Count0), Count). aggregate_bind(sum(_), Sum, Sum0) :- bind_number(Sum0, Sum). aggregate_bind(min(_), Min, Min0) :- bind_number(Min0, Min). aggregate_bind(max(_), Max, Max0) :- bind_number(Max0, Max). aggregate_bind(avg(_), Avg, Sum-Count) :- rdf_equal(xsd:integer, XSDInt), sparql_eval(Sum/numeric(XSDInt, Count), Avg). aggregate_bind(sample(_), Sample, Sample). aggregate_bind(group_concat(Expr, literal(Sep)), literal(Concat), Parts0) :- ( functor(Expr, distinct, 1) -> sort(Parts0, Parts) ; Parts = Parts0 ), maplist(text_of, Parts, Texts), atomic_list_concat(Texts, Sep, Concat). aggregate_bind(distinct(_, Op), Value, Set) :- sort(Set, Distinct), aggregate_distinct(Op, Distinct, Value). %! aggregate_distinct(+Operation, +Set, -Value) aggregate_distinct(count, Set, Value) :- rdf_equal(xsd:integer, IntType), length(Set, Len), bind_number(numeric(IntType, Len), Value). aggregate_distinct(sum, Set, Sum) :- rdf_equal(xsd:integer, IntType), foldl(add, Set, number(IntType, 0), Sum0), bind_number(Sum0, Sum). aggregate_distinct(avg, Set, Avg) :- aggregate_distinct(sum, Set, Sum), length(Set, Count), rdf_equal(xsd:integer, XSDInt), sparql_eval(Sum/numeric(XSDInt, Count), Avg). add(X, Sum0, Sum) :- sparql_eval_raw(X+Sum0, Sum). text_of(Expr, Atom) :- sparql_eval_raw(Expr, V0), raw_text(V0, Atom). raw_text(string(X), X). raw_text(simple_literal(X), X). raw_text(lang(_Lang, SL), SL). % allow lang qualified strings in % group_concat bind_number(V0, V) :- ( V0 = numeric(_, _) -> sparql_eval(V0, V) ; V = '$null$' ). group_concat_value(Expr, Value) :- ( functor(Expr, distinct, 1) -> arg(1, Expr, Value) ; Value = Expr ). :- rdf_meta empty_aggregate(t). empty_aggregate(aggregate(count(_), literal(type(xsd:integer, '0')))). empty_aggregate(aggregate(sum(_), literal(type(xsd:integer, '0')))). empty_aggregate(aggregate(distinct(_,count), literal(type(xsd:integer, '0')))). empty_aggregate(aggregate(group_concat(_,_), literal(''))). %! aggregate_vars(+AggregateEval, -Aggregates, -Template, -Query) is det. % % @param AggregateEval is a combination of aggregate(Expr, Var) % and queries that depend on aggregation results and must % therefore be executed after computing the aggregates.% % @param Aggregates is a list of aggregate(Expr, Var), where Expr % is a plain aggregate function over a number of variables % and Var is shared with the place where the aggregate is % used. aggregate_vars([], [], [], true). aggregate_vars([aggregate(Term0, Into)|T], [aggregate(Term, Into)|AT], [Term|Templ], Q) :- !, x_distinct(Term0, Term), aggregate_vars(T, AT, Templ, Q). aggregate_vars([Q0|T], Agg, Templ, Q) :- aggregate_vars(T, Agg, Templ, Q1), mkconj(Q1, Q0, Q). mkconj(true, Q, Q) :- !. mkconj(Q, true, Q) :- !. mkconj(A, B, (A,B)). x_distinct(Term0, Term) :- arg(1, Term0, Spec), compound(Spec), Spec = distinct(_), distinct_x(Term0, Term), !. x_distinct(Term, Term). distinct_x(count(distinct(X)), distinct(X, count)). distinct_x(count(sum(X)), distinct(X, sum)). distinct_x(count(avg(X)), distinct(X, avg)). is_null(X) :- ( var(X) -> true ; X == '$null$' ). /******************************* * ENTAILMENT * *******************************/ %! entailment_module(+Entailment, -Module) % % Find the Prolog module implementing the entailment rules for a % semantic web language. entailment_module(Entailment, Module) :- cliopatria:entailment(Entailment, Module), !. entailment_module(Entailment, _) :- throw(error(existence_error(entailment, Entailment), _)). :- multifile cliopatria:entailment/2.