View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(turtle,
   37          [ rdf_load_turtle/3,                  % +Input, -Triples, +Options
   38            rdf_read_turtle/3,                  % +Input, -Triples, +Options
   39            rdf_process_turtle/3,               % +Input, :OnObject, +Options
   40                                                % re-exports
   41            rdf_save_turtle/2,                  % +File, +Options
   42            rdf_save_canonical_turtle/2,        % +File, +Options
   43            rdf_save_trig/2,                    % +File, +Options
   44            rdf_save_canonical_trig/2,          % +File, +Options
   45            rdf_save_ntriples/2                 % +File, +Options
   46          ]).   47:- use_module(library(semweb/rdf_turtle_write)). % re-exports
   48:- use_module(library(semweb/rdf_db),
   49              [rdf_transaction/2,rdf_set_graph/2,rdf_assert/4]).   50
   51:- autoload(library(memfile),
   52	    [atom_to_memory_file/2,open_memory_file/4]).   53:- autoload(library(option),[option/3,option/2]).   54:- autoload(library(uri),
   55	    [uri_file_name/2,uri_is_global/1,uri_normalized/2]).   56:- autoload(library(http/http_open),[http_open/3]).   57
   58% re-exports
   59:- meta_predicate
   60    rdf_process_turtle(+,2,+).   61
   62:- predicate_options(rdf_load_turtle/3, 3,
   63                     [pass_to(rdf_read_turtle/3, 3)]).   64:- predicate_options(rdf_process_turtle/3, 3,
   65                     [ anon_prefix(atom),
   66                       base_uri(atom),
   67                       base_used(-atom),
   68                       db(atom),
   69                       error_count(-integer),
   70                       namespaces(-list),
   71                       on_error(oneof([warning,error])),
   72                       prefixes(-list),
   73                       resources(oneof([uri,iri]))
   74                     ]).   75:- predicate_options(rdf_read_turtle/3, 3,
   76                     [ anon_prefix(atom),
   77                       base_uri(atom),
   78                       base_used(-atom),
   79                       db(atom),
   80                       error_count(-integer),
   81                       namespaces(-list),
   82                       on_error(oneof([warning,error])),
   83                       prefixes(-list),
   84                       resources(oneof([uri,iri]))
   85                     ]).   86
   87:- use_foreign_library(foreign(turtle)).   88:- public                               % used by the writer
   89    turtle_pn_local/1,
   90    turtle_write_quoted_string/2,
   91    turtle_write_uri/2.   92
   93/** <module> Turtle: Terse RDF Triple Language
   94
   95This module implements the Turtle  language   for  representing  the RDF
   96triple model as defined by Dave Beckett  from the Institute for Learning
   97and Research Technology University of Bristol  and later standardized by
   98the W3C RDF working group.
   99
  100This module acts as a plugin to   rdf_load/2,  for processing files with
  101one of the extensions =|.ttl|= or =|.n3|=.
  102
  103@see    http://www.w3.org/TR/turtle/ (used W3C Recommendation 25
  104        February 2014)
  105*/
  106
  107%!  rdf_read_turtle(+Input, -Triples, +Options)
  108%
  109%   Read a stream or file into a set of triples or quadruples (if
  110%   faced with TriG input) of the format
  111%
  112%           rdf(Subject, Predicate, Object [, Graph])
  113%
  114%   The representation is consistent with the SWI-Prolog RDF/XML
  115%   and ntriples parsers.  Provided options are:
  116%
  117%           * base_uri(+BaseURI)
  118%           Initial base URI.  Defaults to file://<file> for loading
  119%           files.
  120%
  121%           * anon_prefix(+Prefix)
  122%           Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
  123%           If Prefix is not an atom blank nodes are generated as
  124%           node(1), node(2), ...
  125%
  126%           * format(+Format)
  127%           One of =auto= (default), =turtle= or =trig=.  The
  128%           auto mode switches to TriG format of there is a
  129%           =|{|= before the first triple.  Finally, of the
  130%           format is explicitly stated as =turtle= and the
  131%           file appears to be a TriG file, a warning is printed
  132%           and the data is loaded while ignoring the graphs.
  133%
  134%           * resources(URIorIRI)
  135%           Officially, Turtle resources are IRIs.  Quite a
  136%           few applications however send URIs.  By default we
  137%           do URI->IRI mapping because this rarely causes errors.
  138%           To force strictly conforming mode, pass =iri=.
  139%
  140%           * prefixes(-Pairs)
  141%           Return encountered prefix declarations as a
  142%           list of Alias-URI
  143%
  144%           * namespaces(-Pairs)
  145%           Same as prefixes(Pairs).  Compatibility to rdf_load/2.
  146%
  147%           * base_used(-Base)
  148%           Base URI used for processing the data.  Unified to
  149%           [] if there is no base-uri.
  150%
  151%           * on_error(+ErrorMode)
  152%           In =warning= (default), print the error and continue
  153%           parsing the remainder of the file.  If =error=, abort
  154%           with an exception on the first error encountered.
  155%
  156%           * error_count(-Count)
  157%           If on_error(warning) is active, this option cane be
  158%           used to retrieve the number of generated errors.
  159%
  160%   @param  Input is one of stream(Stream), atom(Atom), a =http=,
  161%           =https= or =file= url or a filename specification as
  162%           accepted by absolute_file_name/3.
  163
  164rdf_read_turtle(In, Triples, Options) :-
  165    base_uri(In, BaseURI, Options),
  166    setup_call_cleanup(
  167        ( open_input(In, Stream, Close),
  168          create_turtle_parser(Parser, Stream,
  169                               [ base_uri(BaseURI)
  170                               | Options
  171                               ])
  172        ),
  173        ( turtle_parse(Parser, Triples,
  174                       [ parse(document)
  175                       | Options
  176                       ]),
  177          post_options(Parser, Options)
  178        ),
  179        ( destroy_turtle_parser(Parser),
  180          call(Close)
  181        )).
  182
  183%!  rdf_load_turtle(+Input, -Triples, +Options)
  184%
  185%   @deprecated Use rdf_read_turtle/3
  186
  187rdf_load_turtle(Input, Triples, Options) :-
  188    rdf_read_turtle(Input, Triples, Options).
  189
  190
  191%!  rdf_process_turtle(+Input, :OnObject, +Options) is det.
  192%
  193%   Streaming  Turtle  parser.  The  predicate  rdf_process_turtle/3
  194%   processes Turtle data from Input, calling   OnObject with a list
  195%   of triples for every Turtle _statement_ found in Input. OnObject
  196%   is  called  as  below,  where  `ListOfTriples`   is  a  list  of
  197%   rdf(S,P,O) terms for a normal Turtle  file or rdf(S,P,O,G) terms
  198%   if the =GRAPH= keyword is used to  associate a set of triples in
  199%   the document with  a  particular   graph.  The  `Graph` argument
  200%   provides the default graph for storing the triples and _Line_ is
  201%   the line number where the statement started.
  202%
  203%     ==
  204%     call(OnObject, ListOfTriples, Graph:Line)
  205%     ==
  206%
  207%   This predicate supports the same Options as rdf_load_turtle/3.
  208%
  209%   Errors encountered are sent to  print_message/2, after which the
  210%   parser tries to recover and parse the remainder of the data.
  211%
  212%   @see  This  predicate  is  normally    used  by  load_rdf/2  for
  213%   processing RDF data.
  214
  215rdf_process_turtle(In, OnObject, Options) :-
  216    base_uri(In, BaseURI, Options),
  217    option(graph(Graph), Options, BaseURI),
  218    setup_call_cleanup(
  219        ( open_input(In, Stream, Close),
  220          create_turtle_parser(Parser, Stream, Options)
  221        ),
  222        ( process_turtle(Parser, Stream, OnObject, Graph,
  223                         [ parse(statement)
  224                         ]),
  225          post_options(Parser, Options)
  226        ),
  227        ( destroy_turtle_parser(Parser),
  228          call(Close)
  229        )).
  230
  231post_options(Parser, Options) :-
  232    prefix_option(Parser, Options),
  233    namespace_option(Parser, Options),
  234    base_option(Parser, Options),
  235    error_option(Parser, Options).
  236
  237prefix_option(Parser, Options) :-
  238    (   option(prefixes(Pairs), Options)
  239    ->  turtle_prefixes(Parser, Pairs)
  240    ;   true
  241    ).
  242namespace_option(Parser, Options) :-
  243    (   option(namespaces(Pairs), Options)
  244    ->  turtle_prefixes(Parser, Pairs)
  245    ;   true
  246    ).
  247base_option(Parser, Options) :-
  248    (   option(base_used(Base), Options)
  249    ->  turtle_base(Parser, Base)
  250    ;   true
  251    ).
  252error_option(Parser, Options) :-
  253    (   option(error_count(Count), Options)
  254    ->  turtle_error_count(Parser, Count)
  255    ;   true
  256    ).
  257
  258
  259process_turtle(_Parser, Stream, _OnObject, _Graph, _Options) :-
  260    at_end_of_stream(Stream),
  261    !.
  262process_turtle(Parser, Stream, OnObject, Graph, Options) :-
  263    stream_pair(Stream, In, _),
  264    line_count(In, LineNo),
  265    turtle_parse(Parser, Triples,
  266                 [ parse(statement)
  267                 | Options
  268                 ]),
  269    call(OnObject, Triples, Graph:LineNo),
  270    process_turtle(Parser, Stream, OnObject, Graph, Options).
  271
  272
  273%!  open_input(+Input, -Stream, -Close) is det.
  274%
  275%   Open given input.
  276%
  277%   @param  Close goal to undo the open action
  278%   @tbd    Synchronize with input handling of rdf_db.pl.
  279%   @error  existence_error, permission_error
  280
  281open_input(stream(Stream), Stream, Close) :-
  282    !,
  283    stream_property(Stream, encoding(Old)),
  284    (   (   unicode_encoding(Old)
  285        ;   stream_property(Stream, type(text))
  286        )
  287    ->  Close = true
  288    ;   set_stream(Stream, encoding(utf8)),
  289        Close = set_stream(Stream, encoding(Old))
  290    ).
  291open_input(Stream, Stream, Close) :-
  292    is_stream(Stream),
  293    !,
  294    open_input(stream(Stream), Stream, Close).
  295open_input(atom(Atom), Stream, close(Stream)) :-
  296    !,
  297    atom_to_memory_file(Atom, MF),
  298    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  299open_input(URL, Stream, close(Stream)) :-
  300    (   sub_atom(URL, 0, _, _, 'http://')
  301    ;   sub_atom(URL, 0, _, _, 'https://')
  302    ),
  303    !,
  304    http_open(URL, Stream, []),
  305    set_stream(Stream, encoding(utf8)).
  306open_input(URL, Stream, close(Stream)) :-
  307    uri_file_name(URL, Path),
  308    !,
  309    open(Path, read, Stream, [encoding(utf8)]).
  310open_input(File, Stream, close(Stream)) :-
  311    absolute_file_name(File, Path,
  312                       [ access(read),
  313                         extensions([ttl, ''])
  314                       ]),
  315    open(Path, read, Stream, [encoding(utf8)]).
  316
  317unicode_encoding(utf8).
  318unicode_encoding(wchar_t).
  319unicode_encoding(unicode_be).
  320unicode_encoding(unicode_le).
  321
  322%!  base_uri(+Input, -BaseURI, +Options)
  323%
  324%   Determine the base uri to use for processing.
  325
  326base_uri(_Input, BaseURI, Options) :-
  327    option(base_uri(BaseURI), Options),
  328    !.
  329base_uri(_Input, BaseURI, Options) :-
  330    option(graph(BaseURI), Options),
  331    !.
  332base_uri(stream(Input), BaseURI, _Options) :-
  333    stream_property(Input, file_name(Name)),
  334    !,
  335    name_uri(Name, BaseURI).
  336base_uri(Stream, BaseURI, Options) :-
  337    is_stream(Stream),
  338    !,
  339    base_uri(stream(Stream), BaseURI, Options).
  340base_uri(Name, BaseURI, _Options) :-
  341    atom(Name),
  342    !,
  343    name_uri(Name, BaseURI).
  344base_uri(_, 'http://www.example.com/', _).
  345
  346name_uri(Name, BaseURI) :-
  347    uri_is_global(Name),
  348    !,
  349    uri_normalized(Name, BaseURI).
  350name_uri(Name, BaseURI) :-
  351    uri_file_name(BaseURI, Name).
  352
  353
  354                 /*******************************
  355                 *          WRITE SUPPORT       *
  356                 *******************************/
  357
  358%!  turtle_pn_local(+Atom:atom) is semidet.
  359%
  360%   True if Atom is a  valid   Turtle  _PN_LOCAL_ name. The PN_LOCAL
  361%   name is what can follow the : in  a resource. In the new Turtle,
  362%   this can be anything and this   function becomes meaningless. In
  363%   the old turtle, PN_LOCAL is defined   similar (but not equal) to
  364%   an XML name. This predicate  is   used  by  rdf_save_turtle/2 to
  365%   write files such that can be read by old parsers.
  366%
  367%   @see xml_name/2.
  368
  369%!  turtle_write_quoted_string(+Out, +Value, ?WriteLong) is det.
  370%
  371%   Write Value (an atom)  as  a   valid  Turtle  string.  WriteLong
  372%   determines wether the string is written   as a _short_ or _long_
  373%   string.  It takes the following values:
  374%
  375%     * true
  376%     Use Turtle's long string syntax. Embeded newlines and
  377%     single or double quotes are are emitted verbatim.
  378%     * false
  379%     Use Turtle's short string syntax.
  380%     * Var
  381%     If WriteLong is unbound, this predicate uses long syntax
  382%     if newlines appear in the string and short otherwise.  WriteLong
  383%     is unified with the decision taken.
  384
  385%!  turtle_write_quoted_string(+Out, +Value) is det.
  386%
  387%   Same as turtle_write_quoted_string(Out, Value, false), writing a
  388%   string with only a single =|"|=.   Embedded newlines are escapes
  389%   as =|\n|=.
  390
  391turtle_write_quoted_string(Out, Text) :-
  392    turtle_write_quoted_string(Out, Text, false).
  393
  394%!  turtle_write_uri(+Out, +Value) is det.
  395%
  396%   Write a URI as =|<...>|=
  397
  398
  399                 /*******************************
  400                 *          RDF-DB HOOK         *
  401                 *******************************/
  402
  403:- multifile
  404    rdf_db:rdf_load_stream/3,
  405    rdf_db:rdf_file_type/2.  406
  407%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
  408%
  409%   (Turtle clauses)
  410
  411rdf_db:rdf_load_stream(turtle, Stream, Options) :-
  412    load_turtle_stream(Stream, Options).
  413rdf_db:rdf_load_stream(trig, Stream, Options) :-
  414    load_turtle_stream(Stream, Options).
  415
  416load_turtle_stream(Stream, _Module:Options) :-
  417    rdf_db:graph(Options, Graph),
  418    atom_concat('_:', Graph, BNodePrefix),
  419    rdf_transaction((  rdf_process_turtle(Stream, assert_triples,
  420                                          [ anon_prefix(BNodePrefix)
  421                                          | Options
  422                                          ]),
  423                       rdf_set_graph(Graph, modified(false))
  424                    ),
  425                    parse(Graph)).
  426
  427assert_triples([], _).
  428assert_triples([H|T], Location) :-
  429    assert_triple(H, Location),
  430    assert_triples(T, Location).
  431
  432assert_triple(rdf(S,P,O), Location) :-
  433    rdf_assert(S,P,O,Location).
  434assert_triple(rdf(S,P,O,G), _) :-
  435    rdf_assert(S,P,O,G).
  436
  437
  438rdf_db:rdf_file_type(ttl,  turtle).
  439rdf_db:rdf_file_type(n3,   turtle).     % not really, but good enough
  440rdf_db:rdf_file_type(trig, trig).
  441
  442
  443                 /*******************************
  444                 *             MESSAGES         *
  445                 *******************************/
  446
  447:- multifile prolog:error_message//1.  448
  449prolog:error_message(existence_error(turtle_prefix, '')) -->
  450    [ 'Turtle empty prefix (:) is not defined' ]