View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(sgml,
   38          [ load_html/3,                % +Input, -DOM, +Options
   39            load_xml/3,                 % +Input, -DOM, +Options
   40            load_sgml/3,                % +Input, -DOM, +Options
   41
   42            load_sgml_file/2,           % +File, -ListOfContent
   43            load_xml_file/2,            % +File, -ListOfContent
   44            load_html_file/2,           % +File, -Document
   45
   46            load_structure/3,           % +File, -Term, +Options
   47
   48            load_dtd/2,                 % +DTD, +File
   49            load_dtd/3,                 % +DTD, +File, +Options
   50            dtd/2,                      % +Type, -DTD
   51            dtd_property/2,             % +DTD, ?Property
   52
   53            new_dtd/2,                  % +Doctype, -DTD
   54            free_dtd/1,                 % +DTD
   55            open_dtd/3,                 % +DTD, +Options, -Stream
   56
   57            new_sgml_parser/2,          % -Parser, +Options
   58            free_sgml_parser/1,         % +Parser
   59            set_sgml_parser/2,          % +Parser, +Options
   60            get_sgml_parser/2,          % +Parser, +Options
   61            sgml_parse/2,               % +Parser, +Options
   62
   63            sgml_register_catalog_file/2, % +File, +StartOrEnd
   64
   65            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   66            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   67            xml_quote_attribute/2,      % +In, -Quoted
   68            xml_quote_cdata/2,          % +In, -Quoted
   69            xml_name/1,                 % +In
   70            xml_name/2,                 % +In, +Encoding
   71
   72            xsd_number_string/2,        % ?Number, ?String
   73            xsd_time_string/3,          % ?Term, ?Type, ?String
   74
   75            xml_basechar/1,             % +Code
   76            xml_ideographic/1,          % +Code
   77            xml_combining_char/1,       % +Code
   78            xml_digit/1,                % +Code
   79            xml_extender/1,             % +Code
   80
   81            iri_xml_namespace/2,        % +IRI, -Namespace
   82            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   83            xml_is_dom/1                % +Term
   84          ]).   85:- autoload(library(error),[instantiation_error/1]).   86:- autoload(library(iostream),[open_any/5,close_any/1]).   87:- autoload(library(lists),[member/2,selectchk/3]).   88:- autoload(library(option),[select_option/3,merge_options/3]).   89
   90:- meta_predicate
   91    load_structure(+, -, :),
   92    load_html(+, -, :),
   93    load_xml(+, -, :),
   94    load_sgml(+, -, :).   95
   96:- predicate_options(load_structure/3, 3,
   97                     [ charpos(integer),
   98                       cdata(oneof([atom,string])),
   99                       defaults(boolean),
  100                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
  101                       doctype(atom),
  102                       dtd(any),
  103                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  104                       entity(atom,atom),
  105                       keep_prefix(boolean),
  106                       file(atom),
  107                       line(integer),
  108                       offset(integer),
  109                       number(oneof([token,integer])),
  110                       qualify_attributes(boolean),
  111                       shorttag(boolean),
  112                       case_sensitive_attributes(boolean),
  113                       case_preserving_attributes(boolean),
  114                       system_entities(boolean),
  115                       max_memory(integer),
  116                       space(oneof([sgml,preserve,default,remove,strict])),
  117                       xmlns(atom),
  118                       xmlns(atom,atom),
  119                       pass_to(sgml_parse/2, 2)
  120                     ]).  121:- predicate_options(load_html/3, 3,
  122                     [ pass_to(load_structure/3, 3)
  123                     ]).  124:- predicate_options(load_xml/3, 3,
  125                     [ pass_to(load_structure/3, 3)
  126                     ]).  127:- predicate_options(load_sgml/3, 3,
  128                     [ pass_to(load_structure/3, 3)
  129                     ]).  130:- predicate_options(load_dtd/3, 3,
  131                     [ dialect(oneof([sgml,xml,xmlns])),
  132                       pass_to(open/4, 4)
  133                     ]).  134:- predicate_options(sgml_parse/2, 2,
  135                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  136                            callable),
  137                       cdata(oneof([atom,string])),
  138                       content_length(integer),
  139                       document(-any),
  140                       max_errors(integer),
  141                       parse(oneof([file,element,content,declaration,input])),
  142                       source(any),
  143                       syntax_errors(oneof([quiet,print,style])),
  144                       xml_no_ns(oneof([error,quiet]))
  145                     ]).  146:- predicate_options(new_sgml_parser/2, 2,
  147                     [ dtd(any)
  148                     ]).  149
  150
  151/** <module> SGML, XML and HTML parser
  152
  153This library allows you to parse SGML, XML   and HTML data into a Prolog
  154data structure. The library defines several families of predicates:
  155
  156  $ High-level predicates :
  157  Most users will only use load_html/3, load_xml/3 or load_sgml/3 to
  158  parse arbitrary input into a _DOM_ structure.  These predicates all
  159  call load_structure/3, which provides more options and may be
  160  used for processing non-standard documents.
  161
  162  The DOM structure can be used by library(xpath) to extract information
  163  from the document.
  164
  165  $ The low-level parser :
  166  The actual parser is written in C and consists of two parts: one for
  167  processing DTD (Document Type Definitions) and one for parsing data.
  168  The data can either be parsed to a Prolog (_DOM_) term or the parser
  169  can perform callbacks for the DOM _events_.
  170
  171  $ Utility predicates :
   172  Finally, this library provides primitives for classifying characters
  173  and strings according to the XML specification such as xml_name/1 to
  174  verify whether an atom is a valid XML name (identifier).  It also
  175  provides primitives to quote attributes and CDATA elements.
  176*/
  177
  178:- multifile user:file_search_path/2.  179:- dynamic   user:file_search_path/2.  180
  181user:file_search_path(dtd, '.').
  182user:file_search_path(dtd, swi('library/DTD')).
  183
  184sgml_register_catalog_file(File, Location) :-
  185    prolog_to_os_filename(File, OsFile),
  186    '_sgml_register_catalog_file'(OsFile, Location).
  187
  188:- use_foreign_library(foreign(sgml2pl)).  189
  190register_catalog(Base) :-
  191    absolute_file_name(dtd(Base),
  192                           [ extensions([soc]),
  193                             access(read),
  194                             file_errors(fail)
  195                           ],
  196                           SocFile),
  197    sgml_register_catalog_file(SocFile, end).
  198
  199:- initialization
  200    ignore(register_catalog('HTML4')).  201
  202
  203                 /*******************************
  204                 *         DTD HANDLING         *
  205                 *******************************/
  206
  207/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  208Note that concurrent access to DTD objects  is not allowed, and hence we
   209will allocate and destroy them in each   thread.  Possibly it would be
  210nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  211diagnosed to mess with the entity resolution by Fabien Todescato.
  212- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  213
  214:- thread_local
  215    current_dtd/2.  216:- volatile
  217    current_dtd/2.  218:- thread_local
  219    registered_cleanup/0.  220:- volatile
  221    registered_cleanup/0.  222
  223:- multifile
  224    dtd_alias/2.  225
  226:- create_prolog_flag(html_dialect, html5, [type(atom)]).  227
  228dtd_alias(html4, 'HTML4').
  229dtd_alias(html5, 'HTML5').
  230dtd_alias(html,  DTD) :-
  231    current_prolog_flag(html_dialect, Dialect),
  232    dtd_alias(Dialect, DTD).
  233
  234%!  dtd(+Type, -DTD) is det.
  235%
  236%   DTD is a DTD object created from  the file dtd(Type). Loaded DTD
  237%   objects are cached. Note that  DTD   objects  may  not be shared
  238%   between threads. Therefore, dtd/2  maintains   the  pool  of DTD
  239%   objects  using  a  thread_local  predicate.    DTD  objects  are
  240%   destroyed if a thread terminates.
  241%
  242%   @error existence_error(source_sink, dtd(Type))
  243
  244dtd(Type, DTD) :-
  245    current_dtd(Type, DTD),
  246    !.
  247dtd(Type, DTD) :-
  248    new_dtd(Type, DTD),
  249    (   dtd_alias(Type, Base)
  250    ->  true
  251    ;   Base = Type
  252    ),
  253    absolute_file_name(dtd(Base),
  254                       [ extensions([dtd]),
  255                         access(read)
  256                       ], DtdFile),
  257    load_dtd(DTD, DtdFile),
  258    register_cleanup,
  259    asserta(current_dtd(Type, DTD)).
  260
  261%!  load_dtd(+DTD, +DtdFile, +Options)
  262%
  263%   Load DtdFile into a DTD.  Defined options are:
  264%
  265%           * dialect(+Dialect)
  266%           Dialect to use (xml, xmlns, sgml)
  267%
  268%           * encoding(+Encoding)
  269%           Encoding of DTD file
  270%
  271%   @param  DTD is a fresh DTD object, normally created using
  272%           new_dtd/1.
  273
  274load_dtd(DTD, DtdFile) :-
  275    load_dtd(DTD, DtdFile, []).
  276load_dtd(DTD, DtdFile, Options) :-
  277    sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
  278    setup_call_cleanup(
  279        open_dtd(DTD, DTDOptions, DtdOut),
  280        setup_call_cleanup(
  281            open(DtdFile, read, DtdIn, OpenOptions),
  282            copy_stream_data(DtdIn, DtdOut),
  283            close(DtdIn)),
  284        close(DtdOut)).
  285
  286split_dtd_options([], [], []).
  287split_dtd_options([H|T], [H|TD], S) :-
  288    dtd_option(H),
  289    !,
  290    split_dtd_options(T, TD, S).
  291split_dtd_options([H|T], TD, [H|S]) :-
  292    split_dtd_options(T, TD, S).
  293
  294dtd_option(dialect(_)).
  295
  296
  297%!  destroy_dtds
  298%
  299%   Destroy  DTDs  cached  by  this  thread   as  they  will  become
  300%   unreachable anyway.
  301
  302destroy_dtds :-
  303    (   current_dtd(_Type, DTD),
  304        free_dtd(DTD),
  305        fail
  306    ;   true
  307    ).
  308
  309%!  register_cleanup
  310%
  311%   Register cleanup of DTDs created for this thread.
  312
  313register_cleanup :-
  314    registered_cleanup,
  315    !.
  316register_cleanup :-
  317    (   current_prolog_flag(threads, true)
  318    ->  prolog_listen(this_thread_exit, destroy_dtds)
  319    ;   true
  320    ),
  321    assert(registered_cleanup).
  322
  323
  324                 /*******************************
  325                 *          EXAMINE DTD         *
  326                 *******************************/
  327
  328prop(doctype(_), _).
  329prop(elements(_), _).
  330prop(entities(_), _).
  331prop(notations(_), _).
  332prop(entity(E, _), DTD) :-
  333    (   nonvar(E)
  334    ->  true
  335    ;   '$dtd_property'(DTD, entities(EL)),
  336        member(E, EL)
  337    ).
  338prop(element(E, _, _), DTD) :-
  339    (   nonvar(E)
  340    ->  true
  341    ;   '$dtd_property'(DTD, elements(EL)),
  342        member(E, EL)
  343    ).
  344prop(attributes(E, _), DTD) :-
  345    (   nonvar(E)
  346    ->  true
  347    ;   '$dtd_property'(DTD, elements(EL)),
  348        member(E, EL)
  349    ).
  350prop(attribute(E, A, _, _), DTD) :-
  351    (   nonvar(E)
  352    ->  true
  353    ;   '$dtd_property'(DTD, elements(EL)),
  354        member(E, EL)
  355    ),
  356    (   nonvar(A)
  357    ->  true
  358    ;   '$dtd_property'(DTD, attributes(E, AL)),
  359        member(A, AL)
  360    ).
  361prop(notation(N, _), DTD) :-
  362    (   nonvar(N)
  363    ->  true
  364    ;   '$dtd_property'(DTD, notations(NL)),
  365        member(N, NL)
  366    ).
  367
  368dtd_property(DTD, Prop) :-
  369    prop(Prop, DTD),
  370    '$dtd_property'(DTD, Prop).
  371
  372
  373                 /*******************************
  374                 *             SGML             *
  375                 *******************************/
  376
  377%!  load_structure(+Source, -ListOfContent, :Options) is det.
  378%
  379%   Parse   Source   and   return   the   resulting   structure   in
  380%   ListOfContent. Source is handed to  open_any/5, which allows for
  381%   processing an extensible set of input sources.
  382%
  383%   A proper XML document contains only   a  single toplevel element
  384%   whose name matches the document type.   Nevertheless,  a list is
  385%   returned for consistency with  the   representation  of  element
  386%   content.
  387%
  388%   The  encoding(+Encoding)  option   is    treated   special   for
  389%   compatibility reasons:
  390%
  391%     - If `Encoding` is one of =iso-8859-1=, =us-ascii= or =utf-8=,
  392%       the stream is opened in binary mode and the option is passed
  393%       to the SGML parser.
  394%     - If `Encoding` is present, but not one of the above, the
  395%       stream is opened in text mode using the given encoding.
  396%     - Otherwise (no `Encoding`), the stream is opened in binary
  397%       mode and doing the correct decoding is left to the parser.
  398
  399load_structure(Spec, DOM, Options) :-
  400    sgml_open_options(Options, OpenOptions, SGMLOptions),
  401    setup_call_cleanup(
  402        open_any(Spec, read, In, Close, OpenOptions),
  403        load_structure_from_stream(In, DOM, SGMLOptions),
  404        close_any(Close)).
  405
  406sgml_open_options(Options, OpenOptions, SGMLOptions) :-
  407    Options = M:Plain,
  408    (   select_option(encoding(Encoding), Plain, NoEnc)
  409    ->  (   sgml_encoding(Encoding)
  410        ->  merge_options(NoEnc, [type(binary)], OpenOptions),
  411            SGMLOptions = Options
  412        ;   OpenOptions = Plain,
  413            SGMLOptions = M:NoEnc
  414        )
  415    ;   merge_options(Plain, [type(binary)], OpenOptions),
  416        SGMLOptions = Options
  417    ).
  418
  419sgml_encoding(Enc) :-
  420    downcase_atom(Enc, Enc1),
  421    sgml_encoding_l(Enc1).
  422
  423sgml_encoding_l('iso-8859-1').
  424sgml_encoding_l('us-ascii').
  425sgml_encoding_l('utf-8').
  426sgml_encoding_l('utf8').
  427sgml_encoding_l('iso_latin_1').
  428sgml_encoding_l('ascii').
  429
  430load_structure_from_stream(In, Term, M:Options) :-
  431    (   select_option(dtd(DTD), Options, Options1)
  432    ->  ExplicitDTD = true
  433    ;   ExplicitDTD = false,
  434        Options1 = Options
  435    ),
  436    move_front(Options1, dialect(_), Options2), % dialect sets defaults
  437    setup_call_cleanup(
  438        new_sgml_parser(Parser,
  439                        [ dtd(DTD)
  440                        ]),
  441        parse(Parser, M:Options2, TermRead, In),
  442        free_sgml_parser(Parser)),
  443    (   ExplicitDTD == true
  444    ->  (   DTD = dtd(_, DocType),
  445            dtd_property(DTD, doctype(DocType))
  446        ->  true
  447        ;   true
  448        )
  449    ;   free_dtd(DTD)
  450    ),
  451    Term = TermRead.
  452
  453move_front(Options0, Opt, Options) :-
  454    selectchk(Opt, Options0, Options1),
  455    !,
  456    Options = [Opt|Options1].
  457move_front(Options, _, Options).
  458
  459
  460parse(Parser, M:Options, Document, In) :-
  461    set_parser_options(Options, Parser, In, Options1),
  462    parser_meta_options(Options1, M, Options2),
  463    set_input_location(Parser, In),
  464    sgml_parse(Parser,
  465               [ document(Document),
  466                 source(In)
  467               | Options2
  468               ]).
  469
  470set_parser_options([], _, _, []).
  471set_parser_options([H|T], Parser, In, Rest) :-
  472    (   set_parser_option(H, Parser, In)
  473    ->  set_parser_options(T, Parser, In, Rest)
  474    ;   Rest = [H|R2],
  475        set_parser_options(T, Parser, In, R2)
  476    ).
  477
  478set_parser_option(Var, _Parser, _In) :-
  479    var(Var),
  480    !,
  481    instantiation_error(Var).
  482set_parser_option(Option, Parser, _) :-
  483    def_entity(Option, Parser),
  484    !.
  485set_parser_option(offset(Offset), _Parser, In) :-
  486    !,
  487    seek(In, Offset, bof, _).
  488set_parser_option(Option, Parser, _In) :-
  489    parser_option(Option),
  490    !,
  491    set_sgml_parser(Parser, Option).
  492set_parser_option(Name=Value, Parser, In) :-
  493    Option =.. [Name,Value],
  494    set_parser_option(Option, Parser, In).
  495
  496
  497parser_option(dialect(_)).
  498parser_option(shorttag(_)).
  499parser_option(case_sensitive_attributes(_)).
  500parser_option(case_preserving_attributes(_)).
  501parser_option(system_entities(_)).
  502parser_option(max_memory(_)).
  503parser_option(file(_)).
  504parser_option(line(_)).
  505parser_option(space(_)).
  506parser_option(number(_)).
  507parser_option(defaults(_)).
  508parser_option(doctype(_)).
  509parser_option(qualify_attributes(_)).
  510parser_option(encoding(_)).
  511parser_option(keep_prefix(_)).
  512
  513
  514def_entity(entity(Name, Value), Parser) :-
  515    get_sgml_parser(Parser, dtd(DTD)),
  516    xml_quote_attribute(Value, QValue),
  517    setup_call_cleanup(open_dtd(DTD, [], Stream),
  518                       format(Stream, '<!ENTITY ~w "~w">~n',
  519                              [Name, QValue]),
  520                       close(Stream)).
  521def_entity(xmlns(URI), Parser) :-
  522    set_sgml_parser(Parser, xmlns(URI)).
  523def_entity(xmlns(NS, URI), Parser) :-
  524    set_sgml_parser(Parser, xmlns(NS, URI)).
  525
  526%!  parser_meta_options(+Options0, +Module, -Options)
  527%
  528%   Qualify meta-calling options to the parser.
  529
  530parser_meta_options([], _, []).
  531parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :-
  532    !,
  533    parser_meta_options(T0, M, T).
  534parser_meta_options([H|T0], M, [H|T]) :-
  535    parser_meta_options(T0, M, T).
  536
  537
  538%!  set_input_location(+Parser, +In:stream) is det.
  539%
  540%   Set the input location if this was not set explicitly
  541
  542set_input_location(Parser, _In) :-
  543    get_sgml_parser(Parser, file(_)),
  544    !.
  545set_input_location(Parser, In) :-
  546    stream_property(In, file_name(File)),
  547    !,
  548    set_sgml_parser(Parser, file(File)),
  549    stream_property(In, position(Pos)),
  550    set_sgml_parser(Parser, position(Pos)).
  551set_input_location(_, _).
  552
  553                 /*******************************
  554                 *           UTILITIES          *
  555                 *******************************/
  556
  557%!  load_sgml_file(+File, -DOM) is det.
  558%
  559%   Load SGML from File and unify   the resulting DOM structure with
  560%   DOM.
  561%
  562%   @deprecated     New code should use load_sgml/3.
  563
  564load_sgml_file(File, Term) :-
  565    load_sgml(File, Term, []).
  566
  567%!  load_xml_file(+File, -DOM) is det.
  568%
  569%   Load XML from File and unify   the  resulting DOM structure with
  570%   DOM.
  571%
  572%   @deprecated     New code should use load_xml/3.
  573
  574load_xml_file(File, Term) :-
  575    load_xml(File, Term, []).
  576
  577%!  load_html_file(+File, -DOM) is det.
  578%
  579%   Load HTML from File and unify   the resulting DOM structure with
  580%   DOM.
  581%
  582%   @deprecated     New code should use load_html/3.
  583
  584load_html_file(File, DOM) :-
  585    load_html(File, DOM, []).
  586
  587%!  load_html(+Input, -DOM, +Options) is det.
  588%
  589%   Load HTML text from Input and  unify the resulting DOM structure
  590%   with DOM. Options are passed   to load_structure/3, after adding
  591%   the following default options:
  592%
  593%     - dtd(DTD)
  594%     Pass the DTD for HTML as obtained using dtd(html, DTD).
  595%     - dialect(Dialect)
  596%     Current dialect from the Prolog flag =html_dialect=
  597%     - max_errors(-1)
  598%     - syntax_errors(quiet)
  599%     Most HTML encountered in the wild contains errors. Even in the
  600%     context of errors, the resulting DOM term is often a
  601%     reasonable guess at the intent of the author.
  602%
  603%   You may also want to use  the library(http/http_open) to support
  604%   loading from HTTP and HTTPS URLs. For example:
  605%
  606%   ==
  607%   :- use_module(library(http/http_open)).
  608%   :- use_module(library(sgml)).
  609%
  610%   load_html_url(URL, DOM) :-
  611%       load_html(URL, DOM, []).
  612%   ==
  613
  614load_html(File, Term, M:Options) :-
  615    current_prolog_flag(html_dialect, Dialect),
  616    dtd(Dialect, DTD),
  617    merge_options(Options,
  618                  [ dtd(DTD),
  619                    dialect(Dialect),
  620                    max_errors(-1),
  621                    syntax_errors(quiet)
  622                  ], Options1),
  623    load_structure(File, Term, M:Options1).
  624
  625%!  load_xml(+Input, -DOM, +Options) is det.
  626%
  627%   Load XML text from Input and   unify the resulting DOM structure
  628%   with DOM. Options are passed   to load_structure/3, after adding
  629%   the following default options:
  630%
  631%     - dialect(xml)
  632
  633load_xml(Input, DOM, M:Options) :-
  634    merge_options(Options,
  635                  [ dialect(xml)
  636                  ], Options1),
  637    load_structure(Input, DOM, M:Options1).
  638
  639%!  load_sgml(+Input, -DOM, +Options) is det.
  640%
  641%   Load SGML text from Input and  unify the resulting DOM structure
  642%   with DOM. Options are passed   to load_structure/3, after adding
  643%   the following default options:
  644%
  645%     - dialect(sgml)
  646
  647load_sgml(Input, DOM, M:Options) :-
  648    merge_options(Options,
  649                  [ dialect(sgml)
  650                  ], Options1),
  651    load_structure(Input, DOM, M:Options1).
  652
  653
  654
  655                 /*******************************
  656                 *            ENCODING          *
  657                 *******************************/
  658
  659%!  xml_quote_attribute(+In, -Quoted) is det.
  660%!  xml_quote_cdata(+In, -Quoted) is det.
  661%
  662%   Backward  compatibility  for  versions  that  allow  to  specify
  663%   encoding. All characters that cannot fit the encoding are mapped
  664%   to XML character entities (&#dd;).  Using   ASCII  is the safest
  665%   value.
  666
  667xml_quote_attribute(In, Quoted) :-
  668    xml_quote_attribute(In, Quoted, ascii).
  669
  670xml_quote_cdata(In, Quoted) :-
  671    xml_quote_cdata(In, Quoted, ascii).
  672
  673%!  xml_name(+Atom) is semidet.
  674%
  675%   True if Atom is a valid XML name.
  676
  677xml_name(In) :-
  678    xml_name(In, ascii).
  679
  680
  681                 /*******************************
  682                 *    XML CHARACTER CLASSES     *
  683                 *******************************/
  684
  685%!  xml_basechar(+CodeOrChar) is semidet.
  686%!  xml_ideographic(+CodeOrChar) is semidet.
  687%!  xml_combining_char(+CodeOrChar) is semidet.
  688%!  xml_digit(+CodeOrChar) is semidet.
  689%!  xml_extender(+CodeOrChar) is semidet.
  690%
  691%   XML  character  classification   predicates.    Each   of  these
   692%   predicates accepts both a character   (one-character  atom) and a
  693%   code (integer).
  694%
  695%   @see http://www.w3.org/TR/2006/REC-xml-20060816
  696
  697
  698                 /*******************************
  699                 *         TYPE CHECKING        *
  700                 *******************************/
  701
  702%!  xml_is_dom(@Term) is semidet.
  703%
  704%   True  if  term  statisfies   the    structure   as  returned  by
  705%   load_structure/3 and friends.
  706
  707xml_is_dom(0) :- !, fail.               % catch variables
  708xml_is_dom(List) :-
  709    is_list(List),
  710    !,
  711    xml_is_content_list(List).
  712xml_is_dom(Term) :-
  713    xml_is_element(Term).
  714
  715xml_is_content_list([]).
  716xml_is_content_list([H|T]) :-
  717    xml_is_content(H),
  718    xml_is_content_list(T).
  719
  720xml_is_content(0) :- !, fail.
  721xml_is_content(pi(Pi)) :-
  722    !,
  723    atom(Pi).
  724xml_is_content(CDATA) :-
  725    atom(CDATA),
  726    !.
  727xml_is_content(CDATA) :-
  728    string(CDATA),
  729    !.
  730xml_is_content(Term) :-
  731    xml_is_element(Term).
  732
  733xml_is_element(element(Name, Attributes, Content)) :-
  734    dom_name(Name),
  735    dom_attributes(Attributes),
  736    xml_is_content_list(Content).
  737
  738dom_name(NS:Local) :-
  739    atom(NS),
  740    atom(Local),
  741    !.
  742dom_name(Local) :-
  743    atom(Local).
  744
  745dom_attributes(0) :- !, fail.
  746dom_attributes([]).
  747dom_attributes([H|T]) :-
  748    dom_attribute(H),
  749    dom_attributes(T).
  750
  751dom_attribute(Name=Value) :-
  752    dom_name(Name),
  753    atomic(Value).
  754
  755
  756                 /*******************************
  757                 *            MESSAGES          *
  758                 *******************************/
  759:- multifile
  760    prolog:message/3.  761
  762%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  763
  764prolog:message(sgml(Parser, File, Line, Message)) -->
  765    { get_sgml_parser(Parser, dialect(Dialect))
  766    },
  767    [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
  768
  769
  770                 /*******************************
  771                 *         XREF SUPPORT         *
  772                 *******************************/
  773
  774:- multifile
  775    prolog:called_by/2.  776
  777prolog:called_by(sgml_parse(_, Options), Called) :-
  778    findall(Meta, meta_call_term(_, Meta, Options), Called).
  779
  780meta_call_term(T, G+N, Options) :-
  781    T = call(Event, G),
  782    pmember(T, Options),
  783    call_params(Event, Term),
  784    functor(Term, _, N).
  785
  786pmember(X, List) :-                     % member for partial lists
  787    nonvar(List),
  788    List = [H|T],
  789    (   X = H
  790    ;   pmember(X, T)
  791    ).
  792
  793call_params(begin, begin(tag,attributes,parser)).
  794call_params(end,   end(tag,parser)).
  795call_params(cdata, cdata(cdata,parser)).
  796call_params(pi,    pi(cdata,parser)).
  797call_params(decl,  decl(cdata,parser)).
  798call_params(error, error(severity,message,parser)).
  799call_params(xmlns, xmlns(namespace,url,parser)).
  800call_params(urlns, urlns(url,url,parser)).
  801
  802                 /*******************************
  803                 *           SANDBOX            *
  804                 *******************************/
  805
  806:- multifile
  807    sandbox:safe_primitive/1,
  808    sandbox:safe_meta_predicate/1.  809
  810sandbox:safe_meta_predicate(sgml:load_structure/3).
  811sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
  812    dtd_alias(Dialect, _).
  813sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  814sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  815sandbox:safe_primitive(sgml:xml_name(_,_)).
  816sandbox:safe_primitive(sgml:xml_basechar(_)).
  817sandbox:safe_primitive(sgml:xml_ideographic(_)).
  818sandbox:safe_primitive(sgml:xml_combining_char(_)).
  819sandbox:safe_primitive(sgml:xml_digit(_)).
  820sandbox:safe_primitive(sgml:xml_extender(_)).
  821sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  822sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  823sandbox:safe_primitive(sgml:xsd_time_string(_,_,_))