1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker & Richard O'Keefe 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2004-2016, University of Amsterdam 7 VU University Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(sgml_write, 37 [ html_write/2, % +Data, +Options 38 html_write/3, % +Stream, +Data, +Options 39 sgml_write/2, % +Data, +Options 40 sgml_write/3, % +Stream, +Data, +Options 41 xml_write/2, % +Data, +Options 42 xml_write/3 % +Stream, +Data, +Options 43 ]). 44:- autoload(library(assoc), 45 [get_assoc/3,empty_assoc/1,put_assoc/4,list_to_assoc/2]). 46:- autoload(library(error), 47 [ must_be/2, 48 domain_error/2, 49 instantiation_error/1, 50 type_error/2 51 ]). 52:- autoload(library(gensym),[gensym/2]). 53:- autoload(library(lists),[select/3]). 54:- autoload(library(option),[option/3]). 55:- autoload(library(sgml),[dtd/2,dtd_property/2]). 56 57:- predicate_options(xml_write/2, 2, [pass_to(xml_write/3, 3)]). 58:- predicate_options(xml_write/3, 3, 59 [ dtd(any), 60 doctype(atom), 61 public(atom), 62 system(atom), 63 header(boolean), 64 nsmap(list), 65 indent(nonneg), 66 layout(boolean), 67 net(boolean), 68 cleanns(boolean) 69 ]). 70 71:- multifile 72 xmlns/2. % NS, URI
true
(default), remove duplicate xmlns
attributes.doctype(_)
, public(_)
, or
system(_)
is provided in Options.<foo/> (default, net(true)) <foo></foo> (net(false))
For SGML, this applies to empty elements, so you get
<foo> (if foo is declared to be EMPTY in the DTD) <foo></foo> (default, net(false)) <foo// (net(true))
and also to elements with character content not containing /
<b>xxx</b> (default, net(false)) <b/xxx/ (net(true)).
Note that if the stream is UTF-8, the system will write special characters as UTF-8 sequences, while if it is ISO Latin-1 it will use (character) entities if there is a DTD that provides them, otherwise it will use numeric character references.
176xml_write(Data, Options) :- 177 current_output(Stream), 178 xml_write(Stream, Data, Options). 179 180xml_write(Stream0, Data, Options) :- 181 fix_user_stream(Stream0, Stream), 182 ( stream_property(Stream, encoding(text)) 183 -> set_stream(Stream, encoding(utf8)), 184 call_cleanup(xml_write(Stream, Data, Options), 185 set_stream(Stream, encoding(text))) 186 ; new_state(xml, State), 187 init_state(Options, State), 188 get_state(State, nsmap, NSMap), 189 add_missing_namespaces(Data, NSMap, Data1), 190 emit_xml_encoding(Stream, Options), 191 emit_doctype(Options, Data, Stream), 192 write_initial_indent(State, Stream), 193 emit(Data1, Stream, State) 194 ). 195 196 197sgml_write(Data, Options) :- 198 current_output(Stream), 199 sgml_write(Stream, Data, Options). 200 201sgml_write(Stream0, Data, Options) :- 202 fix_user_stream(Stream0, Stream), 203 ( stream_property(Stream, encoding(text)) 204 -> set_stream(Stream, encoding(utf8)), 205 call_cleanup(sgml_write(Stream, Data, Options), 206 set_stream(Stream, encoding(text))) 207 ; new_state(sgml, State), 208 init_state(Options, State), 209 write_initial_indent(State, Stream), 210 emit_doctype(Options, Data, Stream), 211 emit(Data, Stream, State) 212 ). 213 214 215html_write(Data, Options) :- 216 current_output(Stream), 217 html_write(Stream, Data, Options). 218 219html_write(Stream, Data, Options) :- 220 sgml_write(Stream, Data, 221 [ dtd(html) 222 | Options 223 ]). 224 225fix_user_stream(user, user_output) :- !. 226fix_user_stream(Stream, Stream). 227 228 229init_state([], _). 230init_state([H|T], State) :- 231 update_state(H, State), 232 init_state(T, State). 233 234update_state(dtd(DTD), State) :- 235 !, 236 ( atom(DTD) 237 -> dtd(DTD, DTDObj) 238 ; DTDObj = DTD 239 ), 240 set_state(State, dtd, DTDObj), 241 dtd_character_entities(DTDObj, EntityMap), 242 set_state(State, entity_map, EntityMap). 243update_state(nsmap(Map), State) :- 244 !, 245 set_state(State, nsmap, Map). 246update_state(cleanns(Bool), State) :- 247 !, 248 must_be(boolean, Bool), 249 set_state(State, cleanns, Bool). 250update_state(indent(Indent), State) :- 251 !, 252 must_be(integer, Indent), 253 set_state(State, indent, Indent). 254update_state(layout(Bool), State) :- 255 !, 256 must_be(boolean, Bool), 257 set_state(State, layout, Bool). 258update_state(doctype(_), _) :- !. 259update_state(public(_), _) :- !. 260update_state(system(_), _) :- !. 261update_state(net(Bool), State) :- 262 !, 263 must_be(boolean, Bool), 264 set_state(State, net, Bool). 265update_state(header(Bool), _) :- 266 !, 267 must_be(boolean, Bool). 268update_state(Option, _) :- 269 domain_error(xml_write_option, Option). 270 271% emit_xml_encoding(+Stream, +Options) 272% 273% Emit the XML fileheader with encoding information. Setting the 274% right encoding on the output stream must be done before calling 275% xml_write/3. 276 277emit_xml_encoding(Out, Options) :- 278 option(header(Hdr), Options, true), 279 Hdr == true, 280 !, 281 stream_property(Out, encoding(Encoding)), 282 ( ( Encoding == utf8 283 ; Encoding == wchar_t 284 ) 285 -> format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', []) 286 ; Encoding == iso_latin_1 287 -> format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', []) 288 ; domain_error(xml_encoding, Encoding) 289 ). 290emit_xml_encoding(_, _).
300emit_doctype(_Options, Data, Out) :- 301 ( Data = [_|_], memberchk(element(html,Att,_), Data) 302 ; Data = element(html,Att,_) 303 ), 304 memberchk(version=Version, Att), 305 !, 306 format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]). 307emit_doctype(Options, Data, Out) :- 308 ( memberchk(public(PubId), Options) -> true 309 ; PubId = (-) 310 ), 311 ( memberchk(system(SysId), Options) -> true 312 ; SysId = (-) 313 ), 314 \+ (PubId == (-), 315 SysId == (-), 316 \+ memberchk(doctype(_), Options) 317 ), 318 ( Data = element(DocType,_,_) 319 ; Data = [_|_], memberchk(element(DocType,_,_), Data) 320 ; memberchk(doctype(DocType), Options) 321 ), 322 !, 323 write_doctype(Out, DocType, PubId, SysId). 324emit_doctype(_, _, _). 325 326write_doctype(Out, DocType, -, -) :- 327 !, 328 format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]). 329write_doctype(Out, DocType, -, SysId) :- 330 !, 331 format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]). 332write_doctype(Out, DocType, PubId, -) :- 333 !, 334 format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]). 335write_doctype(Out, DocType, PubId, SysId) :- 336 format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
343emit(Var, _, _) :- 344 var(Var), 345 !, 346 instantiation_error(Var). 347emit([], _, _) :- !. 348emit([H|T], Out, State) :- 349 !, 350 emit(H, Out, State), 351 emit(T, Out, State). 352emit(CDATA, Out, State) :- 353 atomic(CDATA), 354 !, 355 sgml_write_content(Out, CDATA, State). 356emit(Element, Out, State) :- 357 \+ \+ emit_element(Element, Out, State). 358 359emit_element(pi(PI), Out, State) :- 360 !, 361 get_state(State, entity_map, EntityMap), 362 write(Out, <?), 363 write_quoted(Out, PI, '', EntityMap), 364 ( get_state(State, dialect, xml) -> 365 write(Out, ?>) 366 ; write(Out, >) 367 ). 368emit_element(element(Name, Attributes, Content), Out, State) :- 369 !, 370 must_be(list, Attributes), 371 must_be(list, Content), 372 ( get_state(State, dialect, xml) 373 -> update_nsmap(Attributes, CleanAttrs, State), 374 ( get_state(State, cleanns, true) 375 -> WriteAttrs = CleanAttrs 376 ; WriteAttrs = Attributes 377 ) 378 ; WriteAttrs = Attributes 379 ), 380 att_length(WriteAttrs, State, Alen), 381 ( Alen > 60, 382 get_state(State, layout, true) 383 -> Sep = nl, 384 AttIndent = 4 385 ; Sep = sp, 386 AttIndent = 0 387 ), 388 put_char(Out, '<'), 389 emit_name(Name, Out, State), 390 ( AttIndent > 0 391 -> \+ \+ ( inc_indent(State, AttIndent), 392 attributes(WriteAttrs, Sep, Out, State) 393 ) 394 ; attributes(WriteAttrs, Sep, Out, State) 395 ), 396 content(Content, Out, Name, State). 397emit_element(E, _, _) :- 398 type_error(xml_dom, E). 399 400attributes([], _, _, _). 401attributes([H|T], Sep, Out, State) :- 402 ( Sep == nl 403 -> write_indent(State, Out) 404 ; put_char(Out, ' ') 405 ), 406 attribute(H, Out, State), 407 attributes(T, Sep, Out, State). 408 409attribute(Name=Value, Out, State) :- 410 emit_name(Name, Out, State), 411 put_char(Out, =), 412 sgml_write_attribute(Out, Value, State). 413 414att_length(Atts, State, Len) :- 415 att_length(Atts, State, 0, Len). 416 417att_length([], _, Len, Len). 418att_length([A0|T], State, Len0, Len) :- 419 alen(A0, State, AL), 420 Len1 is Len0 + 1 + AL, 421 att_length(T, State, Len1, Len). 422 423alen(ns(NS, _URI):Name=Value, _State, Len) :- 424 !, 425 atom_length(Value, AL), 426 vlen(Name, NL), 427 atom_length(NS, NsL), 428 Len is AL+NL+NsL+3. 429alen(URI:Name=Value, State, Len) :- 430 !, 431 atom_length(Value, AL), 432 vlen(Name, NL), 433 get_state(State, nsmap, Nsmap), 434 ( memberchk(NS=URI, Nsmap) 435 -> atom_length(NS, NsL) 436 ; atom_length(URI, NsL) 437 ), 438 Len is AL+NL+NsL+3. 439alen(Name=Value, _, Len) :- 440 atom_length(Name, NL), 441 vlen(Value, AL), 442 Len is AL+NL+3. 443 444vlen(Value, Len) :- 445 is_list(Value), 446 !, 447 vlen_list(Value, 0, Len). 448vlen(Value, Len) :- 449 atom_length(Value, Len). 450 451vlen_list([], L, L). 452vlen_list([H|T], L0, L) :- 453 atom_length(H, HL), 454 ( L0 == 0 455 -> L1 is L0 + HL 456 ; L1 is L0 + HL + 1 457 ), 458 vlen_list(T, L1, L). 459 460 461emit_name(Name, Out, _) :- 462 atom(Name), 463 !, 464 write(Out, Name). 465emit_name(ns(NS,_URI):Name, Out, _State) :- 466 !, 467 ( NS == '' 468 -> write(Out, Name) 469 ; format(Out, '~w:~w', [NS, Name]) 470 ). 471emit_name(URI:Name, Out, State) :- 472 get_state(State, nsmap, NSMap), 473 memberchk(NS=URI, NSMap), 474 !, 475 ( NS == [] 476 -> write(Out, Name) 477 ; format(Out, '~w:~w', [NS, Name]) 478 ). 479emit_name(Term, Out, _) :- % error? 480 write(Out, Term).
490update_nsmap(Attributes, Attributes1, State) :- 491 get_state(State, nsmap, Map0), 492 update_nsmap(Attributes, Attributes1, Map0, Map), 493 set_state(State, nsmap, Map). 494 495update_nsmap([], [], Map, Map). 496update_nsmap([xmlns:NS=URI|T], Attrs, Map0, Map) :- 497 !, 498 ( memberchk(NS=URI, Map0) 499 -> update_nsmap(T, Attrs, Map0, Map) 500 ; set_nsmap(NS, URI, Map0, Map1), 501 Attrs = [xmlns:NS=URI|Attrs1], 502 update_nsmap(T, Attrs1, Map1, Map) 503 ). 504update_nsmap([xmlns=URI|T], Attrs, Map0, Map) :- 505 !, 506 ( memberchk([]=URI, Map0) 507 -> update_nsmap(T, Attrs, Map0, Map) 508 ; set_nsmap([], URI, Map0, Map1), 509 Attrs = [xmlns=URI|Attrs1], 510 update_nsmap(T, Attrs1, Map1, Map) 511 ). 512update_nsmap([H|T0], [H|T], Map0, Map) :- 513 !, 514 update_nsmap(T0, T, Map0, Map). 515 516set_nsmap(NS, URI, Map0, Map) :- 517 select(NS=_, Map0, Map1), 518 !, 519 Map = [NS=URI|Map1]. 520set_nsmap(NS, URI, Map, [NS=URI|Map]).
530content([], Out, Element, State) :- % empty element 531 !, 532 ( get_state(State, net, true) 533 -> ( get_state(State, dialect, xml) -> 534 write(Out, />) 535 ; empty_element(State, Element) -> 536 write(Out, >) 537 ; write(Out, //) 538 ) 539 ;/* get_state(State, net, false) */ 540 write(Out, >), 541 ( get_state(State, dialect, sgml), 542 empty_element(State, Element) 543 -> true 544 ; emit_close(Element, Out, State) 545 ) 546 ). 547content([CDATA], Out, Element, State) :- 548 atomic(CDATA), 549 !, 550 ( get_state(State, dialect, sgml), 551 get_state(State, net, true), 552 \+ sub_atom(CDATA, _, _, _, /), 553 write_length(CDATA, Len, []), 554 Len < 20 555 -> write(Out, /), 556 sgml_write_content(Out, CDATA, State), 557 write(Out, /) 558 ; verbatim_element(Element, State) 559 -> write(Out, >), 560 write(Out, CDATA), 561 emit_close(Element, Out, State) 562 ;/* XML or not NET */ 563 write(Out, >), 564 sgml_write_content(Out, CDATA, State), 565 emit_close(Element, Out, State) 566 ). 567content(Content, Out, Element, State) :- 568 get_state(State, layout, true), 569 /* If xml:space='preserve' is present, */ 570 /* we MUST NOT tamper with white space at all. */ 571 \+ (Element = element(_,Atts,_), 572 memberchk('xml:space'=preserve, Atts) 573 ), 574 element_content(Content, Elements), 575 !, 576 format(Out, >, []), 577 \+ \+ ( 578 inc_indent(State), 579 write_element_content(Elements, Out, State) 580 ), 581 write_indent(State, Out), 582 emit_close(Element, Out, State). 583content(Content, Out, Element, State) :- 584 format(Out, >, []), 585 write_mixed_content(Content, Out, Element, State), 586 emit_close(Element, Out, State). 587 588verbatim_element(Element, State) :- 589 verbatim_element(Element), 590 get_state(State, dtd, DTD), 591 DTD \== (-), 592 dtd_property(DTD, doctype(html)). 593 594verbatim_element(script). 595verbatim_element(style). 596 597emit_close(Element, Out, State) :- 598 write(Out, '</'), 599 emit_name(Element, Out, State), 600 write(Out, '>'). 601 602 603write_mixed_content([], _, _, _). 604write_mixed_content([H|T], Out, Element, State) :- 605 write_mixed_content_element(H, Out, State), 606 write_mixed_content(T, Out, Element, State). 607 608write_mixed_content_element(H, Out, State) :- 609 ( atom(H) 610 -> sgml_write_content(Out, H, State) 611 ; string(H) 612 -> sgml_write_content(Out, H, State) 613 ; functor(H, element, 3) 614 -> emit(H, Out, State) 615 ; functor(H, pi, 1) 616 -> emit(H, Out, State) 617 ; var(H) 618 -> instantiation_error(H) 619 ; H = sdata(Data) % cannot be written without entity! 620 -> print_message(warning, sgml_write(sdata_as_cdata(Data))), 621 sgml_write_content(Out, Data, State) 622 ; type_error(sgml_content, H) 623 ). 624 625 626element_content([], []). 627element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- 628 !, 629 element_content(T0, T). 630element_content([Blank|T0], T) :- 631 atom(Blank), 632 atom_codes(Blank, Codes), 633 all_blanks(Codes), 634 element_content(T0, T). 635 636all_blanks([]). 637all_blanks([H|T]) :- 638 code_type(H, space), 639 all_blanks(T). 640 641write_element_content([], _, _). 642write_element_content([H|T], Out, State) :- 643 write_indent(State, Out), 644 emit(H, Out, State), 645 write_element_content(T, Out, State). 646 647 648 /******************************* 649 * NAMESPACES * 650 *******************************/
element(s)
to
deal with missing namespaces.657add_missing_namespaces([], _, []) :- !. 658add_missing_namespaces([H0|T0], Def, [H|T]) :- 659 !, 660 add_missing_namespaces(H0, Def, H), 661 add_missing_namespaces(T0, Def, T). 662add_missing_namespaces(Elem0, Def, Elem) :- 663 Elem0 = element(Name, Atts0, Content), 664 !, 665 missing_namespaces(Elem0, Def, Missing), 666 ( Missing == [] 667 -> Elem = Elem0 668 ; add_missing_ns(Missing, Atts0, Atts), 669 Elem = element(Name, Atts, Content) 670 ). 671add_missing_namespaces(DOM, _, DOM). % CDATA, etc. 672 673add_missing_ns([], Atts, Atts). 674add_missing_ns([H|T], Atts0, Atts) :- 675 generate_ns(H, NS, URL), 676 add_missing_ns(T, [xmlns:NS=URL|Atts0], Atts).
682generate_ns(URI, NS, URI) :- 683 xmlns(NS, URI), 684 !. 685generate_ns(ns(NS, URI), NS, URI) :- 686 !. 687generate_ns(URI, NS, URI) :- 688 default_ns(URI, NS), 689 !. 690generate_ns(URI, NS, URI) :- 691 gensym(xns, NS).
Default XML namespaces are:
712:- multifile 713 rdf_db:ns/2. 714 715default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi). 716default_ns('http://www.w3.org/2001/XMLSchema', xs). 717default_ns('http://www.w3.org/1999/xhtml', xhtml). 718default_ns('http://schemas.xmlsoap.org/soap/envelope/', soap11). 719default_ns('http://www.w3.org/2003/05/soap-envelope', soap12). 720default_ns(URI, NS) :- 721 rdf_db:ns(NS, URI).
728missing_namespaces(DOM, Defined, Missing) :- 729 missing_namespaces(DOM, Defined, [], Missing). 730 731missing_namespaces([], _, L, L) :- !. 732missing_namespaces([H|T], Def, L0, L) :- 733 !, 734 missing_namespaces(H, Def, L0, L1), 735 missing_namespaces(T, Def, L1, L). 736missing_namespaces(element(Name, Atts, Content), Def, L0, L) :- 737 !, 738 update_nsmap(Atts, _, Def, Def1), 739 missing_ns(Name, Def1, L0, L1), 740 missing_att_ns(Atts, Def1, L1, L2), 741 missing_namespaces(Content, Def1, L2, L). 742missing_namespaces(_, _, L, L). 743 744missing_att_ns([], _, M, M). 745missing_att_ns([Name=_|T], Def, M0, M) :- 746 missing_ns(Name, Def, M0, M1), 747 missing_att_ns(T, Def, M1, M). 748 749missing_ns(ns(NS, URI):_, Def, M0, M) :- 750 !, 751 ( ( memberchk(NS=URI, Def) 752 ; memberchk(NS=URI, M0) 753 ) 754 -> M = M0 755 ; NS == '' 756 -> M = M0 757 ; M = [ns(NS, URI)|M0] 758 ). 759missing_ns(URI:_, Def, M0, M) :- 760 !, 761 ( ( memberchk(_=URI, Def) 762 ; memberchk(URI, M0) 763 ; URI = xml % predefined ones 764 ; URI = xmlns 765 ) 766 -> M = M0 767 ; M = [URI|M0] 768 ). 769missing_ns(_, _, M, M). 770 771 /******************************* 772 * QUOTED WRITE * 773 *******************************/ 774 775sgml_write_attribute(Out, Values, State) :- 776 is_list(Values), 777 !, 778 get_state(State, entity_map, EntityMap), 779 put_char(Out, '"'), 780 write_quoted_list(Values, Out, '"<&\r\n\t', EntityMap), 781 put_char(Out, '"'). 782sgml_write_attribute(Out, Value, State) :- 783 is_text(Value), 784 !, 785 get_state(State, entity_map, EntityMap), 786 put_char(Out, '"'), 787 write_quoted(Out, Value, '"<&\r\n\t', EntityMap), 788 put_char(Out, '"'). 789sgml_write_attribute(Out, Value, _State) :- 790 number(Value), 791 !, 792 format(Out, '"~w"', [Value]). 793sgml_write_attribute(_, Value, _) :- 794 type_error(sgml_attribute_value, Value). 795 796write_quoted_list([], _, _, _). 797write_quoted_list([H|T], Out, Escape, EntityMap) :- 798 write_quoted(Out, H, Escape, EntityMap), 799 ( T == [] 800 -> true 801 ; put_char(Out, ' '), 802 write_quoted_list(T, Out, Escape, EntityMap) 803 ). 804 805 806sgml_write_content(Out, Value, State) :- 807 is_text(Value), 808 !, 809 get_state(State, entity_map, EntityMap), 810 write_quoted(Out, Value, '<&>\r', EntityMap). 811sgml_write_content(Out, Value, _) :- 812 write(Out, Value). 813 814is_text(Value) :- atom(Value), !. 815is_text(Value) :- string(Value), !. 816 817write_quoted(Out, Atom, Escape, EntityMap) :- 818 atom(Atom), 819 !, 820 atom_codes(Atom, Codes), 821 writeq(Codes, Out, Escape, EntityMap). 822write_quoted(Out, String, Escape, EntityMap) :- 823 string(String), 824 !, 825 string_codes(String, Codes), 826 writeq(Codes, Out, Escape, EntityMap). 827write_quoted(_, String, _, _) :- 828 type_error(atom_or_string, String).
833writeq([], _, _, _). 834writeq([H|T], Out, Escape, EntityMap) :- 835 ( char_code(HC, H), 836 sub_atom(Escape, _, _, _, HC) 837 -> write_entity(H, Out, EntityMap) 838 ; H >= 256 839 -> ( stream_property(Out, encoding(Enc)), 840 unicode_encoding(Enc) 841 -> put_code(Out, H) 842 ; write_entity(H, Out, EntityMap) 843 ) 844 ; put_code(Out, H) 845 ), 846 writeq(T, Out, Escape, EntityMap). 847 848unicode_encoding(utf8). 849unicode_encoding(wchar_t). 850unicode_encoding(unicode_le). 851unicode_encoding(unicode_be). 852 853write_entity(Code, Out, EntityMap) :- 854 ( get_assoc(Code, EntityMap, EntityName) 855 -> format(Out, '&~w;', [EntityName]) 856 ; format(Out, '&#x~16R;', [Code]) 857 ). 858 859 860 /******************************* 861 * INDENTATION * 862 *******************************/ 863 864write_initial_indent(State, Out) :- 865 ( get_state(State, indent, Indent), 866 Indent > 0 867 -> emit_indent(Indent, Out) 868 ; true 869 ). 870 871write_indent(State, _) :- 872 get_state(State, layout, false), 873 !. 874write_indent(State, Out) :- 875 get_state(State, indent, Indent), 876 emit_indent(Indent, Out). 877 878emit_indent(Indent, Out) :- 879 Tabs is Indent // 8, 880 Spaces is Indent mod 8, 881 format(Out, '~N', []), 882 write_n(Tabs, '\t', Out), 883 write_n(Spaces, ' ', Out). 884 885write_n(N, Char, Out) :- 886 ( N > 0 887 -> put_char(Out, Char), 888 N2 is N - 1, 889 write_n(N2, Char, Out) 890 ; true 891 ). 892 893inc_indent(State) :- 894 inc_indent(State, 2). 895 896inc_indent(State, Inc) :- 897 state(indent, Arg), 898 arg(Arg, State, I0), 899 I is I0 + Inc, 900 setarg(Arg, State, I). 901 902 903 /******************************* 904 * DTD HANDLING * 905 *******************************/
912empty_element(State, Element) :-
913 get_state(State, dtd, DTD),
914 DTD \== (-),
915 dtd_property(DTD, element(Element, _, empty)).
923dtd_character_entities(DTD, Map) :- 924 empty_assoc(Empty), 925 dtd_property(DTD, entities(Entities)), 926 fill_entity_map(Entities, DTD, Empty, Map). 927 928fill_entity_map([], _, Map, Map). 929fill_entity_map([H|T], DTD, Map0, Map) :- 930 ( dtd_property(DTD, entity(H, CharEntity)), 931 atom(CharEntity), 932 ( sub_atom(CharEntity, 0, _, _, '&#'), 933 sub_atom(CharEntity, _, _, 0, ';') 934 -> sub_atom(CharEntity, 2, _, 1, Name), 935 atom_number(Name, Code) 936 ; atom_length(CharEntity, 1), 937 char_code(CharEntity, Code) 938 ) 939 -> put_assoc(Code, Map0, H, Map1), 940 fill_entity_map(T, DTD, Map1, Map) 941 ; fill_entity_map(T, DTD, Map0, Map) 942 ). 943 944 945 946 /******************************* 947 * FIELDS * 948 *******************************/ 949 950state(indent, 1). % current indentation 951state(layout, 2). % use layout (true/false) 952state(dtd, 3). % DTD for entity names 953state(entity_map, 4). % compiled entity-map 954state(dialect, 5). % xml/sgml 955state(nsmap, 6). % defined namespaces 956state(net, 7). % Should null end-tags be used? 957state(cleanns, 8). % Remove duplicate xmlns declarations 958 959new_state(Dialect, 960 state( 961 0, % indent 962 true, % layout 963 -, % DTD 964 EntityMap, % entity_map 965 Dialect, % dialect 966 [], % NS=Full map 967 Net, % Null End-Tags? 968 true % Remove duplicate xmlns declarations 969 )) :- 970 ( Dialect == sgml 971 -> Net = false, 972 empty_assoc(EntityMap) 973 ; Net = true, 974 xml_entities(EntityMap) 975 ). 976 977get_state(State, Field, Value) :- 978 state(Field, Arg), 979 arg(Arg, State, Value). 980 981set_state(State, Field, Value) :- 982 state(Field, Arg), 983 setarg(Arg, State, Value). 984 985term_expansion(xml_entities(map), 986 xml_entities(Map)) :- 987 list_to_assoc([ 0'< - lt, 988 0'& - amp, 989 0'> - gt, 990 0'\' - apos, 991 0'\" - quot 992 ], Map). 993xml_entities(map). 994 995 /******************************* 996 * MESSAGES * 997 *******************************/ 998 999:- multifile 1000 prolog:message/3. 1001 1002prologmessage(sgml_write(sdata_as_cdata(Data))) --> 1003 [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ]
XML/SGML writer module
This library provides the inverse functionality of the
sgml.pl
parser library, writing XML, SGML and HTML documents from the parsed output. It is intended to allow rewriting in a different dialect or encoding or to perform document transformation in Prolog on the parsed representation.The current implementation is particularly keen on getting character encoding and the use of character entities right. Some work has been done providing layout, but space handling in XML and SGML make this a very hazardous area.
The Prolog-based low-level character and escape handling is the real bottleneck in this library and will probably be moved to C in a later stage.