View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(dcg_basics,
   37	  [ white//0,			% <white inside line>
   38	    whites//0,			% <white inside line>*
   39	    blank//0,			% <blank>
   40	    blanks//0,			% <blank>*
   41	    nonblank//1,		% <nonblank>
   42	    nonblanks//1,		% <nonblank>* --> chars		(long)
   43	    blanks_to_nl//0,		% [space,tab,ret]*nl
   44	    string//1,			% <any>* -->chars		(short)
   45	    string_without//2,		% Exclude, -->chars		(long)
   46					% Characters
   47	    alpha_to_lower//1,		% Get lower|upper, return lower
   48					% Decimal numbers
   49	    digits//1,			% [0-9]* -->chars
   50	    digit//1,			% [0-9] --> char
   51	    integer//1,			% [+-][0-9]+ --> integer
   52	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   53	    number//1,			% integer | float
   54					% Hexadecimal numbers
   55	    xdigits//1,			% [0-9A-Fa-f]* --> 0-15*
   56	    xdigit//1,			% [0-9A-Fa-f] --> 0-15
   57	    xinteger//1,		% [0-9A-Fa-f]+ --> integer
   58
   59	    prolog_var_name//1,		% Read a Prolog variable name
   60
   61	    eos//0,			% Test end of input.
   62	    remainder//1,		% -List
   63
   64					% generation (TBD)
   65	    atom//1			% generate atom
   66	  ]).   67:- use_module(library(lists)).   68:- use_module(library(error)).   69
   70
   71/** <module> Various general DCG utilities
   72
   73This library provides various commonly  used   DCG  primitives acting on
   74list  of  character  *codes*.  Character   classification  is  based  on
   75code_type/2.
   76
   77This module started its life as  library(http/dcg_basics) to support the
   78HTTP protocol. Since then, it was increasingly  used in code that has no
   79relation to HTTP and therefore  this  library   was  moved  to  the core
   80library.
   81
   82@tbd	This is just a starting point. We need a comprehensive set of
   83	generally useful DCG primitives.
   84*/
   85
   86%%	string_without(+EndCodes, -Codes)// is det.
   87%
   88%	Take as many codes from the input  until the next character code
   89%	appears in the list EndCodes.  The   terminating  code itself is
   90%	left on the input.  Typical  use  is   to  read  upto  a defined
   91%	delimiter such as a newline  or   other  reserved character. For
   92%	example:
   93%
   94%	    ==
   95%	        ...,
   96%	        string_without("\n", RestOfLine)
   97%	    ==
   98%
   99%	@arg EndCodes is a list of character codes.
  100%	@see string//1.
  101
  102string_without(End, Codes) -->
  103	{ string(End), !,
  104	  string_codes(End, EndCodes)
  105	},
  106	list_string_without(EndCodes, Codes).
  107string_without(End, Codes) -->
  108	list_string_without(End, Codes).
  109
  110list_string_without(Not, [C|T]) -->
  111	[C],
  112	{ \+ memberchk(C, Not)
  113	}, !,
  114	list_string_without(Not, T).
  115list_string_without(_, []) -->
  116	[].
  117
  118%%	string(-Codes)// is nondet.
  119%
  120%	Take as few as possible tokens from the input, taking one more
  121%	each time on backtracking. This code is normally followed by a
  122%	test for a delimiter.  For example:
  123%
  124%	==
  125%	upto_colon(Atom) -->
  126%		string(Codes), ":", !,
  127%		{ atom_codes(Atom, Codes) }.
  128%	==
  129%
  130%	@see string_without//2.
  131
  132string([]) -->
  133	[].
  134string([H|T]) -->
  135	[H],
  136	string(T).
  137
  138%%	blanks// is det.
  139%
  140%	Skip zero or more white-space characters.
  141
  142blanks -->
  143	blank, !,
  144	blanks.
  145blanks -->
  146	[].
  147
  148%%	blank// is semidet.
  149%
  150%	Take next =space= character from input. Space characters include
  151%	newline.
  152%
  153%	@see white//0
  154
  155blank -->
  156	[C],
  157	{ nonvar(C),
  158	  code_type(C, space)
  159	}.
  160
  161%%	nonblanks(-Codes)// is det.
  162%
  163%	Take all =graph= characters
  164
  165nonblanks([H|T]) -->
  166	[H],
  167	{ code_type(H, graph)
  168	}, !,
  169	nonblanks(T).
  170nonblanks([]) -->
  171	[].
  172
  173%%	nonblank(-Code)// is semidet.
  174%
  175%	Code is the next non-blank (=graph=) character.
  176
  177nonblank(H) -->
  178	[H],
  179	{ code_type(H, graph)
  180	}.
  181
  182%%	blanks_to_nl// is semidet.
  183%
  184%	Take a sequence of blank//0 codes if blanks are followed by a
  185%	newline or end of the input.
  186
  187blanks_to_nl -->
  188	"\n", !.
  189blanks_to_nl -->
  190	blank, !,
  191	blanks_to_nl.
  192blanks_to_nl -->
  193	eos.
  194
  195%%	whites// is det.
  196%
  197%	Skip white space _inside_ a line.
  198%
  199%	@see blanks//0 also skips newlines.
  200
  201whites -->
  202	white, !,
  203	whites.
  204whites -->
  205	[].
  206
  207%%	white// is semidet.
  208%
  209%	Take next =white= character from input. White characters do
  210%	_not_ include newline.
  211
  212white -->
  213	[C],
  214	{ nonvar(C),
  215	  code_type(C, white)
  216	}.
  217
  218
  219		 /*******************************
  220		 *	 CHARACTER STUFF	*
  221		 *******************************/
  222
  223%%	alpha_to_lower(?C)// is semidet.
  224%
  225%	Read a letter (class  =alpha=)  and   return  it  as a lowercase
  226%	letter. If C is instantiated and the  DCG list is already bound,
  227%	C must be =lower= and matches both a lower and uppercase letter.
  228%	If the output list is unbound, its first element is bound to C.
  229%	For example:
  230%
  231%	  ==
  232%	  ?- alpha_to_lower(0'a, `AB`, R).
  233%	  R = [66].
  234%	  ?- alpha_to_lower(C, `AB`, R).
  235%	  C = 97, R = [66].
  236%	  ?- alpha_to_lower(0'a, L, R).
  237%	  L = [97|R].
  238%	  ==
  239
  240alpha_to_lower(L) -->
  241	[C],
  242	{   nonvar(C)
  243	->  code_type(C, alpha),
  244	    code_type(C, to_upper(L))
  245	;   L = C
  246	}.
  247
  248
  249		 /*******************************
  250		 *	      NUMBERS		*
  251		 *******************************/
  252
  253%%	digits(?Chars)// is det.
  254%%	digit(?Char)// is det.
  255%%	integer(?Integer)// is det.
  256%
  257%	Number processing. The predicate  digits//1   matches a possibly
  258%	empty set of digits,  digit//1  processes   a  single  digit and
  259%	integer processes an  optional  sign   followed  by  a non-empty
  260%	sequence of digits into an integer.
  261
  262digits([H|T]) -->
  263	digit(H), !,
  264	digits(T).
  265digits([]) -->
  266	[].
  267
  268digit(C) -->
  269	[C],
  270	{ code_type(C, digit)
  271	}.
  272
  273integer(I, Head, Tail) :-
  274	nonvar(I), !,
  275	format(codes(Head, Tail), '~d', [I]).
  276integer(I) -->
  277	int_codes(Codes),
  278	{ number_codes(I, Codes)
  279	}.
  280
  281int_codes([C,D0|D]) -->
  282	sign(C), !,
  283	digit(D0),
  284	digits(D).
  285int_codes([D0|D]) -->
  286	digit(D0),
  287	digits(D).
  288
  289
  290%%	float(?Float)// is det.
  291%
  292%	Process a floating  point  number.   The  actual  conversion  is
  293%	controlled by number_codes/2.
  294
  295float(F, Head, Tail) :-
  296	float(F), !,
  297	with_output_to(codes(Head, Tail), write(F)).
  298float(F) -->
  299	number(F),
  300	{ float(F) }.
  301
  302%%	number(+Number)// is det.
  303%%	number(-Number)// is semidet.
  304%
  305%	Generate extract a number. Handles   both  integers and floating
  306%	point numbers.
  307
  308number(N, Head, Tail) :-
  309	number(N), !,
  310	format(codes(Head, Tail), '~w', N).
  311number(N) -->
  312	{ var(N)
  313	},
  314	!,
  315	int_codes(I),
  316	(   dot,
  317	    digit(DF0),
  318	    digits(DF)
  319	->  {F = [0'., DF0|DF]}
  320	;   {F = []}
  321	),
  322	(   exp
  323	->  int_codes(DI),
  324	    {E=[0'e|DI]}
  325	;   {E = []}
  326	),
  327	{ append([I, F, E], Codes),
  328	  number_codes(N, Codes)
  329	}.
  330number(N) -->
  331	{ type_error(number, N) }.
  332
  333sign(0'-) --> "-".
  334sign(0'+) --> "+".
  335
  336dot --> ".".
  337
  338exp --> "e".
  339exp --> "E".
  340
  341		 /*******************************
  342		 *	    HEX NUMBERS		*
  343		 *******************************/
  344
  345%%	xinteger(+Integer)// is det.
  346%%	xinteger(-Integer)// is semidet.
  347%
  348%	Generate or extract an integer from   a  sequence of hexadecimal
  349%	digits. Hexadecimal characters include both  uppercase (A-F) and
  350%	lowercase (a-f) letters. The value may   be  preceded by  a sign
  351%	(+/-)
  352
  353xinteger(Val, Head, Tail) :-
  354	integer(Val), !,
  355	format(codes(Head, Tail), '~16r', [Val]).
  356xinteger(Val) -->
  357	sign(C), !,
  358	xdigit(D0),
  359	xdigits(D),
  360	{ mkval([D0|D], 16, Val0),
  361	  (   C == 0'-
  362	  ->  Val is -Val0
  363	  ;   Val = Val0
  364	  )
  365	}.
  366xinteger(Val) -->
  367	xdigit(D0),
  368	xdigits(D),
  369	{ mkval([D0|D], 16, Val)
  370	}.
  371
  372%%	xdigit(-Weight)// is semidet.
  373%
  374%	True if the next code is a  hexdecimal digit with Weight. Weight
  375%	is  between  0  and  15.  Hexadecimal  characters  include  both
  376%	uppercase (A-F) and lowercase (a-f) letters.
  377
  378xdigit(D) -->
  379	[C],
  380	{ code_type(C, xdigit(D))
  381	}.
  382
  383%%	xdigits(-WeightList)// is det.
  384%
  385%	List of weights of a sequence   of hexadecimal codes. WeightList
  386%	may be empty. Hexadecimal  characters   include  both  uppercase
  387%	(A-F) and lowercase (a-f) letters.
  388
  389xdigits([D0|D]) -->
  390	xdigit(D0), !,
  391	xdigits(D).
  392xdigits([]) -->
  393	[].
  394
  395mkval([W0|Weights], Base, Val) :-
  396	mkval(Weights, Base, W0, Val).
  397
  398mkval([], _, W, W).
  399mkval([H|T], Base, W0, W) :-
  400	W1 is W0*Base+H,
  401	mkval(T, Base, W1, W).
  402
  403
  404		 /*******************************
  405		 *	   END-OF-STRING	*
  406		 *******************************/
  407
  408%%	eos//
  409%
  410%	Matches  end-of-input.  The  implementation    behaves   as  the
  411%	following portable implementation:
  412%
  413%	  ==
  414%	  eos --> call(eos_).
  415%	  eos_([], []).
  416%	  ==
  417%
  418%	@tbd	This is a difficult concept and violates the _context free_
  419%		property of DCGs.  Explain the exact problems.
  420
  421eos([], []).
  422
  423%%	remainder(-List)//
  424%
  425%	Unify List with the remainder of the input.
  426
  427remainder(List, List, []).
  428
  429
  430		 /*******************************
  431		 *	   PROLOG SYNTAX		*
  432		 *******************************/
  433
  434%%	prolog_var_name(-Name:atom)// is semidet.
  435%
  436%	Matches a Prolog variable name. Primarily  intended to deal with
  437%	quasi quotations that embed Prolog variables.
  438
  439prolog_var_name(Name) -->
  440	[C0], { code_type(C0, prolog_var_start) }, !,
  441	prolog_id_cont(CL),
  442	{ atom_codes(Name, [C0|CL]) }.
  443
  444prolog_id_cont([H|T]) -->
  445	[H], { code_type(H, prolog_identifier_continue) }, !,
  446	prolog_id_cont(T).
  447prolog_id_cont([]) --> "".
  448
  449
  450		 /*******************************
  451		 *	     GENERATION		*
  452		 *******************************/
  453
  454%%	atom(++Atom)// is det.
  455%
  456%	Generate codes of Atom.  Current implementation uses write/1,
  457%	dealing with any Prolog term.  Atom must be ground though.
  458
  459atom(Atom, Head, Tail) :-
  460	must_be(ground, Atom),
  461	format(codes(Head, Tail), '~w', [Atom])