View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(dcg_basics,
   37	  [ white//0,			% <white inside line>
   38	    whites//0,			% <white inside line>*
   39	    blank//0,			% <blank>
   40	    blanks//0,			% <blank>*
   41	    nonblank//1,		% <nonblank>
   42	    nonblanks//1,		% <nonblank>* --> chars		(long)
   43	    blanks_to_nl//0,		% [space,tab,ret]*nl
   44	    string//1,			% <any>* -->chars		(short)
   45	    string_without//2,		% Exclude, -->chars		(long)
   46					% Characters
   47	    alpha_to_lower//1,		% Get lower|upper, return lower
   48					% Decimal numbers
   49	    digits//1,			% [0-9]* -->chars
   50	    digit//1,			% [0-9] --> char
   51	    integer//1,			% [+-][0-9]+ --> integer
   52	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   53	    number//1,			% integer | float
   54					% Hexadecimal numbers
   55	    xdigits//1,			% [0-9A-Fa-f]* --> 0-15*
   56	    xdigit//1,			% [0-9A-Fa-f] --> 0-15
   57	    xinteger//1,		% [0-9A-Fa-f]+ --> integer
   58
   59	    prolog_var_name//1,		% Read a Prolog variable name
   60
   61	    eos//0,			% Test end of input.
   62	    remainder//1,		% -List
   63
   64					% generation (TBD)
   65	    atom//1			% generate atom
   66	  ]).   67:- use_module(library(lists)).   68:- use_module(library(error)).

Various general DCG utilities

This library provides various commonly used DCG primitives acting on list of character codes. Character classification is based on code_type/2.

This module started its life as library(http/dcg_basics) to support the HTTP protocol. Since then, it was increasingly used in code that has no relation to HTTP and therefore this library was moved to the core library.

To be done
- This is just a starting point. We need a comprehensive set of generally useful DCG primitives. */
 string_without(+EndCodes, -Codes)// is det
Take as many codes from the input until the next character code appears in the list EndCodes. The terminating code itself is left on the input. Typical use is to read upto a defined delimiter such as a newline or other reserved character. For example:
    ...,
    string_without("\n", RestOfLine)
Arguments:
EndCodes- is a list of character codes.
See also
- string//1.
  102string_without(End, Codes) -->
  103	{ string(End), !,
  104	  string_codes(End, EndCodes)
  105	},
  106	list_string_without(EndCodes, Codes).
  107string_without(End, Codes) -->
  108	list_string_without(End, Codes).
  109
  110list_string_without(Not, [C|T]) -->
  111	[C],
  112	{ \+ memberchk(C, Not)
  113	}, !,
  114	list_string_without(Not, T).
  115list_string_without(_, []) -->
  116	[].
 string(-Codes)// is nondet
Take as few as possible tokens from the input, taking one more each time on backtracking. This code is normally followed by a test for a delimiter. For example:
upto_colon(Atom) -->
        string(Codes), ":", !,
        { atom_codes(Atom, Codes) }.
See also
- string_without//2.
  132string([]) -->
  133	[].
  134string([H|T]) -->
  135	[H],
  136	string(T).
 blanks// is det
Skip zero or more white-space characters.
  142blanks -->
  143	blank, !,
  144	blanks.
  145blanks -->
  146	[].
 blank// is semidet
Take next space character from input. Space characters include newline.
See also
- white//0
  155blank -->
  156	[C],
  157	{ nonvar(C),
  158	  code_type(C, space)
  159	}.
 nonblanks(-Codes)// is det
Take all graph characters
  165nonblanks([H|T]) -->
  166	[H],
  167	{ code_type(H, graph)
  168	}, !,
  169	nonblanks(T).
  170nonblanks([]) -->
  171	[].
 nonblank(-Code)// is semidet
Code is the next non-blank (graph) character.
  177nonblank(H) -->
  178	[H],
  179	{ code_type(H, graph)
  180	}.
 blanks_to_nl// is semidet
Take a sequence of blank//0 codes if blanks are followed by a newline or end of the input.
  187blanks_to_nl -->
  188	"\n", !.
  189blanks_to_nl -->
  190	blank, !,
  191	blanks_to_nl.
  192blanks_to_nl -->
  193	eos.
 whites// is det
Skip white space inside a line.
See also
- blanks//0 also skips newlines.
  201whites -->
  202	white, !,
  203	whites.
  204whites -->
  205	[].
 white// is semidet
Take next white character from input. White characters do not include newline.
  212white -->
  213	[C],
  214	{ nonvar(C),
  215	  code_type(C, white)
  216	}.
  217
  218
  219		 /*******************************
  220		 *	 CHARACTER STUFF	*
  221		 *******************************/
 alpha_to_lower(?C)// is semidet
Read a letter (class alpha) and return it as a lowercase letter. If C is instantiated and the DCG list is already bound, C must be lower and matches both a lower and uppercase letter. If the output list is unbound, its first element is bound to C. For example:
?- alpha_to_lower(0'a, `AB`, R).
R = [66].
?- alpha_to_lower(C, `AB`, R).
C = 97, R = [66].
?- alpha_to_lower(0'a, L, R).
L = [97|R].
  240alpha_to_lower(L) -->
  241	[C],
  242	{   nonvar(C)
  243	->  code_type(C, alpha),
  244	    code_type(C, to_upper(L))
  245	;   L = C
  246	}.
  247
  248
  249		 /*******************************
  250		 *	      NUMBERS		*
  251		 *******************************/
 digits(?Chars)// is det
 digit(?Char)// is det
 integer(?Integer)// is det
Number processing. The predicate digits//1 matches a possibly empty set of digits, digit//1 processes a single digit and integer processes an optional sign followed by a non-empty sequence of digits into an integer.
  262digits([H|T]) -->
  263	digit(H), !,
  264	digits(T).
  265digits([]) -->
  266	[].
  267
  268digit(C) -->
  269	[C],
  270	{ code_type(C, digit)
  271	}.
  272
  273integer(I, Head, Tail) :-
  274	nonvar(I), !,
  275	format(codes(Head, Tail), '~d', [I]).
  276integer(I) -->
  277	int_codes(Codes),
  278	{ number_codes(I, Codes)
  279	}.
  280
  281int_codes([C,D0|D]) -->
  282	sign(C), !,
  283	digit(D0),
  284	digits(D).
  285int_codes([D0|D]) -->
  286	digit(D0),
  287	digits(D).
 float(?Float)// is det
Process a floating point number. The actual conversion is controlled by number_codes/2.
  295float(F, Head, Tail) :-
  296	float(F), !,
  297	with_output_to(codes(Head, Tail), write(F)).
  298float(F) -->
  299	number(F),
  300	{ float(F) }.
 number(+Number)// is det
number(-Number)// is semidet
Generate extract a number. Handles both integers and floating point numbers.
  308number(N, Head, Tail) :-
  309	number(N), !,
  310	format(codes(Head, Tail), '~w', N).
  311number(N) -->
  312	{ var(N)
  313	},
  314	!,
  315	int_codes(I),
  316	(   dot,
  317	    digit(DF0),
  318	    digits(DF)
  319	->  {F = [0'., DF0|DF]}
  320	;   {F = []}
  321	),
  322	(   exp
  323	->  int_codes(DI),
  324	    {E=[0'e|DI]}
  325	;   {E = []}
  326	),
  327	{ append([I, F, E], Codes),
  328	  number_codes(N, Codes)
  329	}.
  330number(N) -->
  331	{ type_error(number, N) }.
  332
  333sign(0'-) --> "-".
  334sign(0'+) --> "+".
  335
  336dot --> ".".
  337
  338exp --> "e".
  339exp --> "E".
  340
  341		 /*******************************
  342		 *	    HEX NUMBERS		*
  343		 *******************************/
 xinteger(+Integer)// is det
xinteger(-Integer)// is semidet
Generate or extract an integer from a sequence of hexadecimal digits. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters. The value may be preceded by a sign (+/-)
  353xinteger(Val, Head, Tail) :-
  354	integer(Val), !,
  355	format(codes(Head, Tail), '~16r', [Val]).
  356xinteger(Val) -->
  357	sign(C), !,
  358	xdigit(D0),
  359	xdigits(D),
  360	{ mkval([D0|D], 16, Val0),
  361	  (   C == 0'-
  362	  ->  Val is -Val0
  363	  ;   Val = Val0
  364	  )
  365	}.
  366xinteger(Val) -->
  367	xdigit(D0),
  368	xdigits(D),
  369	{ mkval([D0|D], 16, Val)
  370	}.
 xdigit(-Weight)// is semidet
True if the next code is a hexdecimal digit with Weight. Weight is between 0 and 15. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  378xdigit(D) -->
  379	[C],
  380	{ code_type(C, xdigit(D))
  381	}.
 xdigits(-WeightList)// is det
List of weights of a sequence of hexadecimal codes. WeightList may be empty. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  389xdigits([D0|D]) -->
  390	xdigit(D0), !,
  391	xdigits(D).
  392xdigits([]) -->
  393	[].
  394
  395mkval([W0|Weights], Base, Val) :-
  396	mkval(Weights, Base, W0, Val).
  397
  398mkval([], _, W, W).
  399mkval([H|T], Base, W0, W) :-
  400	W1 is W0*Base+H,
  401	mkval(T, Base, W1, W).
  402
  403
  404		 /*******************************
  405		 *	   END-OF-STRING	*
  406		 *******************************/
 eos//
Matches end-of-input. The implementation behaves as the following portable implementation:
eos --> call(eos_).
eos_([], []).
To be done
- This is a difficult concept and violates the context free property of DCGs. Explain the exact problems.
  421eos([], []).
 remainder(-List)//
Unify List with the remainder of the input.
  427remainder(List, List, []).
  428
  429
  430		 /*******************************
  431		 *	   PROLOG SYNTAX		*
  432		 *******************************/
 prolog_var_name(-Name:atom)// is semidet
Matches a Prolog variable name. Primarily intended to deal with quasi quotations that embed Prolog variables.
  439prolog_var_name(Name) -->
  440	[C0], { code_type(C0, prolog_var_start) }, !,
  441	prolog_id_cont(CL),
  442	{ atom_codes(Name, [C0|CL]) }.
  443
  444prolog_id_cont([H|T]) -->
  445	[H], { code_type(H, prolog_identifier_continue) }, !,
  446	prolog_id_cont(T).
  447prolog_id_cont([]) --> "".
  448
  449
  450		 /*******************************
  451		 *	     GENERATION		*
  452		 *******************************/
 atom(++Atom)// is det
Generate codes of Atom. Current implementation uses write/1, dealing with any Prolog term. Atom must be ground though.
  459atom(Atom, Head, Tail) :-
  460	must_be(ground, Atom),
  461	format(codes(Head, Tail), '~w', [Atom])