35
36:- module(csv,
37 [ csv//1, 38 csv//2, 39
40 csv_read_file/2, 41 csv_read_file/3, 42 csv_read_stream/3, 43
44 csv_read_file_row/3, 45 csv_read_row/3, 46 csv_options/2, 47
48 csv_write_file/2, 49 csv_write_file/3, 50 csv_write_stream/3 51 ]). 52:- use_module(library(record),[(record)/1, op(_,_,record)]). 53
54:- autoload(library(apply),[maplist/2]). 55:- autoload(library(debug),[debug/3]). 56:- autoload(library(error),[must_be/2,domain_error/2]). 57:- autoload(library(lists),[append/3]). 58:- autoload(library(option),[option/2,select_option/4]). 59:- autoload(library(pure_input),
60 [phrase_from_file/3,phrase_from_stream/2]). 61:- autoload(library(readutil),[read_line_to_codes/2]). 62:- autoload(library(dcg/basics),[string//1,eos//0]). 63
64
77
78:- predicate_options(csv//2, 2,
79 [ separator(nonneg), 80 strip(boolean),
81 ignore_quotes(boolean),
82 convert(boolean),
83 case(oneof([down,preserve,up])),
84 functor(atom),
85 arity(-nonneg), 86 match_arity(boolean)
87 ]). 88:- predicate_options(csv_read_file/3, 3,
89 [ pass_to(csv//2, 2),
90 pass_to(phrase_from_file/3, 3)
91 ]). 92:- predicate_options(csv_read_file_row/3, 3,
93 [ pass_to(csv//2, 2),
94 pass_to(open/4, 4)
95 ]). 96:- predicate_options(csv_write_file/3, 3,
97 [ pass_to(csv//2, 2),
98 pass_to(open/4, 4)
99 ]). 100:- predicate_options(csv_write_stream/3, 3,
101 [ pass_to(csv//2, 2)
102 ]). 103
104
105:- record
106 csv_options(separator:integer=0',,
107 strip:boolean=false,
108 ignore_quotes:boolean=false,
109 convert:boolean=true,
110 case:oneof([down,preserve,up])=preserve,
111 functor:atom=row,
112 arity:integer,
113 match_arity:boolean=true,
114 skip_header:atom). 115
116
136
137
138csv_read_file(File, Rows) :-
139 csv_read_file(File, Rows, []).
140
141csv_read_file(File, Rows, Options) :-
142 default_separator(File, Options, Options1),
143 make_csv_options(Options1, Record, RestOptions),
144 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).
145
146
147default_separator(File, Options0, Options) :-
148 ( option(separator(_), Options0)
149 -> Options = Options0
150 ; file_name_extension(_, Ext0, File),
151 downcase_atom(Ext0, Ext),
152 ext_separator(Ext, Sep)
153 -> Options = [separator(Sep)|Options0]
154 ; Options = Options0
155 ).
156
157ext_separator(csv, 0',).
158ext_separator(tsv, 0'\t).
159
160
164
165csv_read_stream(Stream, Rows, Options) :-
166 make_csv_options(Options, Record, _),
167 phrase_from_stream(csv_roptions(Rows, Record), Stream).
168
169
218
219csv(Rows) -->
220 csv(Rows, []).
221
222csv(Rows, Options) -->
223 { make_csv_options(Options, Record, _) },
224 csv_roptions(Rows, Record).
225
226csv_roptions(Rows, Record) -->
227 { ground(Rows) },
228 !,
229 emit_csv(Rows, Record).
230csv_roptions(Rows, Record) -->
231 skip_header(Record),
232 csv_data(Rows, Record).
233
(Options) -->
235 { csv_options_skip_header(Options, CommentStart),
236 nonvar(CommentStart),
237 atom_codes(CommentStart, Codes)
238 },
239 !,
240 skip_header_lines(Codes),
241 skip_blank_lines.
242skip_header(_) -->
243 [].
244
(CommentStart) -->
246 string(CommentStart),
247 !,
248 ( string(_Comment),
249 end_of_record
250 -> skip_header_lines(CommentStart)
251 ).
252skip_header_lines(_) -->
253 [].
254
255skip_blank_lines -->
256 eos,
257 !.
258skip_blank_lines -->
259 end_of_record,
260 !,
261 skip_blank_lines.
262skip_blank_lines -->
263 [].
264
265csv_data([], _) -->
266 eos,
267 !.
268csv_data([Row|More], Options) -->
269 row(Row, Options),
270 !,
271 { debug(csv, 'Row: ~p', [Row]) },
272 csv_data(More, Options).
273
274
275row(Row, Options) -->
276 fields(Fields, Options),
277 { csv_options_functor(Options, Functor),
278 Row =.. [Functor|Fields],
279 functor(Row, _, Arity),
280 check_arity(Options, Arity)
281 }.
282
283check_arity(Options, Arity) :-
284 csv_options_arity(Options, Arity),
285 !.
286check_arity(Options, _) :-
287 csv_options_match_arity(Options, false),
288 !.
289check_arity(Options, Arity) :-
290 csv_options_arity(Options, Expected),
291 domain_error(row_arity(Expected), Arity).
292
293fields([F|T], Options) -->
294 field(F, Options),
295 ( separator(Options)
296 -> fields(T, Options)
297 ; end_of_record
298 -> { T = [] }
299 ).
300
301field(Value, Options) -->
302 "\"",
303 { csv_options_ignore_quotes(Options, false) },
304 !,
305 string_codes(Codes),
306 { make_value(Codes, Value, Options) }.
307field(Value, Options) -->
308 { csv_options_strip(Options, true) },
309 !,
310 stripped_field(Value, Options).
311field(Value, Options) -->
312 { csv_options_separator(Options, Sep) },
313 field_codes(Codes, Sep),
314 { make_value(Codes, Value, Options) }.
315
316
317stripped_field(Value, Options) -->
318 ws,
319 ( "\"",
320 { csv_options_strip(Options, false) }
321 -> string_codes(Codes),
322 ws
323 ; { csv_options_separator(Options, Sep) },
324 field_codes(Codes0, Sep),
325 { strip_trailing_ws(Codes0, Codes) }
326 ),
327 { make_value(Codes, Value, Options) }.
328
329ws --> " ", !, ws.
330ws --> "\t", !, ws.
331ws --> "".
332
333strip_trailing_ws(List, Stripped) :-
334 append(Stripped, WS, List),
335 all_ws(WS).
336
337all_ws([]).
338all_ws([32|T]) :- all_ws(T).
339all_ws([9|T]) :- all_ws(T).
340
341
346
347string_codes(List) -->
348 [H],
349 ( { H == 0'" }
350 -> ( "\""
351 -> { List = [H|T] },
352 string_codes(T)
353 ; { List = [] }
354 )
355 ; { List = [H|T] },
356 string_codes(T)
357 ).
358
359field_codes([], Sep), [Sep] --> [Sep], !.
360field_codes([], _), "\n" --> "\r\n", !.
361field_codes([], _), "\n" --> "\n", !.
362field_codes([], _), "\n" --> "\r", !.
363field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
364field_codes([], _) --> []. 365
370
371make_value(Codes, Value, Options) :-
372 csv_options_convert(Options, Convert),
373 csv_options_case(Options, Case),
374 make_value(Convert, Case, Codes, Value).
375
376make_value(true, preserve, Codes, Value) :-
377 !,
378 name(Value, Codes).
379make_value(true, Case, Codes, Value) :-
380 !,
381 ( number_string(Value, Codes)
382 -> true
383 ; make_value(false, Case, Codes, Value)
384 ).
385make_value(false, preserve, Codes, Value) :-
386 !,
387 atom_codes(Value, Codes).
388make_value(false, down, Codes, Value) :-
389 !,
390 string_codes(String, Codes),
391 downcase_atom(String, Value).
392make_value(false, up, Codes, Value) :-
393 string_codes(String, Codes),
394 upcase_atom(String, Value).
395
396separator(Options) -->
397 { csv_options_separator(Options, Sep) },
398 [Sep].
399
400end_of_record --> "\n". 401end_of_record --> "\r\n". 402end_of_record --> "\r". 403end_of_record --> eos. 404
405
426
427csv_read_file_row(File, Row, Options) :-
428 default_separator(File, Options, Options1),
429 make_csv_options(Options1, RecordOptions, Options2),
430 select_option(line(Line), Options2, RestOptions, _),
431 setup_call_cleanup(
432 open(File, read, Stream, RestOptions),
433 csv_read_stream_row(Stream, Row, Line, RecordOptions),
434 close(Stream)).
435
436csv_read_stream_row(Stream, Row, Line, Options) :-
437 between(1, infinite, Line),
438 ( csv_read_row(Stream, Row0, Options),
439 Row0 \== end_of_file
440 -> Row = Row0
441 ; !,
442 fail
443 ).
444
445
452
453csv_read_row(Stream, Row, _Record) :-
454 at_end_of_stream(Stream),
455 !,
456 Row = end_of_file.
457csv_read_row(Stream, Row, Record) :-
458 read_lines_to_codes(Stream, Codes, Record, even),
459 phrase(row(Row0, Record), Codes),
460 !,
461 Row = Row0.
462
463read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :-
464 read_line_to_codes(Stream, Codes0),
465 Codes0 \== end_of_file,
466 ( ( csv_options_ignore_quotes(Options, true)
467 ; check_quotes(Codes0, QuoteQuantity, even)
468 )
469 -> Codes = Codes0
470 ; append(Codes0, [0'\n|Tail], Codes),
471 read_lines_to_codes(Stream, Tail, Options, odd)
472 ).
473
474check_quotes([], QuoteQuantity, QuoteQuantity) :-
475 !.
476check_quotes([0'"|T], odd, Result) :-
477 !,
478 check_quotes(T, even, Result).
479check_quotes([0'"|T], even, Result) :-
480 !,
481 check_quotes(T, odd, Result).
482check_quotes([_|T], QuoteQuantity, Result) :-
483 check_quotes(T, QuoteQuantity, Result).
484
485
492
493csv_options(Compiled, Options) :-
494 make_csv_options(Options, Compiled, _Ignored).
495
496
497 500
508
509csv_write_file(File, Data) :-
510 csv_write_file(File, Data, []).
511
512csv_write_file(File, Data, Options) :-
513 must_be(list, Data),
514 default_separator(File, Options, Options1),
515 make_csv_options(Options1, OptionsRecord, RestOptions),
516 setup_call_cleanup(
517 open(File, write, Out, RestOptions),
518 maplist(csv_write_row(Out, OptionsRecord), Data),
519 close(Out)).
520
521csv_write_row(Out, OptionsRecord, Row) :-
522 phrase(emit_row(Row, OptionsRecord), String),
523 format(Out, '~s', [String]).
524
525emit_csv([], _) --> [].
526emit_csv([H|T], Options) -->
527 emit_row(H, Options),
528 emit_csv(T, Options).
529
530emit_row(Row, Options) -->
531 { Row =.. [_|Fields] },
532 emit_fields(Fields, Options),
533 "\r\n". 534
535emit_fields([], _) -->
536 "".
537emit_fields([H|T], Options) -->
538 emit_field(H, Options),
539 ( { T == [] }
540 -> []
541 ; { csv_options_separator(Options, Sep) },
542 [Sep],
543 emit_fields(T, Options)
544 ).
545
546emit_field(H, Options) -->
547 { ( atom(H)
548 -> atom_codes(H, Codes)
549 ; string(H)
550 -> string_codes(H, Codes)
551 )
552 },
553 !,
554 ( { needs_quotes(H, Options) }
555 -> "\"", emit_string(Codes), "\""
556 ; emit_codes(Codes)
557 ).
558emit_field([], _) -->
559 !,
560 { atom_codes('[]', Codes) },
561 emit_codes(Codes).
562emit_field(H, _) -->
563 { number_codes(H,Codes) },
564 emit_codes(Codes).
565
566needs_quotes(Atom, _) :-
567 sub_atom(Atom, _, _, _, '"'),
568 !.
569needs_quotes(Atom, _) :-
570 sub_atom(Atom, _, _, _, '\n'),
571 !.
572needs_quotes(Atom, _) :-
573 sub_atom(Atom, _, _, _, '\r'),
574 !.
575needs_quotes(Atom, Options) :-
576 csv_options_separator(Options, Sep),
577 char_code(Char, Sep),
578 sub_atom(Atom, _, _, _, Char),
579 !.
580
581emit_string([]) --> "".
582emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
583emit_string([H|T]) --> [H], emit_string(T).
584
585emit_codes([]) --> "".
586emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
587emit_codes([H|T]) --> [H], emit_codes(T).
588
589
605
606csv_write_stream(Stream, Data, Options) :-
607 must_be(list, Data),
608 make_csv_options(Options, OptionsRecord, _),
609 maplist(csv_write_row(Stream, OptionsRecord), Data)