1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker and Matt Lilley 4 E-mail: J.Wielemaker@cs.vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2012-2019, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(archive, 36 [ archive_open/3, % +Stream, -Archive, +Options 37 archive_open/4, % +Stream, +Mode, -Archive, +Options 38 archive_create/3, % +OutputFile, +InputFileList, +Options 39 archive_close/1, % +Archive 40 archive_property/2, % +Archive, ?Property 41 archive_next_header/2, % +Archive, -Name 42 archive_open_entry/2, % +Archive, -EntryStream 43 archive_header_property/2, % +Archive, ?Property 44 archive_set_header_property/2, % +Archive, +Property 45 archive_extract/3, % +Archive, +Dir, +Options 46 47 archive_entries/2, % +Archive, -Entries 48 archive_data_stream/3, % +Archive, -DataStream, +Options 49 archive_foldl/4 % :Goal, +Archive, +State0, -State 50 ]). 51:- autoload(library(error), 52 [existence_error/2,domain_error/2,must_be/2]). 53:- autoload(library(filesex), 54 [directory_file_path/3,make_directory_path/1]). 55:- autoload(library(lists),[member/2]). 56:- autoload(library(option),[option/3,option/2]). 57 58:- meta_predicate 59 archive_foldl( , , , ). 60 61/** <module> Access several archive formats 62 63This library uses _libarchive_ to access a variety of archive formats. 64The following example lists the entries in an archive: 65 66 ``` 67 list_archive(File) :- 68 archive_open(File, Archive, []), 69 repeat, 70 ( archive_next_header(Archive, Path) 71 -> format('~w~n', [Path]), 72 fail 73 ; !, 74 archive_close(Archive) 75 ). 76 ``` 77 78Here is another example which counts the files in the archive and prints 79file type information. It uses archive_foldl/4, a higher level 80predicate: 81 82 ``` 83 print_entry(Path, Handle, Cnt0, Cnt1) :- 84 archive_header_property(Handle, filetype(Type)), 85 format('File ~w is of type ~w~n', [Path, Type]), 86 Cnt1 is Cnt0 + 1. 87 88 list_archive(File) :- 89 archive_foldl(print_entry, File, 0, FileCount), 90 format('We have ~w files', [FileCount]). 91 ``` 92 93@see https://github.com/libarchive/libarchive/ 94*/ 95 96:- use_foreign_library(foreign(archive4pl)). 97 98%! archive_open(+Data, -Archive, +Options) is det. 99% 100% Wrapper around archive_open/4 that opens the archive in read mode. 101 102archive_open(Stream, Archive, Options) :- 103 archive_open(Stream, read, Archive, Options). 104 105:- predicate_options(archive_open/4, 4, 106 [ close_parent(boolean), 107 filter(oneof([all,bzip2,compress,gzip,grzip,lrzip, 108 lzip,lzma,lzop,none,rpm,uu,xz])), 109 format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar, 110 iso9660,lha,mtree,rar,raw,tar,xar,zip])) 111 ]). 112:- predicate_options(archive_create/3, 3, 113 [ directory(atom), 114 pass_to(archive_open/4, 4) 115 ]). 116 117%! archive_open(+Data, +Mode, -Archive, +Options) is det. 118% 119% Open the archive in Data and unify Archive with a handle to the 120% opened archive. Data is either a file or a stream that contains 121% a valid archive. Details are controlled by Options. Typically, 122% the option close_parent(true) is used to close stream if the 123% archive is closed using archive_close/1. For other options, the 124% defaults are typically fine. The option format(raw) must be used 125% to process compressed streams that do not contain explicit 126% entries (e.g., gzip'ed data) unambibuously. The =raw= format 127% creates a _pseudo archive_ holding a single member named =data=. 128% 129% * close_parent(+Boolean) 130% If this option is =true= (default =false=), Stream is closed 131% if archive_close/1 is called on Archive. 132% 133% * compression(+Compression) 134% Synomym for filter(Compression). Deprecated. 135% 136% * filter(+Filter) 137% Support the indicated filter. This option may be 138% used multiple times to support multiple filters. In read mode, 139% If no filter options are provided, =all= is assumed. In write 140% mode, none is assumed. 141% Supported values are =all=, =bzip2=, =compress=, =gzip=, 142% =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu= 143% and =xz=. The value =all= is default for read, =none= for write. 144% 145% * format(+Format) 146% Support the indicated format. This option may be used 147% multiple times to support multiple formats in read mode. 148% In write mode, you must supply a single format. If no format 149% options are provided, =all= is assumed for read mode. Note that 150% =all= does *not* include =raw= and =mtree=. To open both archive 151% and non-archive files, _both_ format(all) and 152% format(raw) and/or format(mtree) must be specified. Supported 153% values are: =all=, =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, 154% =iso9660=, =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. 155% The value =all= is default for read. 156% 157% Note that the actually supported compression types and formats 158% may vary depending on the version and installation options of 159% the underlying libarchive library. This predicate raises a 160% domain error if the (explicitly) requested format is not 161% supported. 162% 163% @error domain_error(filter, Filter) if the requested 164% filter is not supported. 165% @error domain_error(format, Format) if the requested 166% format type is not supported. 167 168archive_open(stream(Stream), Mode, Archive, Options) :- 169 !, 170 archive_open_stream(Stream, Mode, Archive, Options). 171archive_open(Stream, Mode, Archive, Options) :- 172 is_stream(Stream), 173 !, 174 archive_open_stream(Stream, Mode, Archive, Options). 175archive_open(File, Mode, Archive, Options) :- 176 open(File, Mode, Stream, [type(binary)]), 177 catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]), 178 E, (close(Stream, [force(true)]), throw(E))). 179 180 181%! archive_close(+Archive) is det. 182% 183% Close the archive. If close_parent(true) is specified, the 184% underlying stream is closed too. If there is an entry opened 185% with archive_open_entry/2, actually closing the archive is 186% delayed until the stream associated with the entry is closed. 187% This can be used to open a stream to an archive entry without 188% having to worry about closing the archive: 189% 190% == 191% archive_open_named(ArchiveFile, EntryName, Stream) :- 192% archive_open(ArchiveFile, Handle, []), 193% archive_next_header(Handle, Name), 194% archive_open_entry(Handle, Stream), 195% archive_close(Archive). 196% == 197 198 199%! archive_property(+Handle, ?Property) is nondet. 200% 201% True when Property is a property of the archive Handle. Defined 202% properties are: 203% 204% * filters(List) 205% True when the indicated filters are applied before reaching 206% the archive format. 207 208archive_property(Handle, Property) :- 209 defined_archive_property(Property), 210 Property =.. [Name,Value], 211 archive_property(Handle, Name, Value). 212 213defined_archive_property(filter(_)). 214 215 216%! archive_next_header(+Handle, -Name) is semidet. 217% 218% Forward to the next entry of the archive for which Name unifies 219% with the pathname of the entry. Fails silently if the name of 220% the archive is reached before success. Name is typically 221% specified if a single entry must be accessed and unbound 222% otherwise. The following example opens a Prolog stream to a 223% given archive entry. Note that _Stream_ must be closed using 224% close/1 and the archive must be closed using archive_close/1 225% after the data has been used. See also setup_call_cleanup/3. 226% 227% == 228% open_archive_entry(ArchiveFile, Entry, Stream) :- 229% open(ArchiveFile, read, In, [type(binary)]), 230% archive_open(In, Archive, [close_parent(true)]), 231% archive_next_header(Archive, Entry), 232% archive_open_entry(Archive, Stream). 233% == 234% 235% @error permission_error(next_header, archive, Handle) if a 236% previously opened entry is not closed. 237 238%! archive_open_entry(+Archive, -Stream) is det. 239% 240% Open the current entry as a stream. Stream must be closed. 241% If the stream is not closed before the next call to 242% archive_next_header/2, a permission error is raised. 243 244 245%! archive_set_header_property(+Archive, +Property) 246% 247% Set Property of the current header. Write-mode only. Defined 248% properties are: 249% 250% * filetype(-Type) 251% Type is one of =file=, =link=, =socket=, =character_device=, 252% =block_device=, =directory= or =fifo=. It appears that this 253% library can also return other values. These are returned as 254% an integer. 255% * mtime(-Time) 256% True when entry was last modified at time. 257% * size(-Bytes) 258% True when entry is Bytes long. 259% * link_target(-Target) 260% Target for a link. Currently only supported for symbolic 261% links. 262 263%! archive_header_property(+Archive, ?Property) 264% 265% True when Property is a property of the current header. Defined 266% properties are: 267% 268% * filetype(-Type) 269% Type is one of =file=, =link=, =socket=, =character_device=, 270% =block_device=, =directory= or =fifo=. It appears that this 271% library can also return other values. These are returned as 272% an integer. 273% * mtime(-Time) 274% True when entry was last modified at time. 275% * size(-Bytes) 276% True when entry is Bytes long. 277% * link_target(-Target) 278% Target for a link. Currently only supported for symbolic 279% links. 280% * format(-Format) 281% Provides the name of the archive format applicable to the 282% current entry. The returned value is the lowercase version 283% of the output of archive_format_name(). 284% * permissions(-Integer) 285% True when entry has the indicated permission mask. 286 287archive_header_property(Archive, Property) :- 288 ( nonvar(Property) 289 -> true 290 ; header_property(Property) 291 ), 292 archive_header_prop_(Archive, Property). 293 294header_property(filetype(_)). 295header_property(mtime(_)). 296header_property(size(_)). 297header_property(link_target(_)). 298header_property(format(_)). 299header_property(permissions(_)). 300 301 302%! archive_extract(+ArchiveFile, +Dir, +Options) 303% 304% Extract files from the given archive into Dir. Supported 305% options: 306% 307% * remove_prefix(+Prefix) 308% Strip Prefix from all entries before extracting. If Prefix 309% is a list, then each prefix is tried in order, succeding at 310% the first one that matches. If no prefixes match, an error 311% is reported. If Prefix is an atom, then that prefix is removed. 312% * exclude(+ListOfPatterns) 313% Ignore members that match one of the given patterns. 314% Patterns are handed to wildcard_match/2. 315% * include(+ListOfPatterns) 316% Include members that match one of the given patterns. 317% Patterns are handed to wildcard_match/2. The `exclude` 318% options takes preference if a member matches both the `include` 319% and the `exclude` option. 320% 321% @error existence_error(directory, Dir) if Dir does not exist 322% or is not a directory. 323% @error domain_error(path_prefix(Prefix), Path) if a path in 324% the archive does not start with Prefix 325% @tbd Add options 326 327archive_extract(Archive, Dir, Options) :- 328 ( exists_directory(Dir) 329 -> true 330 ; existence_error(directory, Dir) 331 ), 332 setup_call_cleanup( 333 archive_open(Archive, Handle, Options), 334 extract(Handle, Dir, Options), 335 archive_close(Handle)). 336 337extract(Archive, Dir, Options) :- 338 archive_next_header(Archive, Path), 339 !, 340 option(include(InclPatterns), Options, ['*']), 341 option(exclude(ExclPatterns), Options, []), 342 ( archive_header_property(Archive, filetype(file)), 343 \+ matches(ExclPatterns, Path), 344 matches(InclPatterns, Path) 345 -> archive_header_property(Archive, permissions(Perm)), 346 remove_prefix(Options, Path, ExtractPath), 347 directory_file_path(Dir, ExtractPath, Target), 348 file_directory_name(Target, FileDir), 349 make_directory_path(FileDir), 350 setup_call_cleanup( 351 archive_open_entry(Archive, In), 352 setup_call_cleanup( 353 open(Target, write, Out, [type(binary)]), 354 copy_stream_data(In, Out), 355 close(Out)), 356 close(In)), 357 set_permissions(Perm, Target) 358 ; true 359 ), 360 extract(Archive, Dir, Options). 361extract(_, _, _). 362 363%! matches(+Patterns, +Path) is semidet. 364% 365% True when Path matches a pattern in Patterns. 366 367matches([], _Path) :- 368 !, 369 fail. 370matches(Patterns, Path) :- 371 split_string(Path, "/", "/", Parts), 372 member(Segment, Parts), 373 Segment \== "", 374 member(Pattern, Patterns), 375 wildcard_match(Pattern, Segment), 376 !. 377 378remove_prefix(Options, Path, ExtractPath) :- 379 ( option(remove_prefix(Remove), Options) 380 -> ( is_list(Remove) 381 -> ( member(P, Remove), 382 atom_concat(P, ExtractPath, Path) 383 -> true 384 ; domain_error(path_prefix(Remove), Path) 385 ) 386 ; ( atom_concat(Remove, ExtractPath, Path) 387 -> true 388 ; domain_error(path_prefix(Remove), Path) 389 ) 390 ) 391 ; ExtractPath = Path 392 ). 393 394%! set_permissions(+Perm:integer, +Target:atom) 395% 396% Restore the permissions. Currently only restores the executable 397% permission. 398 399set_permissions(Perm, Target) :- 400 Perm /\ 0o100 =\= 0, 401 !, 402 '$mark_executable'(Target). 403set_permissions(_, _). 404 405 406 /******************************* 407 * HIGH LEVEL PREDICATES * 408 *******************************/ 409 410%! archive_entries(+Archive, -Paths) is det. 411% 412% True when Paths is a list of pathnames appearing in Archive. 413 414archive_entries(Archive, Paths) :- 415 setup_call_cleanup( 416 archive_open(Archive, Handle, []), 417 contents(Handle, Paths), 418 archive_close(Handle)). 419 420contents(Handle, [Path|T]) :- 421 archive_next_header(Handle, Path), 422 !, 423 contents(Handle, T). 424contents(_, []). 425 426%! archive_data_stream(+Archive, -DataStream, +Options) is nondet. 427% 428% True when DataStream is a stream to a data object inside 429% Archive. This predicate transparently unpacks data inside 430% _possibly nested_ archives, e.g., a _tar_ file inside a _zip_ 431% file. It applies the appropriate decompression filters and thus 432% ensures that Prolog reads the plain data from DataStream. 433% DataStream must be closed after the content has been processed. 434% Backtracking opens the next member of the (nested) archive. This 435% predicate processes the following options: 436% 437% - meta_data(-Data:list(dict)) 438% If provided, Data is unified with a list of filters applied to 439% the (nested) archive to open the current DataStream. The first 440% element describes the outermost archive. Each Data dict 441% contains the header properties (archive_header_property/2) as 442% well as the keys: 443% 444% - filters(Filters:list(atom)) 445% Filter list as obtained from archive_property/2 446% - name(Atom) 447% Name of the entry. 448% 449% Non-archive files are handled as pseudo-archives that hold a 450% single stream. This is implemented by using archive_open/3 with 451% the options `[format(all),format(raw)]`. 452 453archive_data_stream(Archive, DataStream, Options) :- 454 option(meta_data(MetaData), Options, _), 455 archive_content(Archive, DataStream, MetaData, []). 456 457archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :- 458 archive_property(Archive, filter(Filters)), 459 repeat, 460 ( archive_next_header(Archive, EntryName) 461 -> findall(EntryProperty, 462 archive_header_property(Archive, EntryProperty), 463 EntryProperties), 464 dict_create(EntryMetadata, archive_meta_data, 465 [ filters(Filters), 466 name(EntryName) 467 | EntryProperties 468 ]), 469 ( EntryMetadata.filetype == file 470 -> archive_open_entry(Archive, Entry0), 471 ( EntryName == data, 472 EntryMetadata.format == raw 473 -> % This is the last entry in this nested branch. 474 % We therefore close the choicepoint created by repeat/0. 475 % Not closing this choicepoint would cause 476 % archive_next_header/2 to throw an exception. 477 !, 478 PipeMetadataTail = PipeMetadata2, 479 Entry = Entry0 480 ; PipeMetadataTail = PipeMetadata1, 481 open_substream(Entry0, 482 Entry, 483 PipeMetadata1, 484 PipeMetadata2) 485 ) 486 ; fail 487 ) 488 ; !, 489 fail 490 ). 491 492open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :- 493 setup_call_cleanup( 494 archive_open(stream(In), 495 Archive, 496 [ close_parent(true), 497 format(all), 498 format(raw) 499 ]), 500 archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata), 501 archive_close(Archive)). 502 503 504%! archive_create(+OutputFile, +InputFiles, +Options) is det. 505% 506% Convenience predicate to create an archive in OutputFile with 507% data from a list of InputFiles and the given Options. 508% 509% Besides options supported by archive_open/4, the following 510% options are supported: 511% 512% * directory(+Directory) 513% Changes the directory before adding input files. If this is 514% specified, paths of input files must be relative to 515% Directory and archived files will not have Directory 516% as leading path. This is to simulate =|-C|= option of 517% the =tar= program. 518% 519% * format(+Format) 520% Write mode supports the following formats: `7zip`, `cpio`, 521% `gnutar`, `iso9660`, `xar` and `zip`. Note that a particular 522% installation may support only a subset of these, depending on 523% the configuration of `libarchive`. 524 525archive_create(OutputFile, InputFiles, Options) :- 526 must_be(list(text), InputFiles), 527 option(directory(BaseDir), Options, '.'), 528 setup_call_cleanup( 529 archive_open(OutputFile, write, Archive, Options), 530 archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top), 531 archive_close(Archive)). 532 533archive_create_1(_, _, _, [], _) :- !. 534archive_create_1(Archive, Base, Current, ['.'|Files], sub) :- 535 !, 536 archive_create_1(Archive, Base, Current, Files, sub). 537archive_create_1(Archive, Base, Current, ['..'|Files], Where) :- 538 !, 539 archive_create_1(Archive, Base, Current, Files, Where). 540archive_create_1(Archive, Base, Current, [File|Files], Where) :- 541 directory_file_path(Current, File, Filename), 542 archive_create_2(Archive, Base, Filename), 543 archive_create_1(Archive, Base, Current, Files, Where). 544 545archive_create_2(Archive, Base, Directory) :- 546 exists_directory(Directory), 547 !, 548 entry_name(Base, Directory, Directory0), 549 archive_next_header(Archive, Directory0), 550 time_file(Directory, Time), 551 archive_set_header_property(Archive, mtime(Time)), 552 archive_set_header_property(Archive, filetype(directory)), 553 archive_open_entry(Archive, EntryStream), 554 close(EntryStream), 555 directory_files(Directory, Files), 556 archive_create_1(Archive, Base, Directory, Files, sub). 557archive_create_2(Archive, Base, Filename) :- 558 entry_name(Base, Filename, Filename0), 559 archive_next_header(Archive, Filename0), 560 size_file(Filename, Size), 561 time_file(Filename, Time), 562 archive_set_header_property(Archive, size(Size)), 563 archive_set_header_property(Archive, mtime(Time)), 564 setup_call_cleanup( 565 archive_open_entry(Archive, EntryStream), 566 setup_call_cleanup( 567 open(Filename, read, DataStream, [type(binary)]), 568 copy_stream_data(DataStream, EntryStream), 569 close(DataStream)), 570 close(EntryStream)). 571 572entry_name('.', Name, Name) :- !. 573entry_name(Base, Name, EntryName) :- 574 directory_file_path(Base, EntryName, Name). 575 576%! archive_foldl(:Goal, +Archive, +State0, -State). 577% 578% Operates like foldl/4 but for the entries in the archive. For each 579% member of the archive, Goal called as `call(:Goal, +Path, +Handle, 580% +S0, -S1). Here, `S0` is current state of the _accumulator_ 581% (starting with State0) and `S1` is the next state of the 582% accumulator, producing State after the last member of the archive. 583% 584% @see archive_header_property/2, archive_open/4. 585% 586% @arg Archive File name or stream to be given to archive_open/[3,4]. 587 588archive_foldl(Goal, Archive, State0, State) :- 589 setup_call_cleanup( 590 archive_open(Archive, Handle, [close_parent(true)]), 591 archive_foldl_(Goal, Handle, State0, State), 592 archive_close(Handle) 593 ). 594 595archive_foldl_(Goal, Handle, State0, State) :- 596 ( archive_next_header(Handle, Path) 597 -> call(Goal, Path, Handle, State0, State1), 598 archive_foldl_(Goal, Handle, State1, State) 599 ; State = State0 600 ). 601 602 603 /******************************* 604 * MESSAGES * 605 *******************************/ 606 607:- multifile prolog:error_message//1. 608 609prologerror_message(archive_error(Code, Message)) --> 610 [ 'Archive error (code ~p): ~w'-[Code, Message] ]