Last active
September 14, 2018 14:19
-
-
Save hcarty/7217b82851552270e34c1ecd118335e4 to your computer and use it in GitHub Desktop.
Simple .tar.gz blob of bytes extractor using ezgzip + ocaml-tar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* This requires ezgzip and tar, probably installed via opam. The simplest way to test this code is:
   $ utop tar_gz_example.ml
   with a test.tar.gz file existing in the current directory. If test.tar.gz contains a lot of data
   then your computer may have a bad time, as this extracts everything from the tar file into
   memory. *)
#require "ezgzip";;
#require "tar";;
(* Largest value an OCaml [int] can hold, widened to [int64] so it can be
   compared directly against the 64-bit file size read from a tar header.
   Used as a sanity bound before a size is narrowed to [int]. *)
let max_ocaml_int = Int64.of_int max_int
(* [read_file input_channel header] reads the body of the tar entry described
   by [header] from [input_channel] and returns it as a newly created
   [Cstruct.t] whose length is [header.file_size].

   The whole entry is held in memory. Raises [Assert_failure] if the recorded
   size is larger than an OCaml [int] can represent. *)
let read_file input_channel (header : Tar_cstruct.Header.t) =
  let file_size = header.file_size in
  (* [Cstruct.create] takes an [int] length, so the 64-bit size from the
     header must fit before it is narrowed with [Int64.to_int]. If this were
     to fail we'd have some pretty big problems... *)
  assert (Int64.compare file_size max_ocaml_int <= 0);
  let buf = Cstruct.create (Int64.to_int file_size) in
  Tar_cstruct.really_read input_channel buf;
  buf
(* [read_files input_channel accu] repeatedly asks the archive for its next
   entry, reads it with [read_file], and prepends the result to [accu]. When
   [Tar_cstruct.Header.End_of_stream] is raised, it returns the accumulated
   entries reversed, i.e. in the order they were read.

   The [exception] case lives in the same [match] as the recursive call, so
   the recursive call remains in tail position. *)
let rec read_files input_channel accu =
  match Tar_cstruct.Archive.with_next_file input_channel read_file with
  | file -> read_files input_channel (file :: accu)
  | exception Tar_cstruct.Header.End_of_stream -> List.rev accu
(* [extract_all_files raw_tar_gz] gunzips the string [raw_tar_gz], treats the
   result as a tar archive, and returns the contents of every entry as a list
   of [Cstruct.t] buffers, in archive order.

   Everything is decompressed and extracted in memory.
   Raises [Failure "Invalid gzip"] if decompression reports an error. *)
let extract_all_files raw_tar_gz =
  let raw_tar =
    match Ezgzip.decompress raw_tar_gz with
    | Ok raw_tar -> raw_tar
    | Error _ -> failwith "Invalid gzip"
  in
  (* Wrap the decompressed bytes so the tar reader can consume them as a
     channel. *)
  let tar = Tar_cstruct.make_in_channel (Cstruct.of_string raw_tar) in
  read_files tar []
(* Entry point: reads test.tar.gz from the current directory in full,
   extracts every entry in memory, and prints how many entries were found. *)
let () =
  let infile = "test.tar.gz" in
  let ic = open_in_bin infile in
  (* Read the whole file, then release the channel on both the success and
     the failure path so the file descriptor is never leaked. *)
  let raw_tar_gz =
    match really_input_string ic (in_channel_length ic) with
    | contents ->
        close_in ic;
        contents
    | exception exn ->
        (* Do not let a secondary close error mask the original failure. *)
        close_in_noerr ic;
        raise exn
  in
  let files = extract_all_files raw_tar_gz in
  Printf.printf "Number of files in %s: %d\n%!" infile (List.length files)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment