Skip to content

Instantly share code, notes, and snippets.

@hcarty
Last active September 14, 2018 14:19
Show Gist options
  • Save hcarty/7217b82851552270e34c1ecd118335e4 to your computer and use it in GitHub Desktop.
Save hcarty/7217b82851552270e34c1ecd118335e4 to your computer and use it in GitHub Desktop.
Simple .tar.gz blob of bytes extractor using ezgzip + ocaml-tar
(* This requires ezgzip and tar, probably installed via opam. The simplest way to test this code is:
$ utop tar_gz_example.ml
with a test.tar.gz file existing in the current directory. If test.tar.gz contains a lot of data
then your computer may have a bad time as this extracts everything from the tar file into
memory. *)
#require "ezgzip";;
#require "tar";;
(* The largest native [int], widened to [int64] so it can be compared directly
   against the 64-bit file sizes recorded in tar headers before they are
   narrowed to [int]. *)
let max_ocaml_int = max_int |> Int64.of_int
(* [read_file input_channel header] returns a freshly allocated [Cstruct.t]
   holding the bytes of the tar entry described by [header].

   The buffer is sized from [header.file_size] and handed to
   [Tar_cstruct.really_read] together with [input_channel].
   Raises [Assert_failure] if the recorded size exceeds [max_ocaml_int]. *)
let read_file input_channel (header : Tar_cstruct.Header.t) =
  let { Tar_cstruct.Header.file_size = size; _ } = header in
  (* A size beyond the native int range cannot be used as a buffer length. *)
  assert (size <= max_ocaml_int) ;
  let contents = Cstruct.create (Int64.to_int size) in
  Tar_cstruct.really_read input_channel contents ;
  contents
(* [read_files input_channel accu] repeatedly applies [read_file] through
   [Tar_cstruct.Archive.with_next_file], prepending each result to [accu],
   until [Tar_cstruct.Header.End_of_stream] is raised. The accumulated list is
   then reversed and returned, so results appear in the order they were read
   (any initial contents of [accu] end up reversed at the front). *)
let read_files input_channel accu =
  let rec collect gathered =
    match Tar_cstruct.Archive.with_next_file input_channel read_file with
    | exception Tar_cstruct.Header.End_of_stream -> List.rev gathered
    | contents -> collect (contents :: gathered)
  in
  collect accu
(* [extract_all_files raw_tar_gz] decompresses the gzip data held in the
   string [raw_tar_gz] with [Ezgzip.decompress], wraps the result as a
   [Tar_cstruct] input channel and returns what [read_files] collects from it.

   Raises [Failure "Invalid gzip"] if decompression reports an error. *)
let extract_all_files raw_tar_gz =
  match Ezgzip.decompress raw_tar_gz with
  | Error _ -> failwith "Invalid gzip"
  | Ok decompressed ->
    let tar_channel =
      decompressed |> Cstruct.of_string |> Tar_cstruct.make_in_channel
    in
    read_files tar_channel []
(* Script entry point: reads "test.tar.gz" from the current directory into
   memory, extracts every entry with [extract_all_files] and prints how many
   entries were found.

   The input channel is closed as soon as the file has been read, and also
   when reading fails, so the file descriptor is never leaked. Any exception
   from reading or extraction propagates to the toplevel. *)
let () =
  let infile = "test.tar.gz" in
  let ic = open_in_bin infile in
  let raw_tar_gz =
    match really_input_string ic (in_channel_length ic) with
    | contents ->
      close_in ic ;
      contents
    | exception exn ->
      (* Close quietly so the original read error is the one reported. *)
      close_in_noerr ic ;
      raise exn
  in
  let files = extract_all_files raw_tar_gz in
  Printf.printf "Number of files in %s: %d\n%!" infile (List.length files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment