Skip to content

Instantly share code, notes, and snippets.

@efogdev
Created December 23, 2024 22:37
Show Gist options
  • Save efogdev/f73755575471c57b4fa5fa5c6975be9b to your computer and use it in GitHub Desktop.
Save efogdev/f73755575471c57b4fa5fa5c6975be9b to your computer and use it in GitHub Desktop.
#!/usr/bin/bash
# usage: ./compress.sh data-dir
#######################################
# Render a byte count as a human-readable size.
# Arguments: $1 - non-negative integer number of bytes (empty is treated as 0)
# Outputs:   "<n> B" below 1024; otherwise the value scaled by powers of
#            1024 to KB, MB or GB with exactly two decimals. The decimals
#            are truncated, not rounded.
#######################################
format_size() {
  local num=${1:-0}
  local unit suffix

  if [ "$num" -lt 1024 ]; then
    echo "${num} B"
    return 0
  elif [ "$num" -lt $((1024 * 1024)) ]; then
    unit=1024
    suffix=KB
  elif [ "$num" -lt $((1024 * 1024 * 1024)) ]; then
    unit=$((1024 * 1024))
    suffix=MB
  else
    unit=$((1024 * 1024 * 1024))
    suffix=GB
  fi

  # Pure integer arithmetic: whole part, then the remainder scaled to
  # hundredths. No external process is needed for this.
  printf '%d.%02d %s\n' "$((num / unit))" "$((num % unit * 100 / unit))" "$suffix"
}
#######################################
# Run a shell command in the background and sample its resident memory.
# Arguments: $1 - command line; it is evaluated by `bash -c`, so it must be
#                 trusted input
# Outputs:   on stdout, the largest VmRSS value read from /proc/<pid>/status
#            while polling (callers treat it as kB), or 0 if no sample was
#            taken. The command's own stdout is discarded. A warning goes to
#            stderr when the command exits non-zero.
# Notes:     Memory is sampled every 0.1 s, so short spikes can be missed.
#            NOTE(review): when $1 is a pipeline, `exec` only applies to the
#            first stage inside its own subshell, so the sampled PID is
#            presumably the wrapping bash rather than the compressor; verify
#            before trusting the figures for piped commands.
#######################################
measure_peak_ram() {
  local pid
  local status=0
  local peak_ram=0
  local current_ram

  # Discard the command's stdout so only the final number reaches the caller.
  bash -c "exec $1" >/dev/null &
  pid=$!

  while kill -0 "$pid" 2>/dev/null; do
    current_ram=$(awk '/^VmRSS:/ {print $2; exit}' "/proc/$pid/status" 2>/dev/null)
    # The status file may be gone or may carry no VmRSS line while the
    # process is exiting; count that sample as 0 instead of comparing "".
    current_ram=${current_ram:-0}
    if [ "$current_ram" -gt "$peak_ram" ]; then
      peak_ram=$current_ram
    fi
    sleep 0.1
  done

  # Collect the exit status so a failed command does not go unnoticed.
  wait "$pid" || status=$?
  if [ "$status" -ne 0 ]; then
    printf 'measure_peak_ram: command exited with status %s: %s\n' "$status" "$1" >&2
  fi

  echo "$peak_ram"
}
# Benchmark driver: compress the directory given as $1 with several tools and
# print one table row per tool (wall-clock time, archive size, thread label,
# peak RAM). Each archive is created next to the directory and removed again.

#######################################
# Run one compression command and print its result row.
# Arguments: $1 - label for the "Method" column
#            $2 - path of the archive the command is expected to create
#            $3 - value printed in the "Threads" column
#            $4 - command line handed to measure_peak_ram
# Outputs:   one formatted table row on stdout; a message on stderr when the
#            command left no archive behind
# Returns:   1 when the archive is missing, 0 otherwise
#######################################
run_benchmark() {
  local label=$1
  local archive=$2
  local threads=$3
  local command=$4
  local start end elapsed size peak_kb

  # A leftover archive from an aborted run would be updated (zip, 7z) or
  # would block the run (zstd); the archive is deleted afterwards anyway.
  rm -f -- "$archive"

  # The timing brackets measure_peak_ram, so it includes that helper's
  # polling interval.
  start=$(date +%s.%N)
  peak_kb=$(measure_peak_ram "$command")
  end=$(date +%s.%N)

  if ! size=$(stat -c%s -- "$archive" 2>/dev/null); then
    printf '%s: no archive was produced, skipping\n' "$label" >&2
    return 1
  fi

  elapsed=$(echo "$end - $start" | bc)
  printf "%-10s %-15.3f %-15s %-15s %-15s\n" \
    "$label" "$elapsed" "$(format_size "$size")" "$threads" \
    "$(format_size "$((peak_kb * 1024))")"
  rm -- "$archive"
}

# Drop one trailing slash (as added by tab completion) so the archives land
# next to the directory rather than inside it as "dir/.zip".
dir_name=${1:-}
dir_name=${dir_name%/}
if [ -z "$dir_name" ] || [ ! -d "$dir_name" ]; then
  printf 'usage: %s data-dir\n' "$0" >&2
  exit 1
fi

# Relative names keep the original "./" prefix in the du listing; absolute
# paths are passed through unchanged.
case "$dir_name" in
  /*) du_target=$dir_name ;;
  *) du_target=./$dir_name ;;
esac
du -hd1 "$du_target"
echo
printf "%-10s %-15s %-15s %-15s %-15s\n" "Method" "Time (s)" "Size" "Threads" "Peak RAM"

# The command strings are re-parsed by a shell inside measure_peak_ram, so
# the directory name is shell-quoted once here to survive spaces and globs.
printf -v q_dir '%q' "$dir_name"

# The "Threads" values are fixed labels, not measurements.
# NOTE(review): none of the commands passes an explicit thread option;
# confirm the labels match the tools' defaults on the target machine.
run_benchmark "zip" "${dir_name}.zip" "1" "zip -r ${q_dir}.zip ${q_dir} > /dev/null"
run_benchmark "lz4" "${dir_name}.lz4" "1" "tar -cf - ${q_dir} | lz4 -q > ${q_dir}.lz4"
run_benchmark "7z" "${dir_name}.7z" "8" "7z a ${q_dir}.7z ${q_dir} > /dev/null"
run_benchmark "tar.xz" "${dir_name}.tar.xz" "16" "tar -cJf ${q_dir}.tar.xz ${q_dir} > /dev/null"
run_benchmark "tar.gz" "${dir_name}.tar.gz" "1" "tar -czf ${q_dir}.tar.gz ${q_dir} > /dev/null"
run_benchmark "bzip2" "${dir_name}.tar.bz2" "1" "tar -cf - ${q_dir} | bzip2 > ${q_dir}.tar.bz2"
run_benchmark "zstd" "${dir_name}.zst" "8" "tar -cf - ${q_dir} | zstd -q -o ${q_dir}.zst"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment