Last active
July 18, 2023 11:35
-
-
Save pstch/6bb2c408aebbd3f3328052b07a0c3d53 to your computer and use it in GitHub Desktop.
Pipe a file to a process "in-place"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# stream-replace.sh -- pipe a file to a process "in-place", using fallocate
#
# Copyright (C) 2020 Hugo Geoffroy "pistache" <[email protected]>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# NOTE: This script is non-portable, and requires:
# - Linux (because we use fallocate() with FALLOC_FL_PUNCH_HOLE)
# - bash 4.4 (to wait on a process substitution)
#
# The script can be rewritten to not require bash 4.4, using an
# explicit named pipe instead of the process substitution.
# Enable exit-on-error for script initialization
set -e

#######################################
# Tell whether a bash version is recent enough for this script, which
# needs bash 4.4+ to `wait` on a process substitution.
# Arguments: $1 - major version (default: the running shell's)
#            $2 - minor version (default: the running shell's)
# Returns:   0 if the version is >= 4.4, 1 otherwise
#######################################
bash_version_supported() {
  local major="${1:-${BASH_VERSINFO[0]}}"
  local minor="${2:-${BASH_VERSINFO[1]}}"
  (( major > 4 || (major == 4 && minor >= 4) ))
}

# Check for compatible bash version; refuse to continue on older shells
if ! bash_version_supported; then
  echo "$0: unsupported bash version (need 4.4+)" >&2
  exit 1
fi
# Parse blocksize argument
if [[ "$1" == "-b" ]]; then
  # Without this guard `shift 2` fails when the value is missing and
  # exit-on-error terminates the script without any message.
  if (( $# < 2 )); then
    echo "$0: option -b requires an argument" >&2
    exit 1
  fi
  bs="$2"
  shift 2
else
  # default block size: 1MiB
  bs=1048576
fi

# BLOCKSIZE is used both as a dd operand and in shell arithmetic, so only a
# plain positive decimal integer is safe: no suffix such as "1M", no leading
# zero (read as octal by the shell), and not zero.
if [[ ! "$bs" =~ ^[1-9][0-9]*$ ]]; then
  echo "$0: invalid block size: $bs" >&2
  exit 1
fi
#######################################
# Print the usage message.
# Outputs: writes the help text to stdout (the caller redirects it)
#######################################
usage() {
  echo "Usage: $0 [-b BLOCKSIZE] FILE COMMAND"
  echo ""
  echo "Use this script to overwrite FILE block-by-block "
  echo "by passing its contents through COMMAND, while "
  echo "unallocating processed blocks from FILE."
  echo ""
  echo "This can be used to process a file in place, "
  echo "when this file will not fit in available memory, "
  echo "and when there is enough free space to make a copy."
  echo ""
  echo "If not provided, BLOCKSIZE will be set to 1MiB."
  echo ""
  echo "WARNING: if COMMAND doesn't do anything with the data"
  echo " sent to its standard input, FILE will be"
  echo " truncated."
  echo ""
  echo "NOTE: if COMMAND fails during processing, only some part"
  echo " of FILE will have been processed, although"
  echo " the last block before the failure will be "
  echo " preserved untouched, and script will report"
  echo " the amount of processed bytes."
}

# Show usage message when FILE and COMMAND are not both present; this is an
# error path, so the text goes to stderr and the exit status is non-zero.
if [[ "$#" -lt 2 ]]; then
  usage >&2
  exit 1
fi
# Parse input file arguments: FILE is the first remaining argument, what is
# left in "$@" after the shift is COMMAND and its arguments.
input="$1"
shift

# Test for input file existence
if [[ ! -e "$input" ]]; then
  echo "$0: $input: No such file or directory" >&2
  exit 1
fi

# Reject directories, FIFOs, devices, ... up front, before any temporary
# file gets created: the rest of the script reads FILE by block offset and
# deallocates ranges of it.
if [[ ! -f "$input" ]]; then
  echo "$0: $input: Not a regular file" >&2
  exit 1
fi
# Create temporary output file in the same directory as the input file,
# named after it with a random suffix.
# `--` stops option parsing so that a file name starting with "-" is not
# mistaken for an option by dirname, basename or mktemp.
inputdir=$(dirname -- "$input")
inputname=$(basename -- "$input")
output=$(mktemp -p "$inputdir" -- "${inputname}.XXXXXXXXXX")
# Initialize loop variables
size=$(wc -c < "$input")  # number of bytes to feed to COMMAND
offset=0                  # byte offset of the next block to read
i=0                       # index of the next block to read
read_failed=0             # set to 1 when dd fails to read a block

# Disable exit-on-error so that we can do our own error handling
set +e

# Main loop. The standard output of the whole loop is connected to the
# standard input of COMMAND ("$@") through a process substitution, and the
# standard output of COMMAND is written to the temporary output file.
# After the loop, `wait $!` collects the exit status of COMMAND.
while (( offset < size )); do
  # read $bs bytes at block $i into the pipe
  dd if="$input" bs="$bs" skip="$i" count=1 status=none
  dd_status=$?
  if (( dd_status == 141 )); then
    # 141 = 128 + SIGPIPE: COMMAND stopped reading its input. This is not
    # a read error, so the exit status of COMMAND decides the outcome.
    break
  elif (( dd_status != 0 )); then
    # dd could not deliver the block: remember it, so that a successful
    # COMMAND does not make the partial output replace the input file.
    # NOTE(review): if SIGPIPE is ignored in this shell's environment, dd
    # reports a closed pipe as a plain write error and lands here as well.
    read_failed=1
    break
  fi
  # remove $bs bytes at n-1 blocks from the input file (the block that was
  # just read is kept until the next iteration)
  if (( i > 0 )); then
    fallocate -p -o $((bs * (i-1))) -l "$bs" -- "$input"
  fi
  # increment counter and calculate new offset
  offset=$((bs * ++i))
done > >( "$@" > "$output" ) ; wait $! ; retcode=$?

# A read failure must never be reported as success, whatever COMMAND returned
if (( read_failed && retcode == 0 )); then
  echo "$0: $input: failed to read block $i" >&2
  retcode=1
fi
# Handle return code
if (( retcode == 0 )); then
  # success: replace input file by output file.
  # mktemp creates its file readable and writable by the owner only, so
  # copy the permission bits of the input file first (a failure here is
  # reported by chmod itself but does not abort the replacement).
  chmod --reference="$input" -- "$output"
  if ! mv -f -- "$output" "$input"; then
    retcode=1
    echo "### Could not replace $input, processed data is kept in $output" >&2
  fi
else
  # error: put back processed bytes in input file
  if dd if="$output" of="$input" bs="$bs" count="$i" conv=notrunc status=none; then
    # delete output file
    rm -f -- "$output"
  else
    # The blocks already deallocated from the input file now only exist in
    # the output file, so it must not be deleted.
    echo "### Could not write processed bytes back to $input, they are kept in $output" >&2
  fi
  # print error messages
  echo "### Command $* failed with exit code $retcode ($offset bytes processed)" >&2
fi
exit "$retcode"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment