Last active
October 5, 2022 13:19
-
-
Save simonthompson99/69793adbd9b42772ce27addd2b1bdcf5 to your computer and use it in GitHub Desktop.
[BASH oneliners] #bash #oneliner #file_operations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add up the number of columns across all *.txt files in the current directory.
# Only the first line of each file is inspected (awk prints its NF and exits),
# so this assumes the first line has the correct number of whitespace-separated
# columns. The per-file counts are joined with '+' by paste and summed by bc.
# The ./ prefix stops awk from treating a name such as a=b.txt as a variable
# assignment, and stops names starting with '-' from looking like options.
for file in ./*.txt; do
  [ -e "$file" ] || continue  # glob matched nothing: skip the literal pattern
  awk '{print NF; exit}' "$file"
done | paste -sd+ - | bc
# Convert every .xlsx file under the current directory to pipe-separated text,
# writing <name>.txt next to each <name>.xlsx.
# find emits NUL-terminated names and read uses -r -d '' with an empty IFS so
# that names containing spaces, backslashes or newlines reach ssconvert intact.
find . -type f -name "*.xlsx" -print0 |
  while IFS= read -r -d '' file; do
    ssconvert -O 'separator=|' "$file" "${file%.xlsx}.txt"
  done
# scp down files, choosing the local name before each copy.
# Each line of for-download.txt is <participant_id>|<file_num>|<source_filename>;
# the file is saved as ~/Downloads/cf/<participant_id>_<file_num>.png.
# user@example.com is a placeholder: substitute the real user@host before use.
# read -r keeps backslashes in the fields literal; the list is redirected into
# the loop instead of being piped through cat.
while IFS='|' read -r part id inf; do
  out="${part}_${id}.png"
  scp user@example.com:"$inf" ~/Downloads/cf/"$out"
done < for-download.txt
# Get the number of files per file extension in the directory tree, most
# common first, written to file-extension-totals.txt.
# The first sed expression drops everything up to the last '.', the second
# drops any directory part that is left, so a file with no '.' in its name is
# counted under its bare file name.
# The report is excluded from the scan: the redirect creates it before find
# runs, so it would otherwise count itself as a txt file.
find . -type f ! -path ./file-extension-totals.txt |
  sed -e 's/.*\.//' -e 's/.*\///' |
  sort | uniq -c | sort -rn > file-extension-totals.txt
# Print the lines of <in-file> that match none of the patterns listed, one per
# line, in <match-file> (replace both placeholders with real paths).
# -f reads the patterns from a file; a pattern matches anywhere in a line, so
#    anchor entries with ^ to match only lines that start with a value
# -v is invert, it finds the lines that don't match any of the patterns
# add in -x to match whole lines, and -F to interpret the entries in match-file
# as fixed strings (not patterns)
grep -v -f <match-file> <in-file>
#-- rsync using files from reference text file
# files.txt lists one path per line, relative to the source root (here / on the
# remote host); the listed files are copied into the current directory.
# -r is spelled out because -a does not imply recursion when --files-from is used.
# user@example.com is a placeholder: substitute the real user@host before use.
rsync -arvzh --progress user@example.com:/ . --files-from=files.txt
#-- flatten folder-tree of files
# Moves every regular file found under ./tree into the directory ./flat.
# -n  never overwrite: when two files share a name the later one is left in
#     ./tree instead of silently replacing the first
# --  end of options, so a file name can never be parsed as a flag
# The trailing slash on ./flat/ makes mv fail if ./flat is not an existing
# directory, rather than renaming a file to "flat".
find ./tree -type f -exec mv -n -- {} ./flat/ \;
#-- do md5 on folder tree of files, with various different extensions and write results to file
# Matches .pdf, .tif, .docx and .doc case-insensitively (-iname) under /opt and
# writes one "checksum  path" line per file to md5sum.txt.
# -exec ... {} + hands many paths to each md5sum invocation instead of starting
# one process per file; the output lines are the same.
find /opt \( -iname "*.pdf" -o -iname "*.tif" -o -iname "*.docx" -o -iname "*.doc" \) -type f -exec md5sum -- {} + > md5sum.txt
#-- remove first line of file
# Replace <in> with the base name of the file to edit.
# The result goes to tmp.txt first and is moved over the input only when sed
# succeeds (&&), so a failed sed cannot replace the input with an empty file.
sed '1d' <in>.txt > tmp.txt && mv tmp.txt <in>.txt
#-- get disk usage of top-level directories
# -h prints human-readable sizes, -d1 limits the report to one level below /.
# stderr is discarded on purpose to hide errors from unreadable paths.
du -h -d1 / 2>/dev/null
# change file extension of multiple files (.txt -> .text in the current directory)
# see https://unix.stackexchange.com/a/19656
# ${f%.txt} strips the old suffix; -- stops mv from parsing a name as an option.
for f in *.txt; do
  [ -e "$f" ] || continue  # glob matched nothing: skip the literal pattern
  mv -- "$f" "${f%.txt}.text"
done
# extract out certain lines from a file
# extracts line 1 and then all lines between 738090 and 738099 inclusive
# the first rule stops awk once the last wanted line has passed, so the rest
# of the input is never read
awk 'NR>738099 {exit} NR==1 || (NR>=738090 && NR<=738099)' bridge_shuffled.txt > few_line_shuf_source.txt
# randomise or shuffle lines in a file
# writes a random permutation of the lines of in.txt to out.txt
shuf in.txt > out.txt
# diff two files regardless of line order
# each file is sorted on the fly via process substitution (bash), so the
# inputs themselves do not need to be sorted and are not modified
diff <(sort file1.txt) <(sort file2.txt)
# check for duplicate lines in folder of files
# For each *.tsv file, prints every line that occurs more than once, prefixed
# with its occurrence count.
# uniq -d selects the repeated lines directly; filtering out counts of 1 with
# a pattern like "^\s\+1" would also hide counts such as 10, 12 or 100.
for f in ./*.tsv; do
  [ -e "$f" ] || continue  # glob matched nothing: skip the literal pattern
  sort "$f" | uniq -cd
done
# count files by modification date
# find's %T directives format the last-modification time (not the creation
# time); each file yields one YYYY-MM-DD line, which sort | uniq -c tallies.
# -printf is a GNU find extension.
find . -type f -printf '%TY-%Tm-%Td\n' | sort | uniq -c
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment