simonthompson99 · October 5, 2022 13:19
diff --git a/bashOneLiners.sh b/bashOneLiners.sh
 # add up the column counts of all .txt files (assumes the first line of each file has the correct number of columns)
 # "$file" is quoted so names containing spaces or glob characters reach awk as a single argument
 for file in *.txt; do awk '{print NF; exit}' "$file"; done | paste -sd+ - | bc
 
 # convert every xlsx below the current directory to pipe-separated text
 # IFS= and -r stop read from trimming whitespace or interpreting backslashes in a path
 find . -type f -name "*.xlsx" | while IFS= read -r file; do ssconvert -O 'separator=|' "$file" "${file%.xlsx}.txt"; done

 # scp down files, building the local name before each copy
 # in this case for-download is <participant_id>|<file_num>|<source_filename>
 # user@host is a placeholder for the real login
 while IFS='|' read -r part id inf; do
    out="${part}_${id}.png"
    scp user@host:"$inf" ~/Downloads/cf/"$out"
 done < for-download.txt
 
 # tally files per extension across the directory tree, most frequent first
 # first expression strips everything up to the last dot, second drops any leftover directory part
 find . -type f \
   | sed -e 's/.*\.//' -e 's/.*\///' \
   | sort \
   | uniq -c \
   | sort -rn > file-extension-totals.txt

 # filter out lines that match (e.g. start with) any of the values listed in a match file, one pattern per line
 # add in -x to match whole lines, and -F to interpret the entries in match-file as fixed strings (not patterns)
 # -v inverts the match, keeping only the lines that match none of the values
 # the placeholders are quoted so the shell does not parse < and > as redirections
 grep -v -f "<match-file>" "<in-file>"

 #-- rsync only the paths listed in files.txt (read relative to the remote /) into the current directory
 # -r is spelled out because -a does not imply recursion when --files-from is used
 # user@host is a placeholder for the real login
 rsync -arvzh --progress --files-from=files.txt user@host:/ .

 #-- flatten folder-tree of files: move every file under ./tree directly into ./flat
 # the trailing slash makes mv fail when ./flat is not an existing directory, instead of renaming each file to "flat"
 # NOTE: files that share a basename still overwrite one another
 find ./tree -type f -exec mv {} ./flat/ \;

 #-- md5 every pdf/tif/docx/doc file (case-insensitive match) under /opt and write the checksums to md5sum.txt
 # -exec ... + passes many files to each md5sum invocation instead of forking once per file
 find /opt -type f \( -iname "*.pdf" -o -iname "*.tif" -o -iname "*.docx" -o -iname "*.doc" \) -exec md5sum {} + > md5sum.txt

 #-- remove first line of file
 # && means the original is only replaced when sed succeeded
 # the placeholder is quoted so the shell does not parse < and > as redirections
 sed '1d' "<in>.txt" > tmp.txt && mv tmp.txt "<in>.txt"

 #-- get disk usage of top-level directories
 # -h prints human-readable sizes; -d1 limits the report to one level below /
 # 2>/dev/null discards error output (e.g. permission-denied messages) so only the sizes are shown
 du -h -d1 / 2>/dev/null

 # swap the .txt suffix for .text on every matching file in the current directory
 # see https://unix.stackexchange.com/a/19656
 # -- stops mv from treating a name that begins with a dash as an option
 for src in *.txt; do
   mv -- "$src" "${src%.txt}.text"
 done

 # extract out certain lines from a file
 # prints line 1 and then all lines between 738090 and 738099 inclusive
 # exits right after line 738099 so the remainder of a large file is never read
 awk 'NR==1 || (NR>=738090 && NR<=738099) {print} NR==738099 {exit}' bridge_shuffled.txt > few_line_shuf_source.txt

 # randomise or shuffle lines in a file
 # shuf writes a random permutation of the lines of in.txt; the redirect stores it in out.txt
 shuf in.txt > out.txt

 # diff two files regardless of line order
 # each <(sort ...) process substitution sorts a file on the fly, so neither input has to be pre-sorted (bash syntax)
 diff <(sort file1.txt) <(sort file2.txt)

 # check for duplicate lines in folder of files: print each repeated line with its count
 # uniq -d keeps only repeated lines, so counts such as 10 or 12 are reported too
 # (filtering the uniq -c output on a leading "1" would hide them)
 for f in *.tsv; do sort "$f" | uniq -cd; done

 # count files by last-modification date (YYYY-MM-DD)
 # %T is find's modification-time directive, so this is not the creation date; -printf needs GNU find
 find . -type f -printf '%TY-%Tm-%Td\n' | sort | uniq -c
	# add up the column counts of all .txt files (assumes the first line of each file has the correct number of columns)
	# pipes are real | operators (a backslash-escaped \| would be passed as a literal argument)
	# "$file" is quoted so names containing spaces or glob characters reach awk as a single argument
	for file in *.txt; do awk '{print NF; exit}' "$file"; done | paste -sd+ - | bc

	# convert every xlsx below the current directory to pipe-separated text
	# the separator is a bare | (inside single quotes a backslash would become part of the separator)
	# IFS= and -r stop read from trimming whitespace or interpreting backslashes in a path
	find . -type f -name "*.xlsx" | while IFS= read -r file; do ssconvert -O 'separator=|' "$file" "${file%.xlsx}.txt"; done

	# scp down files, building the local name before each copy
	# in this case for-download is <participant_id>|<file_num>|<source_filename>
	# IFS is a bare | so each line splits on the pipe character only
	# user@host is a placeholder for the real login
	while IFS='|' read -r part id inf; do
		out="${part}_${id}.png"
		scp user@host:"$inf" ~/Downloads/cf/"$out"
	done < for-download.txt

	# get number of files by file extension in directory tree, most frequent first
	# .*\. strips everything up to the last dot; .*\/ drops any leftover directory part
	# (without the * each expression would delete only two characters)
	find . -type f | sed -e 's/.*\.//' -e 's/.*\///' | sort | uniq -c | sort -rn > file-extension-totals.txt

	# filter out lines that match (e.g. start with) any of the values listed in a match file, one pattern per line
	# add in -x to match whole lines, and -F to interpret the entries in match-file as fixed strings (not patterns)
	# -v inverts the match, keeping only the lines that match none of the values
	# the placeholders are quoted so the shell does not parse < and > as redirections
	grep -v -f "<match-file>" "<in-file>"

	#-- rsync only the paths listed in files.txt (read relative to the remote /) into the current directory
	# -r is spelled out because -a does not imply recursion when --files-from is used
	# user@host is a placeholder for the real login
	rsync -arvzh --progress --files-from=files.txt user@host:/ .

	#-- flatten folder-tree of files: move every file under ./tree directly into ./flat
	# the trailing slash makes mv fail when ./flat is not an existing directory, instead of renaming each file to "flat"
	# NOTE: files that share a basename still overwrite one another
	find ./tree -type f -exec mv {} ./flat/ \;

	#-- md5 every pdf/tif/docx/doc file (case-insensitive match) under /opt and write the checksums to md5sum.txt
	# the leading * in each pattern matches any basename; a bare ".pdf" would only match a file named exactly .pdf
	# -exec ... + passes many files to each md5sum invocation instead of forking once per file
	find /opt -type f \( -iname "*.pdf" -o -iname "*.tif" -o -iname "*.docx" -o -iname "*.doc" \) -exec md5sum {} + > md5sum.txt

	#-- remove first line of file
	# && means the original is only replaced when sed succeeded
	# the placeholder is quoted so the shell does not parse < and > as redirections
	sed '1d' "<in>.txt" > tmp.txt && mv tmp.txt "<in>.txt"

	#-- get disk usage of top-level directories
	# -h prints human-readable sizes; -d1 limits the report to one level below /
	# 2>/dev/null discards error output (e.g. permission-denied messages) so only the sizes are shown
	du -h -d1 / 2>/dev/null

	# swap the .txt suffix for .text on every matching file in the current directory
	# see https://unix.stackexchange.com/a/19656
	# -- stops mv from treating a name that begins with a dash as an option
	for src in *.txt; do
		mv -- "$src" "${src%.txt}.text"
	done

	# extract out certain lines from a file
	# prints line 1 and then all lines between 738090 and 738099 inclusive
	# || is awk's logical OR and must not be backslash-escaped inside the single-quoted program
	# exits right after line 738099 so the remainder of a large file is never read
	awk 'NR==1 || (NR>=738090 && NR<=738099) {print} NR==738099 {exit}' bridge_shuffled.txt > few_line_shuf_source.txt

	# randomise or shuffle lines in a file
	# shuf writes a random permutation of the lines of in.txt; the redirect stores it in out.txt
	shuf in.txt > out.txt

	# diff two files regardless of line order
	# each <(sort ...) process substitution sorts a file on the fly, so neither input has to be pre-sorted (bash syntax)
	diff <(sort file1.txt) <(sort file2.txt)

	# check for duplicate lines in folder of files: print each repeated line with its count
	# uniq -d keeps only repeated lines, so counts such as 10 or 12 are reported too
	# (filtering the uniq -c output on a leading "1" would hide them)
	for f in *.tsv; do sort "$f" | uniq -cd; done

	# count files by last-modification date (YYYY-MM-DD)
	# %T is find's modification-time directive, so this is not the creation date; -printf needs GNU find
	# pipes are real | operators (a backslash-escaped \| would be handed to find as a literal argument)
	find . -type f -printf '%TY-%Tm-%Td\n' | sort | uniq -c