Created
May 3, 2018 21:40
-
-
Save vovanbo/3711fff76820836b7f51d2e9b022fd47 to your computer and use it in GitHub Desktop.
Find common strings in files via awk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
awk 'END { | |
# the END block is executed after | |
# all the input has been read | |
# loop over the rec array | |
# and build the dup array indxed by the nuber of | |
# filenames containing a given record | |
for (R in rec) { | |
n = split(rec[R], t, "/") | |
if (n > 1) | |
dup[n] = dup[n] ? dup[n] RS sprintf("\t%-20s -->\t%s", rec[R], R) : \ | |
sprintf("\t%-20s -->\t%s", rec[R], R) | |
} | |
# loop over the dup array | |
# and report the number and the names of the files | |
# containing the record | |
for (D in dup) { | |
printf "records found in %d files:\n\n", D | |
printf "%s\n\n", dup[D] | |
} | |
} | |
{ | |
# build an array named rec (short for record), indexed by | |
# the content of the current record ($0), concatenating | |
# the filenames separated by / as values | |
rec[$0] = rec[$0] ? rec[$0] "/" FILENAME : FILENAME | |
}' $* | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment