Joshfindit · April 16, 2014 02:49
diff --git a/Looking for duplicates b/Looking for duplicates
 #First, create a folder and copy the raw text from each page into its own text file.
 #Example: harv_eker1.txt , harv_eker2.txt , harv_eker3.txt , and so on.
 #Open a terminal window in that folder, and run the following:

  # Word-frequency count over every .txt file in the current folder.
  #   tr -d '[:punct:]'           drop punctuation
  #   tr -s '[:space:]' '\n'      one word per line; tabs and runs of blanks
  #                               separate words too, not only single spaces
  #   sed '/^$/d'                 drop the empty line left by leading whitespace,
  #                               so "" is never counted as a word
  #   tr '[:upper:]' '[:lower:]'  fold case so "The" and "the" count together
  #   sort | uniq -c | sort -rn   count each word, highest count first
  cat -- *.txt \
    | tr -d '[:punct:]' \
    | tr -s '[:space:]' '\n' \
    | sed '/^$/d' \
    | tr '[:upper:]' '[:lower:]' \
    | sort \
    | uniq -c \
    | sort -rn

 #most-used words will be at the top
	#First, create a folder and copy the raw text from each page into its own text file.
	#Example: harv_eker1.txt , harv_eker2.txt , harv_eker3.txt , and so on.
	#Open a terminal window in that folder, and run the following:

	# Word-frequency count over every .txt file in the current folder.
	# The pipes must be plain "|": written as "\|" they are passed to cat as
	# literal arguments and no pipeline is built.
	#   tr -d '[:punct:]'           drop punctuation
	#   tr -s '[:space:]' '\n'      one word per line; tabs and runs of blanks
	#                               separate words too, not only single spaces
	#   sed '/^$/d'                 drop the empty line left by leading whitespace,
	#                               so "" is never counted as a word
	#   tr '[:upper:]' '[:lower:]'  fold case so "The" and "the" count together
	#   sort | uniq -c | sort -rn   count each word, highest count first
	cat -- *.txt \
		| tr -d '[:punct:]' \
		| tr -s '[:space:]' '\n' \
		| sed '/^$/d' \
		| tr '[:upper:]' '[:lower:]' \
		| sort \
		| uniq -c \
		| sort -rn

	#most-used words will be at the top