berdosi · December 2, 2017 19:32
diff --git a/wordfrequency.sh b/wordfrequency.sh
 # Word-frequency table for every *.html file in the current directory
 # (GNU sed syntax: \+, \t and \n). Output: "<count> <word>", rarest first.
 # - Empty tokens are removed by a second sed: inside the first one the
 #   freshly inserted line breaks are still part of a single pattern space,
 #   so /^$/d would not see the individual words there.
 # - sort -f places case variants next to each other; uniq -ci only merges
 #   adjacent lines, so a case-sensitive sort can leave "Foo"/"foo" apart.
 cat -- *.html | sed -e 's/<[^>]\+>//g' -e 's/[ \t]\+/\n/g' -e 's/[-\.\\(\\):,;0-9+|]//g' | sed -e '/^$/d' | sort -f | uniq -ci | sort -n

 # Annotated word-frequency pipeline for every *.html file in the current
 # directory (GNU sed syntax). Output: "<count> <word>", rarest first.
 # Each stage ends with "|" so the shell continues on the next line and a
 # trailing comment is allowed; a backslash followed by a comment would not
 # continue the line.
 cat -- *.html |              # concatenate the contents of all the files
 sed -e 's/<[^>]\+>//g' |     # strip tags (assumes a tag never spans a line break)
 sed -e 's/[ \t]\+/\n/g' |    # turn runs of spaces/tabs into line breaks: one word per line
 sed -e 's/[^[:alpha:]]//g' \
     -e '/^$/d' |             # separate sed: keep letters only (a negated class would also eat the inserted line breaks), drop empty lines
 sort -f |                    # sort ignoring case so equal words are adjacent for uniq
 uniq -ci |                   # count occurrences, case-insensitively
 sort -n                      # order numerically by the count
	# Word-frequency table for every *.html file in the current directory
	# (GNU sed syntax: \+, \t and \n). Output: "<count> <word>", rarest first.
	# Pipes are written unescaped: "\|" hands a literal "|" to the command
	# as an argument instead of connecting two commands.
	# - Empty tokens are removed by a second sed: inside the first one the
	#   freshly inserted line breaks are still part of a single pattern space.
	# - sort -f places case variants next to each other; uniq -ci only merges
	#   adjacent lines.
	cat -- *.html | sed -e 's/<[^>]\+>//g' -e 's/[ \t]\+/\n/g' -e 's/[-\.\\(\\):,;0-9+|]//g' | sed -e '/^$/d' | sort -f | uniq -ci | sort -n

	# Annotated word-frequency pipeline for every *.html file in the current
	# directory (GNU sed syntax). Output: "<count> <word>", rarest first.
	# Each stage ends with an unescaped "|" so the shell continues on the next
	# line and a trailing comment is allowed; "\|" would pass a literal "|" as
	# an argument, and a backslash followed by a comment does not continue
	# the line.
	cat -- *.html |              # concatenate the contents of all the files
	sed -e 's/<[^>]\+>//g' |     # strip tags (assumes a tag never spans a line break)
	sed -e 's/[ \t]\+/\n/g' |    # turn runs of spaces/tabs into line breaks: one word per line
	sed -e 's/[^[:alpha:]]//g' \
	    -e '/^$/d' |             # separate sed: keep letters only (a negated class would also eat the inserted line breaks), drop empty lines
	sort -f |                    # sort ignoring case so equal words are adjacent for uniq
	uniq -ci |                   # count occurrences, case-insensitively
	sort -n                      # order numerically by the count