Created
January 18, 2021 08:39
-
-
Save NerOcrO/d5713388e12e5933dcb8e6f75957a503 to your computer and use it in GitHub Desktop.
occurrence words
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# count_words.sh - word-frequency report for a plain-text file.
#
# Typical workflow for a PDF:
#   sudo apt install poppler-utils
#   pdftotext -layout [FILE].pdf [FILE].txt
#   ./count_words.sh [FILE].txt
#
# Arguments:
#   $1 - path of the text file to analyse.
# Output (stdout):
#   One line per distinct word: the occurrence count, left-aligned in a
#   5-character column, immediately followed by the lower-cased word.
#   Lines are ordered by ascending count (sort -g). The last line is the
#   number of distinct words found.
# Exit status:
#   255 when the script is not given exactly one argument,
#   1   when the file cannot be read.

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 filename" >&2
  # 255 is the status bash reports for the non-portable `exit -1`; it is
  # spelled out so callers keep seeing the same value on a usage error.
  exit 255
fi

filename=$1

if [[ ! -r "$filename" ]]; then
  printf '%s: cannot read file: %s\n' "$0" "$filename" >&2
  exit 1
fi

# Pipeline stages:
#   1. grep - emit, one per line, every word made of at least 3 alphabetic
#             characters. `--` keeps a file name starting with "-" from
#             being parsed as an option.
#   2. tr   - fold to lower case so "Word" and "word" share one counter.
#   3. awk  - tally the words, hand the "<count><word>" lines to sort, and
#             print the number of distinct words once sort has finished, so
#             the total is always the last line of the report.
grep -Eo -- '\b[[:alpha:]]{3,}\b' "$filename" |
  tr '[:upper:]' '[:lower:]' |
  awk '
    { count[$0]++ }

    END {
      sorter = "sort -g"
      number_of_words = 0
      for (word in count) {
        printf("%-5d%s\n", count[word], word) | sorter
        number_of_words++
      }
      # Closing the pipe waits for sort to write all of its output.
      close(sorter)
      printf("%d\n", number_of_words)
    }'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment