Last active
March 25, 2022 06:12
-
-
Save zxteloiv/a99255ab7ae2da34e398831eb0a458c1 to your computer and use it in GitHub Desktop.
Download the whole set of NLTK data packages manually instead of using the nltk.downloader module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Refer to https://www.nltk.org/data.html for more information;
# this gist implements the "manual installation" section described there.
# Build the download list from the NLTK package index.
#
# Fetches https://www.nltk.org/nltk_data/, extracts every url="..." attribute
# whose value contains "githubuser", and writes one line per package to
# copora-name-url.txt in the current directory, in the form
#   <path after "packages/"> <full url>
# The list file name (spelling included) is the one nltk_download_zip and
# nltk_unpack_zips read, so it is kept unchanged.
#
# Returns: 0 on success; non-zero if the index cannot be fetched or no
#          matching package URL is found in it.
function nltk_fetch {
  local index_url='https://www.nltk.org/nltk_data/'
  local list_file='copora-name-url.txt'
  local index

  # Fetch first, so an HTTP or network failure is reported instead of
  # silently producing an empty list.
  if ! index=$(curl -fsSL "$index_url"); then
    printf 'nltk_fetch: cannot download %s\n' "$index_url" >&2
    return 1
  fi

  # The pattern needs no PCRE features, so plain `grep -o` is used rather
  # than a separately installed `ggrep -P`.
  printf '%s\n' "$index" \
    | grep -o 'url=[^ ]*' \
    | awk -F '"' '/githubuser/ {print $2}' \
    | awk -F 'packages/' '{print $2" "$0}' > "$list_file"

  if [[ ! -s "$list_file" ]]; then
    printf 'nltk_fetch: no package URLs found in %s\n' "$index_url" >&2
    return 1
  fi
}
# Download one file into the local nltk_data/ tree.
#
# Arguments:
#   $1 - destination path relative to nltk_data/ (parent directories are
#        created as needed)
#   $2 - URL to download
# Outputs:  one progress line on stdout, plus whatever wget prints
# Returns:  2 if an argument is missing; otherwise non-zero if the directory
#           cannot be created or wget fails.
function nltk_download_item {
  if (( $# < 2 )); then
    printf 'usage: nltk_download_item <relative-path> <url>\n' >&2
    return 2
  fi

  local rel_path=$1
  local url=$2
  local dest="nltk_data/${rel_path}"
  local dest_dir

  # All expansions are quoted so paths and URLs containing spaces or glob
  # characters are passed through unchanged.
  dest_dir=$(dirname -- "$dest") || return
  mkdir -p -- "$dest_dir" || return

  printf 'download %s from %s ...\n' "$rel_path" "$url"
  wget "$url" -O "$dest"
}
# Download every file listed in copora-name-url.txt.
#
# Each line of the list is "<relative-path> <url>" (the format nltk_fetch
# writes); every entry is handed to nltk_download_item. A failed download does
# not stop the loop.
#
# Returns: 0 if every download succeeded; 1 if the list cannot be read or at
#          least one download failed.
function nltk_download_zip {
  local list_file='copora-name-url.txt'
  local filename url
  local rc=0

  if [[ ! -r "$list_file" ]]; then
    printf 'nltk_download_zip: cannot read %s (run nltk_fetch first)\n' \
      "$list_file" >&2
    return 1
  fi

  # The list is read on fd 3 so nothing run inside the loop can consume it
  # through stdin. `|| [[ -n ... ]]` keeps a last line that lacks a newline.
  while IFS=' ' read -r -u 3 filename url || [[ -n "$filename" ]]; do
    # Skip blank or incomplete lines.
    [[ -n "$filename" && -n "$url" ]] || continue
    nltk_download_item "$filename" "$url" || rc=1
  done 3< "$list_file"

  return "$rc"
}
# Unzip each downloaded archive into the directory it was saved in.
#
# For every "<relative-path> <url>" line of copora-name-url.txt, extracts
# nltk_data/<relative-path> into nltk_data/<directory of relative-path>.
# Only the first field of each line is used. A failed extraction does not
# stop the loop.
#
# Returns: 0 if every archive was extracted; 1 if the list cannot be read or
#          at least one unzip call failed.
function nltk_unpack_zips {
  local list_file='copora-name-url.txt'
  local filename target_dir
  local rc=0

  if [[ ! -r "$list_file" ]]; then
    printf 'nltk_unpack_zips: cannot read %s (run nltk_fetch first)\n' \
      "$list_file" >&2
    return 1
  fi

  # The list is read on fd 3 so nothing run inside the loop can consume it
  # through stdin. `|| [[ -n ... ]]` keeps a last line that lacks a newline.
  while IFS=' ' read -r -u 3 filename _ || [[ -n "$filename" ]]; do
    [[ -n "$filename" ]] || continue
    # target_dir is local, so a caller's own `path` variable is not touched.
    target_dir=$(dirname -- "$filename")
    unzip -d "nltk_data/${target_dir}" "nltk_data/${filename}" || rc=1
  done 3< "$list_file"

  return "$rc"
}
# Delete every *.zip file below nltk_data/ (relative to the current directory).
#
# Uses `find -exec ... {} +` rather than piping names to xargs, so file names
# containing whitespace are handled and nothing is run when there is no match.
#
# Returns: the exit status of find (non-zero if nltk_data is missing or a
#          removal fails).
function nltk_remove_zips {
  find nltk_data -type f -name '*.zip' -exec rm -- {} +
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment