-
-
Save aligusnet/6478289 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Downloads NCDC weather data year by year and repacks each year into a
# single gzip archive.

# Global parameters.
# Scratch directory for downloads; it is deleted and recreated on every run.
g_tmp_folder="ncdc_tmp"
# Directory receiving one <year>.gz per processed year; also recreated per run.
g_output_folder="ncdc_data"
# The download location has moved before, so host and path can be overridden
# through the environment without editing the script.
g_remote_host="${NCDC_REMOTE_HOST:-ftp.ncdc.noaa.gov}"
g_remote_path="${NCDC_REMOTE_PATH:-pub/data/noaa}"
readonly g_tmp_folder g_output_folder g_remote_host g_remote_path
# Create an empty folder, discarding anything already at that path.
# Arguments: $1 - folder path (parent directories are created as needed)
# Returns:   0 on success, 2 if the path is empty, otherwise the status of
#            the failing rm/mkdir.
function create_folder {
  local folder_path="${1:-}"

  # Refuse an empty path: rm -rf / mkdir on "" is never what the caller wants.
  if [[ -z "$folder_path" ]]; then
    echo "create_folder: folder path is required" >&2
    return 2
  fi

  # rm -rf is a no-op when nothing exists, so no existence test is needed;
  # "--" protects against paths that start with a dash.
  rm -rf -- "$folder_path" && mkdir -p -- "$folder_path"
}
# Recursively download one year of data into the temporary folder.
# Globals:   g_remote_host, g_remote_path, g_tmp_folder (read)
# Arguments: $1 - year to download
# Outputs:   a diagnostic on stderr when the download fails
# Returns:   wget's exit status, or 2 if no year was given.
function download_data {
  local year="${1:-}"
  local source_url
  local status=0

  if [[ -z "$year" ]]; then
    echo "download_data: year argument is required" >&2
    return 2
  fi

  source_url="ftp://$g_remote_host/$g_remote_path/$year"

  # wget runs with -q, so a failure would otherwise pass unnoticed; capture
  # the status and report it ourselves.
  wget -r -c -q --no-parent -P "$g_tmp_folder" "$source_url" || status=$?
  if (( status != 0 )); then
    echo "failed to download $source_url (wget exit status $status)" >&2
  fi
  return "$status"
}
# Merge every downloaded file of one year into a single gzip archive.
# Globals:   g_tmp_folder, g_remote_host, g_remote_path, g_output_folder (read)
# Arguments: $1 - year to process
# Outputs:   "created file: <path>" on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if there is nothing to process or compression
#            fails, 2 if no year was given.
function process_data {
  local year="${1:-}"
  local local_path="$g_tmp_folder/$g_remote_host/$g_remote_path/$year"
  local tmp_output_file="$g_tmp_folder/$year"
  local zipped_file="$g_output_folder/$year.gz"
  local file
  local -a files=()

  if [[ -z "$year" ]]; then
    echo "process_data: year argument is required" >&2
    return 2
  fi

  # Collect the files first: an unmatched glob stays a literal pattern, which
  # previously reached gunzip and still produced a bogus output archive.
  for file in "$local_path"/*; do
    if [[ -f "$file" ]]; then
      files+=("$file")
    fi
  done

  if (( ${#files[@]} == 0 )); then
    echo "no downloaded files for year $year in $local_path; skipping" >&2
    return 1
  fi

  # Truncate rather than append so leftovers from an earlier run cannot leak
  # into this year's data. A file that fails to decompress is reported and
  # skipped, keeping the best-effort behaviour.
  for file in "${files[@]}"; do
    gunzip -c -- "$file" || echo "warning: could not decompress $file" >&2
  done > "$tmp_output_file"

  if ! gzip -c -- "$tmp_output_file" > "$zipped_file"; then
    echo "failed to create $zipped_file" >&2
    rm -f -- "$tmp_output_file"
    return 1
  fi
  echo "created file: $zipped_file"

  # Free the scratch space before the next year is downloaded.
  rm -rf -- "${local_path:?}"
  rm -f -- "$tmp_output_file"
}
# Download and repack the data for an inclusive range of years.
# Globals:   g_tmp_folder, g_output_folder (read)
# Arguments: $1 - start year  (optional, default 1901)
#            $2 - finish year (optional, default 1920)
# Outputs:   diagnostics on stderr for invalid arguments and failed years
# Returns:   0 if every year succeeded, 1 if a folder could not be prepared
#            or any year failed, 2 if a year argument is not a number.
function main {
  local start_year="${1:-1901}"
  local finish_year="${2:-1920}"
  local year
  local failed=0

  if [[ ! "$start_year" =~ ^[0-9]+$ || ! "$finish_year" =~ ^[0-9]+$ ]]; then
    echo "usage: ${0##*/} [start_year [finish_year]] (years must be numbers)" >&2
    return 2
  fi
  # Force base 10 so a leading zero is not parsed as octal.
  start_year=$((10#$start_year))
  finish_year=$((10#$finish_year))

  create_folder "$g_tmp_folder" || return 1
  create_folder "$g_output_folder" || return 1

  for ((year = start_year; year <= finish_year; year++)); do
    # Do not repack a year whose download failed: that would publish an
    # empty or partial archive as if it were complete.
    if ! download_data "$year"; then
      echo "download failed for year $year; skipping" >&2
      failed=1
      continue
    fi
    process_data "$year" || failed=1
  done

  rm -rf -- "${g_tmp_folder:?}"
  return "$failed"
}
# Forward the command-line arguments unchanged; quoting prevents
# word-splitting and globbing of the year arguments.
main "$@"
Thank you
Thanks for the valuable script and valuable edit by crush-157 😄
thank you so much
Changed again to:
ftp://ftp.ncdc.noaa.gov/pub/data/noaa/
Thanks
cool great for this work
Thank you very much! :)
Thanks Alexander. This is great. 👍
Thanks. Cool.
That's really helpful! Thank you guys!
thanks for your comments and special thanks to @crush-157 for the fix.
Thanks a lot
worked perfectly
Thanks, It still works
I am running on mac and I get
gzip: ncdc_tmp/ftp.ncdc.noaa.gov/pub/data/noaa/1921/*.gz: No such file or directory
created file: ncdc_data/1921.gz
The above was due to a lack of internet connectivity. My bad.
Thanks, it works
Great script, works beautifully. The FTP server location has also been updated in the script, so nothing needs to be edited; the script works as it is.
Works excellently without a change. Thanks.
Awesome! Thanks
Download location has changed again. Also, I have introduced changes so the script does not try to run process_data on files that have not been downloaded, and prints information about failed downloads to stderr. https://gist.github.com/rehevkor5/2e407950ca687b36fc54