import urllib2
import re
import os
from urlparse import urljoin

# Link template for the yearly directory listings; {} is replaced with the year
url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"
# Year range of the data to be downloaded (inclusive)
start_year = 2003
end_year = 2003
for year in range(start_year, end_year + 1):
    year_url = url.format(year)
    # Get the HTML of the directory listing
    html = urllib2.urlopen(year_url).read()
    # Find all names starting with GlobSnow and ending with .nc.gz;
    # the "?" after "*" makes the match non-greedy
    fnames = re.findall(r"GlobSnow.*?\.nc\.gz", html)
    # Eliminate duplicates (each file name occurs more than once in the listing)
    fnames = sorted(set(fnames))
    nfiles_per_year = len(fnames)
    print "Found {} files for {}".format(nfiles_per_year, year)
    for i, fname in enumerate(fnames):
        # os.path.join is not safe for URLs (it would use "\" on Windows),
        # so build the file link with urljoin instead
        flink = urljoin(year_url, fname)
        reader = urllib2.urlopen(flink)
        if os.path.isfile(fname):  # No need to download the same file several times
            remote_file_size = int(reader.info().getheaders("Content-Length")[0])
            local_file_size = os.path.getsize(fname)
            if local_file_size == remote_file_size:  # The file already exists and the size is OK
                reader.close()
                continue
            os.remove(fname)  # The download was not completed for some reason
        # Write the remote file to disk ("wb", since the archives are binary gzip files)
        with open(fname, "wb") as f:
            print "Downloading {} ....".format(flink)
            f.write(reader.read())
        print "Downloaded {} of {} files for {}".format(i + 1, nfiles_per_year, year)
        # Close the connection
        reader.close()
    print "Downloaded data for year {}".format(year)
print "All downloads finished successfully"