guziy · August 29, 2015 14:14
diff --git a/download_enhanced.py b/download_enhanced.py
 import urllib2
 import re
 import os

 # Download the GlobSnow daily ".nc.gz" files listed in the per-year archive
 # directories into the current working directory. Files that already exist
 # locally with the same size as reported by the server are skipped, so the
 # script can be re-run to resume an interrupted session.

 # Link format: "{}" is replaced by the year; the trailing "/" is required
 # because file links are built by appending the file name to this URL.
 url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"

 # Year range of the data to be downloaded (both ends inclusive)
 start_year = 2003
 end_year = 2003

 for year in range(start_year, end_year + 1):
    year_url = url.format(year)

    # Get the html of the directory listing and release the connection
    # even if reading fails
    listing = urllib2.urlopen(year_url)
    try:
        x = listing.read()
    finally:
        listing.close()

    # Get all words starting with GlobSnow and ending with .nc.gz, ? - means non-greedy
    fnames = re.findall(r"GlobSnow.*?\.nc\.gz", x)
    print(len(fnames))

    # Eliminate duplicates (a name can be matched more than once in the listing)
    fnames = set(fnames)

    nfiles_per_year = len(fnames)
    for i, fname in enumerate(fnames):
        # year_url ends with "/", so plain concatenation yields the file URL;
        # os.path.join is meant for filesystem paths, not for URLs
        flink = year_url + fname
        reader = urllib2.urlopen(flink)
        try:
            if os.path.isfile(fname):  # No need to download the same file several times
                # The header may be absent; treat the remote size as unknown then
                sizes = reader.info().getheaders("Content-length")
                remote_file_size = int(sizes[0]) if sizes else None
                if remote_file_size is not None and os.path.getsize(fname) == remote_file_size:
                    continue  # The file already exists and the size is OK (finally closes reader)
                # The download was not completed for some reason, or cannot be verified
                os.remove(fname)

            print("Downloading {} ....".format(flink))
            # Binary mode: the payload is a gzip archive and must be written unchanged
            with open(fname, "wb") as f:
                f.write(reader.read())
        finally:
            # Close the connection on every path: success, skip or error
            reader.close()

        print("Downloaded {} of {} files for {} ".format(i + 1, nfiles_per_year, year))

    print("Downloaded data for year {}".format(year))


 print("All downloads finished successfully")
	import urllib2
	import re
	import os

	# Download the GlobSnow daily ".nc.gz" files listed in the per-year archive
	# directories into the current working directory. Files that already exist
	# locally with the same size as reported by the server are skipped.
	# NOTE: the nesting of the loops and conditionals below was reconstructed,
	# since this copy of the script had lost its indentation.

	# Link format: "{}" is replaced by the year; the trailing "/" is required
	# because file links are built by appending the file name to this URL.
	url = "http://www.globsnow.info/se/archive_v2.1/{}/D4SC/"

	# Year range of the data to be downloaded (both ends inclusive)
	start_year = 2003
	end_year = 2003

	for year in range(start_year, end_year + 1):
	    year_url = url.format(year)

	    # Fetch the html of the directory listing; always release the connection
	    listing = urllib2.urlopen(year_url)
	    try:
	        x = listing.read()
	    finally:
	        listing.close()

	    # Get all words starting with GlobSnow and ending with .nc.gz, ? - means non-greedy
	    fnames = re.findall(r"GlobSnow.*?\.nc\.gz", x)
	    print(len(fnames))

	    # Eliminate duplicates
	    fnames = set(fnames)

	    nfiles_per_year = len(fnames)
	    for i, fname in enumerate(fnames):
	        # year_url ends with "/", so appending the name gives the file URL
	        # (os.path.join is a filesystem helper, not a URL helper)
	        flink = year_url + fname
	        reader = urllib2.urlopen(flink)
	        try:
	            if os.path.isfile(fname):  # No need to download the same file several times
	                # A missing Content-length header means the size cannot be checked
	                sizes = reader.info().getheaders("Content-length")
	                remote_file_size = int(sizes[0]) if sizes else None
	                local_file_size = os.path.getsize(fname)
	                if remote_file_size is not None and local_file_size == remote_file_size:
	                    continue  # The file already exists and the size is OK
	                # Incomplete or unverifiable local copy: fetch it again
	                os.remove(fname)

	            print("Downloading {} ....".format(flink))
	            # Write the local file in binary mode so the gzip data is stored unchanged
	            with open(fname, "wb") as f:
	                f.write(reader.read())
	        finally:
	            # Close the connection whether the file was written, skipped or failed
	            reader.close()

	        print("Downloaded {} of {} files for {} ".format(i + 1, nfiles_per_year, year))

	    print("Downloaded data for year {}".format(year))


	print("All downloads finished successfully")