chaonan99 · August 10, 2016 17:26
diff --git a/download_tgif.py b/download_tgif.py
 # [Description] This file downloads a dataset from a set of URLs
 # [Author] chaonan99
 # [Date] 08/09/2016
 # [Email] [email protected]

 import os,re,optparse,urllib,string
 import numpy as np

 # Command line: -i names the text file that contains the URLs, -o names the
 # folder the downloaded files are written to.
 parser = optparse.OptionParser()
 parser.add_option('-i', '--input', dest='infile', help='input txt file name')
 parser.add_option('-o', '--output_folder', dest='outfolder', help='output folder name')
 (opts, args) = parser.parse_args()

 # Both options are used unconditionally below; report a missing one as a
 # usage error instead of failing later inside os.path or open().
 if not opts.infile or not opts.outfolder:
     parser.error('both --input and --output_folder are required')

 # urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
 try:
     from urllib.request import urlretrieve
 except ImportError:
     from urllib import urlretrieve

 if not os.path.isdir(opts.outfolder):
     # makedirs also creates any missing parent folders.
     os.makedirs(opts.outfolder)

 # Change the regular expression to use this script on other datasets; it is
 # intended as a fairly general URL matcher.
 # Groups: 1 = optional scheme, 2 = host-like run, 3 = repeated directory
 # components, 4 = final '/'-prefixed component, used as the local file name.
 url_pattern = re.compile(r'(https?:\/\/)?([\w\-\_\.]+)+(\/[\w\-\_]*)*(?![\w\-\_]+\.)(\/[\w\-\_\.]*)')

 # Read the whole input up front so the file handle is closed before the
 # downloads start.
 with open(opts.infile) as url_file:
     url_text = url_file.read()

 for match in url_pattern.finditer(url_text):
     file_name = match.group(4).lstrip('/')
     if not file_name:
         # The URL ends in '/': there is no file name to save it under.
         continue
     target = os.path.join(opts.outfolder, file_name)
     # Files already present in the output folder are not downloaded again.
     if not os.path.isfile(target):
         print("Download: " + match.group(4))
         # Use the whole match as the URL: a repeated group only keeps its
         # last repetition, so joining the groups would drop intermediate
         # path segments.
         urlretrieve(match.group(0), target)
	# [Description] This file downloads a dataset from a set of URLs
	# [Author] chaonan99
	# [Date] 08/09/2016
	# [Email] [email protected]

	import os,re,optparse,urllib,string
	import numpy as np

	# Command line: -i names the text file that contains the URLs, -o names the
	# folder the downloaded files are written to.
	parser = optparse.OptionParser()
	parser.add_option('-i', '--input', dest='infile', help='input txt file name')
	parser.add_option('-o', '--output_folder', dest='outfolder', help='output folder name')
	(opts, args) = parser.parse_args()

	# Both options are used unconditionally below; report a missing one as a
	# usage error instead of failing later inside os.path or open().
	if not opts.infile or not opts.outfolder:
	    parser.error('both --input and --output_folder are required')

	# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
	try:
	    from urllib.request import urlretrieve
	except ImportError:
	    from urllib import urlretrieve

	if not os.path.isdir(opts.outfolder):
	    # makedirs also creates any missing parent folders.
	    os.makedirs(opts.outfolder)

	# Change the regular expression to use this script on other datasets; it is
	# intended as a fairly general URL matcher.
	# Groups: 1 = optional scheme, 2 = host-like run, 3 = repeated directory
	# components, 4 = final '/'-prefixed component, used as the local file name.
	# The directory group needs its '*' quantifiers so that URLs with zero or
	# several directory components are matched.
	url_pattern = re.compile(r'(https?:\/\/)?([\w\-\_\.]+)+(\/[\w\-\_]*)*(?![\w\-\_]+\.)(\/[\w\-\_\.]*)')

	# Read the whole input up front so the file handle is closed before the
	# downloads start.
	with open(opts.infile) as url_file:
	    url_text = url_file.read()

	for match in url_pattern.finditer(url_text):
	    file_name = match.group(4).lstrip('/')
	    if not file_name:
	        # The URL ends in '/': there is no file name to save it under.
	        continue
	    target = os.path.join(opts.outfolder, file_name)
	    # Files already present in the output folder are not downloaded again.
	    if not os.path.isfile(target):
	        print("Download: " + match.group(4))
	        # Use the whole match as the URL: a repeated group only keeps its
	        # last repetition, so joining the groups would drop intermediate
	        # path segments.
	        urlretrieve(match.group(0), target)