bussiere · August 13, 2010 13:38
diff --git a/Downloader.py b/Downloader.py
 #what :give this program an url from a pciture and it will try to download all the list of existing picture exemple : downloader.py #http://www.example.com/picture01.jpg
 #why : some times i've got a link like http://www.example.com/picture01.jpg and i guess that there are more picture of it this program will try #to find them all and download them all

 """
 By bussiere : bussiere @at gmail.com
 """

 __Author__ ="bussiere"
 __Email__ = "bussiere @at gmail.com"
 __Titre__ = "downloading a serial list of picture from the web"
 __Description__ = "give this program an url from a pciture and it will try to download all the list of existing picture exemple : downloader.py http://www.example.com/picture01.jpg"
 __Discussion__ = "some times i've got a link like http://www.example.com/picture01.jpg and i guess that there are more picture of it this program will try to find them all and download them all"
 __Tags__ ="download picture pictures jpg JPG url list series serie"
 import re
 import time,datetime
 import urllib2

 def download_pictures(url,start=1,limit=999,trial=3):
    #we compile the regular exepression that will get the number and the point of the url as 01. in the http://www.example.com/picture01.jpg
    reg = re.compile("[0-9]+\.")
    # here we compile a regular expression for find html in data
    reghtml = re.compile('HTML')
    # we search the numbers with . in the url
    result = reg.search(url)
    # we get the begining position and the ending position of the numbers with .
    begin , end =  result.span()
    # we don't need the . at the end so we finish it earlier
    end =  end - 1
    #this string wil contain some zero because sometimes in the url it's 0001.jpg or 01.jpg or 1.jpg
    zero = ''
    #the i will count the zero
    i = 0
    #we put how 0 in zero that we have number in the url save one
    while  i < end -begin-1 :
        zero += '0'
        i += 1
    #here we get the extension a the end of the url .jpg here
    extension = url[end:]
    #count will determine how many times we have tried to download a file
    count = 0
    #while the download number is inferior of the limit let's goes on
    while start < limit :
        #we get the time
        t = datetime.datetime.now()
        # we make a file name with the epoch to be sure that the name will exist just one time
        name = "%s.%s"%(time.mktime(t.timetuple()),extension)
        # we make the url to download the file based on the url mixed with zero and the number of the download
        urlbis = "%s%s%s%s"%(url[:begin],zero,start,url[end:])
        #we try to download the file
        try :
            #here we open the url
            remoteFile = urllib2.urlopen(urlbis)
            #we read the file that we get
            data = remoteFile.read()
            # if the file does not exist sometimes we get a 404 error with the word HTML in it
            #so we check for it and if it is not in.
            if not reghtml.search(data) :
                #we create a file with the epoch name
                localFile = open(name, "wb")
                # we write the data in it
                localFile.write(data)
                #we close it
                localFile.close()
            else :
                #if there is the word HTML in it 
                #we end the loop
                break
        #here it is if we can't get any file at all
        except :
            # if the zero count equal zero we increase the count and put zero blank
            if i == 0 :
                zero = ''
                count += 1
            #here we just sustract zero
            else :
                # we reduce the zero counter
                i = i - 1   
                # we substract one zero
                zero = zero[:i] 
                # and we substract one to the counter of file to make it retry this file with one zero less
                start = start - 1           
        # if the failure counter is equal to the trial number we get out
        if count > trial  :
            break
        #we just increase the file counter
        start += 1
        

        
    


 def main(argv=None):
    # we get the argument passed on the command line
    argv = sys.argv
    #we initialize the argument at none
    url = None
    #at which number we begin to download
    start =  None
    #how many download ?
    limit = None
    # how many time we will try to download
    trial = None
    #we get the argument in a list
    options =  sys.argv[1:]
    #we put the option one after one and initialise them
    for option in options :
        if not url :
            url = option
        else :
            if not start :
                start = option
            else :
                if not limit :
                    limit = option
                else :
                    trial = trial
    #if some options ar empty we put the default value
    if not start :
        start = 1
    if not limit :
        limit = 999 + start
    if not trial :
        trial = 3 
    #we call the download picture
    download_pictures(url,start,limit,trial)


 if __name__ == "__main__":
    import sys
    #we call the main function
    sys.exit(main())
	#what :give this program an url from a pciture and it will try to download all the list of existing picture exemple : downloader.py #http://www.example.com/picture01.jpg
	#why : some times i've got a link like http://www.example.com/picture01.jpg and i guess that there are more picture of it this program will try #to find them all and download them all

	"""
	By bussiere : bussiere @at gmail.com
	"""

	__Author__ ="bussiere"
	__Email__ = "bussiere @at gmail.com"
	__Titre__ = "downloading a serial list of picture from the web"
	__Description__ = "give this program an url from a pciture and it will try to download all the list of existing picture exemple : downloader.py http://www.example.com/picture01.jpg"
	__Discussion__ = "some times i've got a link like http://www.example.com/picture01.jpg and i guess that there are more picture of it this program will try to find them all and download them all"
	__Tags__ ="download picture pictures jpg JPG url list series serie"
	import re
	import time,datetime
	import urllib2

	def download_pictures(url,start=1,limit=999,trial=3):
	#we compile the regular exepression that will get the number and the point of the url as 01. in the http://www.example.com/picture01.jpg
	reg = re.compile("[0-9]+\.")
	# here we compile a regular expression for find html in data
	reghtml = re.compile('HTML')
	# we search the numbers with . in the url
	result = reg.search(url)
	# we get the begining position and the ending position of the numbers with .
	begin , end = result.span()
	# we don't need the . at the end so we finish it earlier
	end = end - 1
	#this string wil contain some zero because sometimes in the url it's 0001.jpg or 01.jpg or 1.jpg
	zero = ''
	#the i will count the zero
	i = 0
	#we put how 0 in zero that we have number in the url save one
	while i < end -begin-1 :
	zero += '0'
	i += 1
	#here we get the extension a the end of the url .jpg here
	extension = url[end:]
	#count will determine how many times we have tried to download a file
	count = 0
	#while the download number is inferior of the limit let's goes on
	while start < limit :
	#we get the time
	t = datetime.datetime.now()
	# we make a file name with the epoch to be sure that the name will exist just one time
	name = "%s.%s"%(time.mktime(t.timetuple()),extension)
	# we make the url to download the file based on the url mixed with zero and the number of the download
	urlbis = "%s%s%s%s"%(url[:begin],zero,start,url[end:])
	#we try to download the file
	try :
	#here we open the url
	remoteFile = urllib2.urlopen(urlbis)
	#we read the file that we get
	data = remoteFile.read()
	# if the file does not exist sometimes we get a 404 error with the word HTML in it
	#so we check for it and if it is not in.
	if not reghtml.search(data) :
	#we create a file with the epoch name
	localFile = open(name, "wb")
	# we write the data in it
	localFile.write(data)
	#we close it
	localFile.close()
	else :
	#if there is the word HTML in it
	#we end the loop
	break
	#here it is if we can't get any file at all
	except :
	# if the zero count equal zero we increase the count and put zero blank
	if i == 0 :
	zero = ''
	count += 1
	#here we just sustract zero
	else :
	# we reduce the zero counter
	i = i - 1
	# we substract one zero
	zero = zero[:i]
	# and we substract one to the counter of file to make it retry this file with one zero less
	start = start - 1
	# if the failure counter is equal to the trial number we get out
	if count > trial :
	break
	#we just increase the file counter
	start += 1






	def main(argv=None):
	# we get the argument passed on the command line
	argv = sys.argv
	#we initialize the argument at none
	url = None
	#at which number we begin to download
	start = None
	#how many download ?
	limit = None
	# how many time we will try to download
	trial = None
	#we get the argument in a list
	options = sys.argv[1:]
	#we put the option one after one and initialise them
	for option in options :
	if not url :
	url = option
	else :
	if not start :
	start = option
	else :
	if not limit :
	limit = option
	else :
	trial = trial
	#if some options ar empty we put the default value
	if not start :
	start = 1
	if not limit :
	limit = 999 + start
	if not trial :
	trial = 3
	#we call the download picture
	download_pictures(url,start,limit,trial)


	if __name__ == "__main__":
	import sys
	#we call the main function
	sys.exit(main())
No results found