xavvvier · December 13, 2018 19:36
diff --git a/download.py b/download.py
 import urllib.request
 import urllib.parse
 import html
 import re
 import sys

 def main():
     """Download every .mp3 file linked from an index page.

     Command line: ``download.py url_of_index_page [start_at]``.
     ``start_at`` is the 1-based position of the first link to fetch; it is
     honoured only when it consists solely of digits, otherwise downloading
     starts at the first link. When no URL is supplied the usage text is
     printed and the process exits with status 1.
     """
     if len(sys.argv) < 2:
         print_usage()
         sys.exit(1)

     # Parse the arguments
     url = sys.argv[1]
     start_index = 0
     if len(sys.argv) > 2 and sys.argv[2].isdigit():
         # Clamp at 0: a start_at of "0" would otherwise become -1 and make
         # the loop begin with the *last* link through negative indexing.
         start_index = max(int(sys.argv[2]) - 1, 0)

     # Read the index page; the with-block closes the response even when
     # read() or decode() raises.
     with urllib.request.urlopen(url) as response:
         page_url = response.url
         # errors='replace' so a page that is not valid UTF-8 does not abort
         # the whole run.
         content = response.read().decode('utf8', errors='replace')

     # Capture href values ending in .mp3. [^"]+ keeps each match inside a
     # single attribute value; a greedy .+ would swallow several anchors
     # that share one line.
     hrefs = re.findall(r'<a href="([^"]+\.mp3)">', content)

     total_items = len(hrefs)
     print("Found", total_items, "item(s)")
     for position in range(start_index, total_items):
         href = html.unescape(hrefs[position])
         # Resolve the link against the page URL instead of concatenating, so
         # absolute links, sub-directory links and page URLs without a
         # trailing slash all yield a valid address.
         file_url = urllib.parse.urljoin(page_url, href)
         # Take the last path segment only after percent-decoding, so an
         # encoded slash (%2F) cannot end up inside the local file name.
         file_name = urllib.parse.unquote(href).rsplit("/", 1)[-1]
         print("Processing", position + 1, "of", total_items, file_name)
         download(file_url, file_name)

 def download(url, filename):
     """Fetch ``url`` and write the response body to ``filename``.

     The whole body is read into memory before the file is opened, so a
     request that fails part-way never leaves a truncated file behind.
     Errors raised by ``urlopen``, ``read`` or the file write propagate to
     the caller.
     """
     print("requesting", url)
     # Context managers guarantee the response and the file are closed even
     # when reading or writing raises.
     with urllib.request.urlopen(url) as response:
         data = response.read()
     with open(filename, 'wb') as new_file:
         new_file.write(data)

 def print_usage():
     """Print the one-line command-line synopsis to standard output."""
     usage_text = "Usage: python3 download.py url_of_index_page [start_at]"
     print(usage_text)

 # Script entry point: run the downloader.
 # NOTE(review): there is no __main__ guard, so this call also runs whenever
 # the file is imported as a module.
 main()
	import urllib.request
	import urllib.parse
	import html
	import re
	import sys

	def main():
	    """Download every .mp3 file linked from an index page.

	    Command line: ``download.py url_of_index_page [start_at]``.
	    ``start_at`` is the 1-based position of the first link to fetch; it is
	    honoured only when it consists solely of digits, otherwise downloading
	    starts at the first link. When no URL is supplied the usage text is
	    printed and the process exits with status 1.
	    """
	    if len(sys.argv) < 2:
	        print_usage()
	        sys.exit(1)

	    # Parse the arguments
	    url = sys.argv[1]
	    start_index = 0
	    if len(sys.argv) > 2 and sys.argv[2].isdigit():
	        # Clamp at 0: a start_at of "0" would otherwise become -1 and make
	        # the loop begin with the *last* link through negative indexing.
	        start_index = max(int(sys.argv[2]) - 1, 0)

	    # Read the index page; the with-block closes the response even when
	    # read() or decode() raises.
	    with urllib.request.urlopen(url) as response:
	        page_url = response.url
	        # errors='replace' so a page that is not valid UTF-8 does not abort
	        # the whole run.
	        content = response.read().decode('utf8', errors='replace')

	    # Capture href values ending in .mp3. [^"]+ keeps each match inside a
	    # single attribute value; a greedy .+ would swallow several anchors
	    # that share one line.
	    hrefs = re.findall(r'<a href="([^"]+\.mp3)">', content)

	    total_items = len(hrefs)
	    print("Found", total_items, "item(s)")
	    for position in range(start_index, total_items):
	        href = html.unescape(hrefs[position])
	        # Resolve the link against the page URL instead of concatenating, so
	        # absolute links, sub-directory links and page URLs without a
	        # trailing slash all yield a valid address.
	        file_url = urllib.parse.urljoin(page_url, href)
	        # Take the last path segment only after percent-decoding, so an
	        # encoded slash (%2F) cannot end up inside the local file name.
	        file_name = urllib.parse.unquote(href).rsplit("/", 1)[-1]
	        print("Processing", position + 1, "of", total_items, file_name)
	        download(file_url, file_name)

	def download(url, filename):
	    """Fetch ``url`` and write the response body to ``filename``.

	    The whole body is read into memory before the file is opened, so a
	    request that fails part-way never leaves a truncated file behind.
	    Errors raised by ``urlopen``, ``read`` or the file write propagate to
	    the caller.
	    """
	    print("requesting", url)
	    # Context managers guarantee the response and the file are closed even
	    # when reading or writing raises.
	    with urllib.request.urlopen(url) as response:
	        data = response.read()
	    with open(filename, 'wb') as new_file:
	        new_file.write(data)

	def print_usage():
	    """Print the one-line command-line synopsis to standard output."""
	    usage_text = "Usage: python3 download.py url_of_index_page [start_at]"
	    print(usage_text)

	# Script entry point: run the downloader.
	# NOTE(review): there is no __main__ guard, so this call also runs whenever
	# the file is imported as a module.
	main()