RamseyK · September 27, 2018 21:10
diff --git a/apple_source_download.py b/apple_source_download.py
 import argparse
 import sys
 import os
 import requests
 import re


 def download(url, output_dir):
     """Download the tarball at ``url`` into ``output_dir``.

     The local file is named after the last path component of ``url``. If a
     file with that name already exists in ``output_dir`` the download is
     skipped.

     The response body is streamed into a temporary ``<name>.part`` file that
     is renamed to its final name only after the transfer has completed, so
     an interrupted run cannot leave behind a truncated tarball that a later
     run would skip as "already exists".

     Args:
         url: Absolute URL of the tarball.
         output_dir: Existing directory to store the tarball in.

     Raises:
         requests.RequestException: The request failed or the server
             answered with an HTTP error status.
         OSError: The file could not be written or renamed.
     """
     # Check to see if the tarball has already been downloaded to the output dir
     fn = os.path.basename(url)
     filepath = os.path.join(output_dir, fn)
     if os.path.exists(filepath):
         print("{} already exists, skipping.\n".format(filepath))
         return

     # Download tarball
     print("Downloading {} ...".format(fn))

     partial_path = filepath + '.part'
     try:
         # The context manager releases the connection even if writing fails.
         with requests.get(url, stream=True) as r:
             # Fail on 4xx/5xx instead of saving the error page as a tarball.
             r.raise_for_status()
             with open(partial_path, 'wb') as fh:
                 for chunk in r.iter_content(chunk_size=1024):
                     fh.write(chunk)
         # Publish the file under its real name only once it is complete.
         os.replace(partial_path, filepath)
     finally:
         # After a successful rename the partial file is gone; anything still
         # here is the leftover of a failed or interrupted transfer.
         if os.path.exists(partial_path):
             os.remove(partial_path)


 def main():
     """Download every source tarball linked from an Apple release page.

     Command-line arguments:
         -u / --url:    The sources page of a release, e.g.
                        https://opensource.apple.com/release/macos-10133.html
         -o / --output: Existing directory. Tarballs are stored in a
                        sub-directory of it named after the release page.

     Returns:
         0 when every tarball was downloaded (or already present), -1 when
         the arguments are invalid, the release page cannot be retrieved, or
         at least one tarball failed to download.
     """
     # Imported here so the function does not rely on the file's import block.
     from urllib.parse import urljoin, urlparse

     parser = argparse.ArgumentParser(description='A tool to download the Apple sources for a particular release')
     parser.add_argument('-u', '--url', required=True, type=str, help='The sources page for a particular release. ie. https://opensource.apple.com/release/macos-10133.html')
     parser.add_argument('-o', '--output', required=True, type=str, help='Output directory to download files to')

     args = parser.parse_args()

     # Compare the parsed host name instead of doing a substring test, so a
     # URL that merely mentions the domain in its path or query is rejected.
     try:
         parsed_url = urlparse(args.url)
         host = (parsed_url.hostname or '').lower()
     except ValueError:
         parsed_url = None
         host = ''

     if host != 'opensource.apple.com' and not host.endswith('.opensource.apple.com'):
         print("This doesnt look like a valid URL from opensource.apple.com.  Aborting")
         return -1

     if not args.output or not os.path.isdir(args.output):
         print("Output must point to an output directory that exists")
         parser.print_help()
         return -1

     # Name the download directory after the page, e.g. "macos-10133".
     version = os.path.basename(parsed_url.path).split('.')[0]

     output_dir = os.path.join(args.output, version)
     os.makedirs(output_dir, exist_ok=True)

     print("Download directory: {}".format(output_dir))

     # Download content of the page
     try:
         page = requests.get(args.url)
         # Treat an HTTP error page as a failure rather than "no tarballs".
         page.raise_for_status()
     except requests.RequestException as e:
         print("Could not retrieve target content at url: {}".format(e))
         return -1

     # Find the link of each tarball. [^"]+ keeps the match inside a single
     # href value even when a line holds several attributes or anchors.
     expr = re.compile(r'<a\s[^>]*?href="(?P<relpath>[^"]+\.tar\.gz)"')

     failed = 0
     for m in expr.finditer(page.text):
         # urljoin resolves root-relative, page-relative and absolute hrefs.
         tarball_url = urljoin(args.url, m.group('relpath'))
         try:
             download(tarball_url, output_dir)
         except (requests.RequestException, OSError) as e:
             # Keep going so one bad tarball does not abort the whole run.
             print("Failed to download {}: {}".format(tarball_url, e))
             failed += 1

     return -1 if failed else 0


 if __name__ == '__main__':
     # Script entry point: hand main()'s return value to the interpreter as
     # the process exit status.
     raise SystemExit(main())
	import argparse
	import sys
	import os
	import requests
	import re


	def download(url, output_dir):
	    """Download the tarball at ``url`` into ``output_dir``.

	    The local file is named after the last path component of ``url``. If a
	    file with that name already exists in ``output_dir`` the download is
	    skipped.

	    The response body is streamed into a temporary ``<name>.part`` file that
	    is renamed to its final name only after the transfer has completed, so
	    an interrupted run cannot leave behind a truncated tarball that a later
	    run would skip as "already exists".

	    Args:
	        url: Absolute URL of the tarball.
	        output_dir: Existing directory to store the tarball in.

	    Raises:
	        requests.RequestException: The request failed or the server
	            answered with an HTTP error status.
	        OSError: The file could not be written or renamed.
	    """
	    # Check to see if the tarball has already been downloaded to the output dir
	    fn = os.path.basename(url)
	    filepath = os.path.join(output_dir, fn)
	    if os.path.exists(filepath):
	        print("{} already exists, skipping.\n".format(filepath))
	        return

	    # Download tarball
	    print("Downloading {} ...".format(fn))

	    partial_path = filepath + '.part'
	    try:
	        # The context manager releases the connection even if writing fails.
	        with requests.get(url, stream=True) as r:
	            # Fail on 4xx/5xx instead of saving the error page as a tarball.
	            r.raise_for_status()
	            with open(partial_path, 'wb') as fh:
	                for chunk in r.iter_content(chunk_size=1024):
	                    fh.write(chunk)
	        # Publish the file under its real name only once it is complete.
	        os.replace(partial_path, filepath)
	    finally:
	        # After a successful rename the partial file is gone; anything still
	        # here is the leftover of a failed or interrupted transfer.
	        if os.path.exists(partial_path):
	            os.remove(partial_path)


	def main():
	    """Download every source tarball linked from an Apple release page.

	    Command-line arguments:
	        -u / --url:    The sources page of a release, e.g.
	                       https://opensource.apple.com/release/macos-10133.html
	        -o / --output: Existing directory. Tarballs are stored in a
	                       sub-directory of it named after the release page.

	    Returns:
	        0 when every tarball was downloaded (or already present), -1 when
	        the arguments are invalid, the release page cannot be retrieved, or
	        at least one tarball failed to download.
	    """
	    # Imported here so the function does not rely on the file's import block.
	    from urllib.parse import urljoin, urlparse

	    parser = argparse.ArgumentParser(description='A tool to download the Apple sources for a particular release')
	    parser.add_argument('-u', '--url', required=True, type=str, help='The sources page for a particular release. ie. https://opensource.apple.com/release/macos-10133.html')
	    parser.add_argument('-o', '--output', required=True, type=str, help='Output directory to download files to')

	    args = parser.parse_args()

	    # Compare the parsed host name instead of doing a substring test, so a
	    # URL that merely mentions the domain in its path or query is rejected.
	    try:
	        parsed_url = urlparse(args.url)
	        host = (parsed_url.hostname or '').lower()
	    except ValueError:
	        parsed_url = None
	        host = ''

	    if host != 'opensource.apple.com' and not host.endswith('.opensource.apple.com'):
	        print("This doesnt look like a valid URL from opensource.apple.com. Aborting")
	        return -1

	    if not args.output or not os.path.isdir(args.output):
	        print("Output must point to an output directory that exists")
	        parser.print_help()
	        return -1

	    # Name the download directory after the page, e.g. "macos-10133".
	    version = os.path.basename(parsed_url.path).split('.')[0]

	    output_dir = os.path.join(args.output, version)
	    os.makedirs(output_dir, exist_ok=True)

	    print("Download directory: {}".format(output_dir))

	    # Download content of the page
	    try:
	        page = requests.get(args.url)
	        # Treat an HTTP error page as a failure rather than "no tarballs".
	        page.raise_for_status()
	    except requests.RequestException as e:
	        print("Could not retrieve target content at url: {}".format(e))
	        return -1

	    # Find the link of each tarball. [^"]+ keeps the match inside a single
	    # href value even when a line holds several attributes or anchors.
	    expr = re.compile(r'<a\s[^>]*?href="(?P<relpath>[^"]+\.tar\.gz)"')

	    failed = 0
	    for m in expr.finditer(page.text):
	        # urljoin resolves root-relative, page-relative and absolute hrefs.
	        tarball_url = urljoin(args.url, m.group('relpath'))
	        try:
	            download(tarball_url, output_dir)
	        except (requests.RequestException, OSError) as e:
	            # Keep going so one bad tarball does not abort the whole run.
	            print("Failed to download {}: {}".format(tarball_url, e))
	            failed += 1

	    return -1 if failed else 0


	if __name__ == '__main__':
	    # Script entry point: main()'s return value becomes the exit status.
	    sys.exit(main())