Skip to content

Instantly share code, notes, and snippets.

@RamseyK
Last active September 27, 2018 21:10
Show Gist options
  • Save RamseyK/f547eb430f225f5d829cd5096f09a99e to your computer and use it in GitHub Desktop.
Save RamseyK/f547eb430f225f5d829cd5096f09a99e to your computer and use it in GitHub Desktop.
Download Apple Darwin Open Source tarballs - Useful for mirroring Apple sources
import argparse
import sys
import os
import requests
import re
def download(url, output_dir):
    """Download the tarball at ``url`` into ``output_dir``.

    The file is saved under the last path component of ``url``. If a file
    with that name already exists in ``output_dir`` the download is skipped.

    The response body is streamed to a temporary ``<name>.part`` file and
    only renamed to its final name once the transfer has completed, so an
    interrupted or failed download is never mistaken for a finished tarball
    by the "already exists" check on a later run.

    Args:
        url: Full URL of the tarball to fetch.
        output_dir: Existing directory the tarball is written to.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written or renamed.
    """
    # Check to see if the tarball has already been downloaded to the output dir
    fn = os.path.basename(url)
    filepath = os.path.join(output_dir, fn)
    if os.path.exists(filepath):
        print("{} already exists, skipping.\n".format(filepath))
        return

    # Download tarball
    print("Downloading {} ...".format(fn))
    partial = filepath + '.part'
    # Without a timeout a stalled connection would block the whole run forever.
    r = requests.get(url, stream=True, timeout=60)
    try:
        # Fail loudly on 4xx/5xx instead of saving the error page as a tarball.
        r.raise_for_status()
        with open(partial, 'wb') as fh:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                if chunk:
                    fh.write(chunk)
        os.replace(partial, filepath)
    finally:
        r.close()
        # Still present only if the transfer did not complete; remove it so
        # no truncated data is left behind.
        if os.path.exists(partial):
            os.remove(partial)
def main():
    """Download every tarball linked from an Apple open source release page.

    Command-line arguments:
        -u/--url: The release page on opensource.apple.com to scan.
        -o/--output: An existing directory. A sub-directory named after the
            release page (its file name up to the first ``.``) is created
            inside it and the tarballs are stored there.

    Returns:
        0 if every tarball was downloaded (or already present), -1 if the
        arguments are invalid or the release page could not be retrieved,
        and 1 if one or more tarballs failed to download.
    """
    from urllib.parse import urlparse

    parser = argparse.ArgumentParser(description='A tool to download the Apple sources for a particular release')
    parser.add_argument('-u', '--url', required=True, type=str, help='The sources page for a particular release. ie. https://opensource.apple.com/release/macos-10133.html')
    parser.add_argument('-o', '--output', required=True, type=str, help='Output directory to download files to')
    args = parser.parse_args()

    # Validate the actual host rather than searching the whole URL for the
    # domain name, which would also accept e.g. a path containing it.
    try:
        host = urlparse(args.url).hostname or ''
    except ValueError:
        host = ''
    if host != 'opensource.apple.com' and not host.endswith('.opensource.apple.com'):
        print("This doesnt look like a valid URL from opensource.apple.com. Aborting")
        return -1

    if not args.output or not os.path.isdir(args.output):
        print("Output must point to an output directory that exists")
        parser.print_help()
        return -1

    version = os.path.basename(args.url).split('.')[0]
    output_dir = os.path.join(args.output, version)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    print("Download directory: {}".format(output_dir))

    # Download content of the page
    try:
        page = requests.get(args.url, timeout=60)
        page.raise_for_status()
    except requests.RequestException as e:
        print("Could not retrieve target content at url: {}".format(e))
        return -1

    # Find the relative links for each of the tarballs and download them
    html = page.content.decode('utf-8', errors='replace')
    failures = 0
    for relpath in _find_tarball_paths(html):
        try:
            download('https://opensource.apple.com' + relpath, output_dir)
        except requests.RequestException as e:
            # One bad tarball should not abort the rest of the mirror run.
            print("Failed to download {}: {}".format(relpath, e))
            failures += 1

    return 1 if failures else 0


def _find_tarball_paths(html):
    """Return the unique anchor ``href`` values ending in ``tar.gz``, in page order."""
    # [^"]* keeps the match inside a single attribute value even when several
    # attributes or anchors share one line.
    expr = re.compile(r'<a\s+[^>]*?href="(?P<relpath>[^"]*tar\.gz)"')
    paths = []
    seen = set()
    for m in expr.finditer(html):
        relpath = m.group('relpath')
        if relpath not in seen:
            seen.add(relpath)
            paths.append(relpath)
    return paths
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment