Last active
September 27, 2018 21:10
-
-
Save RamseyK/f547eb430f225f5d829cd5096f09a99e to your computer and use it in GitHub Desktop.
Download Apple Darwin Open Source tarballs - Useful for mirroring Apple sources
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import re
import sys
from urllib.parse import urljoin, urlparse

import requests
def download(url, output_dir, timeout=60):
    """Download the file at *url* into *output_dir*, skipping existing files.

    The local file name is the last path component of *url*. Data is first
    streamed to a ``.part`` file next to the destination and only renamed
    into place once the transfer has completed, so an interrupted run never
    leaves a truncated tarball that a later run would skip as "already
    downloaded".

    Args:
        url: Absolute URL of the tarball to fetch.
        output_dir: Existing directory the file is written to.
        timeout: Seconds to wait for the server (connect/read) before
            giving up; passed straight to ``requests.get``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
        OSError: If the file cannot be written or renamed.
    """
    # Check to see if the tarball has already been downloaded to the output dir
    fn = os.path.basename(url)
    filepath = os.path.join(output_dir, fn)
    if os.path.exists(filepath):
        print("{} already exists, skipping.\n".format(filepath))
        return

    # Download tarball
    print("Downloading {} ...".format(fn))
    partial_path = filepath + '.part'
    r = requests.get(url, stream=True, timeout=timeout)
    try:
        # Without this an HTML error page would be saved as the tarball.
        r.raise_for_status()
        with open(partial_path, 'wb') as fh:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                fh.write(chunk)
        os.replace(partial_path, filepath)
    except BaseException:
        # Also covers Ctrl-C: remove the incomplete file, then re-raise.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    finally:
        r.close()
def _find_tarball_urls(html, base_url):
    """Return absolute URLs of all ``.tar.gz`` links in *html*, in page order.

    Relative hrefs are resolved against *base_url*. Duplicate links are
    reported once.
    """
    # [^"]* keeps each match inside one href attribute, so several links on
    # the same line are all found (a greedy .* would swallow them).
    href_re = re.compile(r'<a\s[^>]*?href="(?P<relpath>[^"]*\.tar\.gz)"')
    seen = set()
    urls = []
    for m in href_re.finditer(html):
        url = urljoin(base_url, m.group('relpath'))
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls


def main():
    """Mirror every tarball linked from an opensource.apple.com release page.

    Parses ``--url`` and ``--output`` from the command line, creates a
    sub-directory of the output directory named after the release page
    (e.g. ``macos-10133``), and downloads each linked ``.tar.gz`` into it.
    A failed download is reported and the remaining tarballs are still
    attempted.

    Returns:
        0 on success; -1 if the arguments are invalid, the release page
        cannot be retrieved, or at least one tarball failed to download.
    """
    parser = argparse.ArgumentParser(description='A tool to download the Apple sources for a particular release')
    parser.add_argument('-u', '--url', required=True, type=str, help='The sources page for a particular release. ie. https://opensource.apple.com/release/macos-10133.html')
    parser.add_argument('-o', '--output', required=True, type=str, help='Output directory to download files to')
    args = parser.parse_args()

    # Compare the parsed host name: a substring test would also accept
    # e.g. https://opensource.apple.com.evil.example/...
    try:
        parsed_url = urlparse(args.url)
        host = parsed_url.hostname or ''
    except ValueError:
        parsed_url = None
        host = ''
    if host != 'opensource.apple.com' and not host.endswith('.opensource.apple.com'):
        print("This doesnt look like a valid URL from opensource.apple.com. Aborting")
        return -1

    if not os.path.isdir(args.output):
        print("Output must point to an output directory that exists")
        parser.print_help()
        return -1

    # The release name is the page's file name without its extension.
    version = os.path.basename(parsed_url.path).split('.')[0]
    output_dir = os.path.join(args.output, version)
    os.makedirs(output_dir, exist_ok=True)

    print("Download directory: {}".format(output_dir))

    # Download content of the page
    try:
        page_content = requests.get(args.url, timeout=60)
        page_content.raise_for_status()
    except requests.RequestException as e:
        print("Could not retrieve target content at url: {}".format(e))
        return -1

    # Find the links for each of the tarballs and download them
    html = page_content.content.decode('utf-8', errors='replace')
    tarball_urls = _find_tarball_urls(html, args.url)
    if not tarball_urls:
        print("No tarball links found at {}".format(args.url))

    failures = 0
    for tarball_url in tarball_urls:
        try:
            download(tarball_url, output_dir)
        except (requests.RequestException, OSError) as e:
            # Keep mirroring the rest; report the problem at the end.
            failures += 1
            print("Failed to download {}: {}".format(tarball_url, e))

    if failures:
        print("{} of {} downloads failed".format(failures, len(tarball_urls)))
        return -1
    return 0
if __name__ == "__main__":
    # Run the tool and use main()'s return value as the process exit status.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment