- Parameterize & command line ops
- After scraping, see if file with matching MD5 already exists in output dir
- Allow for downloading `-major` releases, not just `-update` releases
- Clean up (this is currently just a hacked-together script)
Last active
October 30, 2019 19:23
-
-
Save ahogen/5f49d8399d7250ed64696f03e1ed2f6b to your computer and use it in GitHub Desktop.
Download latest GNU GCC compiler from ARM's website (for Cortex-M and Cortex-R processors).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Download latest GNU Toolchain from ARM's website | |
Author: Alex Hogen ([email protected]) | |
MIT License | |
Copyright (c) 2019 Alexander Hogen | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
""" | |
import sys | |
import requests | |
import wget | |
import time | |
import regex as re | |
import hashlib | |
from bs4 import BeautifulSoup | |
# Per-platform scraping configuration.
#   title       -- description text expected in the <dd> next to the file name
#   fname_regex -- pattern the release file name must match
LINUX_64 = dict(
    title='Linux 64-bit Tarball',
    fname_regex=r'gcc-arm-none-eabi-.+-linux\.tar\.bz2',
)

# NOTE(review): the title below has a closing ")" with no opening "(" --
# confirm against the text on the downloads page before relying on it.
WIN_7p = dict(
    title='Windows 32-bit Installer Signed for Windows 7 and later)',
    fname_regex=r'gcc-arm-none-eabi-.+-win32-sha2\.exe',
)
def get_fname_and_md5_hash(soup: BeautifulSoup, platform_config: dict):
    """Find the release file name and its MD5 digest for one platform.

    Every ``<dl>`` element in ``soup`` is scanned child by child. A child
    whose text starts with a match of ``platform_config['fname_regex']`` is
    remembered as the file name; the first child at or after it whose text
    starts with ``MD5: `` followed by 32 lowercase hex digits supplies the
    digest.

    Args:
        soup: Parsed downloads page.
        platform_config: Mapping providing the keys ``'fname_regex'`` and
            ``'title'``.

    Returns:
        A ``(fname, md5)`` tuple.

    Raises:
        RuntimeWarning: If no file name / MD5 pair could be found.
    """
    # Compile once; the same patterns are applied to every child of every <dl>.
    fname_pattern = re.compile(platform_config['fname_regex'])
    md5_pattern = re.compile(r'MD5:\s([0-9a-f]{32})')
    title_pattern = re.compile(re.escape(platform_config['title']))

    for dl in soup.find_all("dl"):
        fname = ''

        # both <dt> and <dd> children of <dl>
        for child in dl:
            text = child.string
            if text is None:
                # Elements with nested markup have no single string; passing
                # None to re.match would raise TypeError.
                continue

            if fname_pattern.match(text) is not None:
                fname = text

            md5_match = md5_pattern.match(text)
            if fname != '' and md5_match is not None:
                # Got 'em
                return (fname, md5_match[1])

        if fname == '':
            continue

        # A file name was seen in this <dl> but no MD5 followed it.
        dd = dl.dd
        if dd is not None and title_pattern.match(dd.text) is not None:
            # Right entry, but it carries no checksum: stop searching.
            break
        print("<dd> doesn't look right. skipping this <dt> match")

    raise RuntimeWarning("Couldn't find filename and MD5 hash for platform")
def get_file_download_url(soup: BeautifulSoup, platform_config: dict, fname: str):
    """Build the absolute download URL for ``fname``.

    Elements with the CSS class ``c-button`` are inspected in document order.
    The first one whose ``data-href`` attribute starts with
    ``/-/media/Files/downloads/`` and contains ``fname`` wins; everything in
    the attribute after ``fname`` (e.g. a query string) is dropped.

    Args:
        soup: Parsed downloads page.
        platform_config: Unused; kept so existing callers keep working.
        fname: Exact file name to look for.

    Returns:
        ``"https://developer.arm.com"`` followed by the matched path.

    Raises:
        RuntimeWarning: If no button links to ``fname``. Previously the bare
            host URL was returned, which made the caller download the wrong
            page.
    """
    href_pattern = re.compile(
        r'(\/-\/media\/Files\/downloads\/.*' + re.escape(fname) + r').*')

    for button in soup.find_all(class_='c-button'):
        try:
            href = button['data-href']
        except KeyError:
            # Not a download button.
            continue

        match = href_pattern.match(href)
        if match is not None:
            return "https://developer.arm.com" + match[1]

    raise RuntimeWarning("Couldn't find download URL for " + fname)
def verify_md5(fname: str, expected_md5: str):
    """Hash a file with MD5, print whether it matches, and return the digest.

    The comparison ignores letter case and surrounding whitespace in
    ``expected_md5`` so that an upper-case checksum is not reported as a
    failure.

    Args:
        fname: Path of the file to hash.
        expected_md5: Hex digest the file is expected to have.

    Returns:
        The lowercase hex MD5 digest of the file, whether or not it matched.
    """
    hasher = hashlib.md5()
    with open(fname, 'rb') as f:
        # Read in fixed-size chunks so large downloads are never held in memory.
        for chunk in iter(lambda: f.read(4096), b""):
            hasher.update(chunk)
    actual_md5 = hasher.hexdigest()

    print("\nMD5: ", end='')
    if expected_md5.strip().lower() != actual_md5:
        print("FAIL!!")
        print(" expected: ", expected_md5)
        print(" actual: ", actual_md5)
    else:
        print("okay")

    return actual_md5
if __name__ == "__main__":
    # Scrape ARM's downloads page, fetch the Linux 64-bit toolchain archive
    # and check it against the MD5 published on the page.
    if sys.version_info < (3, 5, 0):
        sys.stderr.write("You need python 3.5 or later to run this script\n")
        sys.exit(1)

    url = 'https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads'
    platform = LINUX_64

    # Bound the wait and fail on HTTP errors rather than scraping an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    file = {
        "name": '',
        "md5": '',
        "url": "",
        "platform_conf": "LINUX_64"
    }

    file['name'], file['md5'] = get_fname_and_md5_hash(soup, platform)
    file['url'] = get_file_download_url(soup, platform, file['name'])
    print(file)

    fname_downloaded = wget.download(file['url'])
    print("Downloaded:", fname_downloaded)

    # verify_md5 only prints its verdict; turn a mismatch into a failing exit
    # status so calling scripts can detect a corrupt download.
    if verify_md5(fname_downloaded, file['md5']) != file['md5']:
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment