Created
November 6, 2022 02:00
-
-
Save YourFriendCaspian/a5622c50b8bd55edd2b746ba87c0cfd4 to your computer and use it in GitHub Desktop.
Script to download all MagPi PDFs - skips existing PDFs - pip install bs4 requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Download all MagPis available | |
''' | |
#!/usr/bin/env python3 | |
# Dependencies: pip install bs4 requests
import ntpath | |
import os | |
import sys | |
import urllib.request | |
from pathlib import Path | |
import requests | |
from bs4 import BeautifulSoup | |
# Colored output | |
class Color:
    '''
    ANSI escape sequences used to colorize terminal output.

    Wrap a message between one of the color codes and END so that the
    terminal returns to its default rendition afterwards.
    '''
    # Switches the foreground to bright green.
    GREEN = '\033[92m'
    # Switches the foreground to bright red.
    RED = '\033[91m'
    # Resets all text attributes.
    END = '\033[0m'
# URL Request | |
def request(magpi_url):
    '''
    Fetch a URL and return its body as text.

    The request carries a 'Mozilla/5.0' User-agent header. The header is
    attached to this single request instead of installing a process-wide
    opener, so other urllib users in the same process are not affected.

    Parameters:
        magpi_url: URL of the page to fetch.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: if the URL cannot be opened.
        UnicodeDecodeError: if the body is not valid UTF-8.
    '''
    page_request = urllib.request.Request(
        magpi_url, headers={'User-agent': 'Mozilla/5.0'})
    # The context manager closes the connection even when read() or
    # decode() raises.
    with urllib.request.urlopen(page_request) as response:
        return response.read().decode("utf-8")
# Get all released issues | |
def get_issues(magpi_url, magpi_ext):
    '''
    Collect the URLs of all linked files with a given extension.

    Parameters:
        magpi_url: URL of the index page to scan; also used as the base
            against which each link is resolved.
        magpi_ext: suffix a link must end with to be kept (e.g. ".pdf").

    Returns:
        A list of absolute URLs, in the order the links appear on the page.
    '''
    # Imported locally so this function does not depend on how the
    # module-level imports are spelled.
    from urllib.parse import urljoin

    soup = BeautifulSoup(request(magpi_url), 'html.parser')
    # href=True skips anchors without an href attribute; calling
    # .endswith() on their missing (None) value would raise.
    # urljoin handles relative, root-relative and absolute hrefs alike,
    # whereas plain concatenation is only right for relative ones.
    return [urljoin(magpi_url, anchor['href'])
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].endswith(magpi_ext)]
def download(magpi_url, filename):
    '''
    Download a URL to a file, drawing a progress bar on stdout.

    The data is first written to "<filename>.part" and only renamed to
    filename once the transfer has finished. A failed or interrupted
    download therefore never leaves a file at filename.

    Parameters:
        magpi_url: URL of the file to download.
        filename: destination path (str or Path).

    Raises:
        requests.RequestException: if the request fails or the server
            answers with an HTTP error status.
        OSError: if the destination cannot be written.
    '''
    target = Path(filename)
    partial = target.with_name(target.name + '.part')
    bar_width = 50

    with requests.get(magpi_url, stream=True) as response:
        # Refuse to store an error page under the target name.
        response.raise_for_status()

        # A missing or malformed Content-Length disables the progress bar
        # instead of aborting the download.
        length = response.headers.get('content-length', '')
        total = int(length) if length.isdigit() else 0
        chunk_size = max(total // 1000, 1024 * 1024)

        try:
            with open(partial, 'wb') as file:
                downloaded = 0
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    if not total:
                        continue
                    downloaded += len(data)
                    # Clamp: the decoded body can be larger than the
                    # advertised length.
                    done = min(bar_width, bar_width * downloaded // total)
                    sys.stdout.write('\r[{}{}]'.format(
                        '█' * done, '.' * (bar_width - done)))
                    sys.stdout.flush()
            os.replace(partial, target)
        except BaseException:
            # Also covers KeyboardInterrupt: drop the incomplete file so it
            # is not mistaken for a finished download, then re-raise.
            try:
                os.remove(partial)
            except FileNotFoundError:
                pass
            raise
    sys.stdout.write('\n')
def main():
    '''
    Ask for a download directory and fetch every MagPi issue into it.

    An empty answer selects ~/Downloads, which is created if it is
    missing. Any other answer must name an existing directory ("~" is
    expanded); otherwise the prompt is repeated. Issues whose file is
    already present in the directory are skipped. A failed download is
    reported and the remaining issues are still processed.
    '''
    # Directory
    while True:
        dest = input("Choose download path. Default is ~/Downloads\n: ").strip()
        if not dest:
            dest = os.path.join(Path.home(), "Downloads")
            os.makedirs(dest, exist_ok=True)
            break
        dest = os.path.expanduser(dest)
        # isdir, not exists: a regular file is not a usable destination.
        if os.path.isdir(dest):
            break
        print("Path is not an existing directory. Try again.")

    # MagPi
    magpi_url = "https://www.raspberrypi.org/magpi-issues/"
    magpi_ext = ".pdf"

    for issue in get_issues(magpi_url, magpi_ext):
        basename = ntpath.basename(issue)
        target = Path(dest, basename)
        if target.exists():
            print(Color.RED, "Exists:", basename, Color.END)
            continue
        print("\n", Color.GREEN, "Downloading", basename, Color.END)
        try:
            download(issue, target)
        except (requests.RequestException, OSError) as error:
            # One broken issue should not abort the remaining downloads.
            print(Color.RED, "Failed:", basename, "-", error, Color.END)
if __name__ == '__main__':
    # Run the downloader only when executed as a script. Ctrl+C is treated
    # as a normal way to stop: exit with status 0 and no traceback.
    try:
        main()
    except KeyboardInterrupt:
        raise SystemExit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment