Last active
December 24, 2015 20:29
-
-
Save cclauss/6858439 to your computer and use it in GitHub Desktop.
David Beazley is one of the best trainers on Python topics. This script downloads several code bases from David's website (http://www.dabeaz.com) to a local directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# beazleyCodeDownload.py
#
# Copy source code from http://www.dabeaz.com
# into the local directory David_Beazley.
import os

import bs4
import requests
# Sections of the site to mirror, as paths relative to the site root.
codeBases = ('coroutines', 'generators', 'pydata',
             'python3io', 'usenix2009/concurrent')

# URL of a section's index page; '{}' is filled with an entry of codeBases.
baseURLFmt = 'http://www.dabeaz.com/{}/'

# Local directory that receives a section's files; '{}' is the section name.
destDirFmt = 'David_Beazley/{}/'

# Source of the small launcher script written next to the downloaded files;
# '{}' is filled with the section URL, which the script opens in a browser.
aboutFileText = """#!/usr/bin/env python
theURL = '{}'
import webbrowser; webbrowser.open(theURL)"""
def getEmbeddedPyURLs(inSoup, inBaseURL=None):
    """Yield the full URL of every link in a parsed page that ends in '.py'.

    inSoup    -- parsed page; only find_all('a') is called on it, and each
                 returned anchor is queried with .get('href').
    inBaseURL -- prefix prepended to each matching href.  When omitted, the
                 module-level name baseURL is used, as before.

    Anchors that carry no href attribute (for example named anchors) are
    skipped instead of raising KeyError.
    """
    if inBaseURL is None:
        inBaseURL = baseURL
    for anchor in inSoup.find_all('a'):
        href = anchor.get('href')
        if href and href.endswith('.py'):
            yield inBaseURL + href
def copyWebPageToFile(inSrceURL, inDestFileName):
    """Download inSrceURL and save the response body as inDestFileName.

    The download finishes before the destination is opened, so a failed
    request does not leave an empty or truncated file behind.  The body is
    written as raw bytes so the saved file does not depend on the local
    default text encoding.

    Raises requests.HTTPError when the server answers with an error status
    (instead of saving the error page under the target name); other
    requests exceptions propagate on connection failure or timeout.
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    response = requests.get(inSrceURL, timeout=30)
    response.raise_for_status()
    with open(inDestFileName, 'wb') as destFile:
        destFile.write(response.content)
def writeAboutFile(inSrceURL, inDestFileName):
    """Create the launcher script inDestFileName that opens inSrceURL.

    The script body is the module-level aboutFileText template with
    inSrceURL substituted for its placeholder.
    """
    with open(inDestFileName, 'w') as aboutFile:
        launcherSource = aboutFileText.format(inSrceURL)
        aboutFile.write(launcherSource)
# Mirror each section: fetch its index page, copy every linked .py file into
# the section's local directory, then add a launcher script for the section.
# NOTE: this loop stays at module level because getEmbeddedPyURLs reads the
# module-level name baseURL that is assigned here.
for codeBase in codeBases:
    baseURL = baseURLFmt.format(codeBase)
    # Drop the 'usenix2009/' prefix for the local directory name.
    # str.lstrip() removes a *set of characters*, not a prefix, and would eat
    # leading letters of any name starting with one of them, so slice instead.
    if codeBase.startswith('usenix2009/'):
        codeBase = codeBase[len('usenix2009/'):]
    destDir = destDirFmt.format(codeBase)
    try:
        os.makedirs(destDir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # a plain file in the way, ...) is a real error and must surface.
        if not os.path.isdir(destDir):
            raise
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    theSoup = bs4.BeautifulSoup(requests.get(baseURL).text, 'html.parser')
    fileCount = 0
    fmt = 'Copying {}\n to {}...'
    print(fmt.format(baseURL, destDir))
    for embeddedURL in getEmbeddedPyURLs(theSoup):
        # Keep only the last path component as the local file name.
        fileName = destDir + embeddedURL.rpartition('/')[2]
        copyWebPageToFile(embeddedURL, fileName)
        fileCount += 1
    fmt = '{} files copied into {}\n'
    print(fmt.format(fileCount, destDir))
    aboutFileName = 'about{}.py'.format(codeBase.title())
    writeAboutFile(baseURL.rstrip('/'), destDir + aboutFileName)
print('Done. ' + '=' * 25)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment