Created
May 18, 2016 07:03
-
-
Save NamPNQ/2ce913ed5c6d69bb90112f193d8d9878 to your computer and use it in GitHub Desktop.
Codeschool Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio
import json
import os

import aiohttp
import pyquery
from pyquery import PyQuery as pq
# Site root; hrefs scraped from a path page are appended to it.
base = 'https://www.codeschool.com'

# Index page prefix; each entry of `paths` is appended as '/<name>'.
url = 'https://www.codeschool.com/paths'

# Names of the learning paths to scrape.
paths = ['git', 'javascript']

# Headers passed to the HTTP session; fill in your own cookie value.
headers = {'cookie': ''}  # give your cookie

# Shared limiter acquired by fetch_course: at most five holders at a time.
sem = asyncio.Semaphore(5)
async def fetch_video(session: 'aiohttp.ClientSession', url: str) -> None:
    """Append the download links of one course's videos to its ``videos.txt``.

    The page at *url* is fetched and searched for the last
    ``new CS.Classes.VideoManager(`` call; the JSON object passed to that call
    lists the videos under its ``media`` key.  For every entry, the URL stored
    in the entry's own ``media`` field is fetched and the ``download`` link of
    the first item of the response's ``media`` list is appended to
    ``./downloads/<name>/videos.txt``, where ``<name>`` is the second-to-last
    ``/``-separated segment of *url*.

    A page without the marker is skipped silently; its download directory is
    still created.  Links are appended, so running the scraper twice lists
    every link twice.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status``, ``text()`` and ``json()``
            (an ``aiohttp.ClientSession`` in this script).
        url: Address of the course's video page.

    Raises:
        RuntimeError: If any request does not answer with HTTP 200.
        ValueError: If the marker is present but no closing ``});`` follows
            it, or the extracted text is not valid JSON.
    """
    path = './downloads/' + url.split('/')[-2]
    # exist_ok avoids the check-then-create race between concurrent courses.
    os.makedirs(path, exist_ok=True)

    async with session.get(url) as response:
        # Explicit check instead of `assert`, which is stripped under -O.
        if response.status != 200:
            raise RuntimeError(
                'unexpected HTTP status %s for %s' % (response.status, url))
        page = await response.text()

    marker = 'new CS.Classes.VideoManager('
    idx = page.rfind(marker)
    if idx == -1:
        return

    # The argument runs from just after the marker up to and including the
    # '}' of the first '});' that follows it.
    manifest = json.loads(page[idx + len(marker):page.index('});', idx) + 1])
    media = manifest['media']
    print(path, ' ', len(media))
    if not media:
        # Nothing to record; do not create an empty videos.txt.
        return

    # Open the list once; links written before a failure are kept because
    # the file is flushed when the `with` block unwinds.
    with open(path + '/videos.txt', 'a') as f:
        for video in media:
            async with session.get(video['media']) as response:
                if response.status != 200:
                    raise RuntimeError(
                        'unexpected HTTP status %s for %s'
                        % (response.status, video['media']))
                details = await response.json()
            f.write(details['media'][0]['download'] + '\n')
async def fetch_course(session: 'aiohttp.ClientSession', url: str) -> None:
    """Scrape one learning-path page and process every course linked on it.

    The page at *url* is fetched, every element matching
    ``.course-title-link`` is collected, and ``fetch_video`` is awaited for
    ``base + href + '/videos'`` of each one, in document order.  The
    module-level semaphore ``sem`` is held for the whole call, so it bounds
    how many path pages are processed at the same time.

    Args:
        session: HTTP session used for the page request and handed on to
            ``fetch_video``.
        url: Address of the learning-path page.

    Raises:
        RuntimeError: If the page request does not answer with HTTP 200.
    """
    # `with (await sem)` was deprecated in Python 3.7 and removed in 3.9;
    # `async with` is the supported spelling on every async/await Python.
    async with sem:
        async with session.get(url) as response:
            # Explicit check instead of `assert`, which is stripped under -O.
            if response.status != 200:
                raise RuntimeError(
                    'unexpected HTTP status %s for %s' % (response.status, url))
            body = await response.text()

        d = pq(body)
        for course in d('.course-title-link'):
            href = pq(course).attr('href')
            if not href:
                # A link without href would otherwise crash on str + None.
                continue
            await fetch_video(session, base + href + '/videos')
async def main():
    """Scrape every configured learning path concurrently, then report failures.

    One ``fetch_course`` call is started per entry of ``paths``; all of them
    share a single HTTP session carrying ``headers``.  A failing path does
    not stop the others: its exception is collected and printed at the end.
    """
    # aiohttp 3.x only supports the session as an *async* context manager,
    # and it has to be created inside a coroutine so it picks up the running
    # loop (the explicit `loop=` argument is deprecated).
    async with aiohttp.ClientSession(headers=headers) as session:
        # asyncio.wait() no longer accepts bare coroutines (Python 3.11+) and
        # never surfaced task exceptions; gather() wraps the coroutines in
        # tasks itself and hands the exceptions back.
        results = await asyncio.gather(
            *(fetch_course(session, url + '/' + path) for path in paths),
            return_exceptions=True)

    for path, result in zip(paths, results):
        if isinstance(result, BaseException):
            print('failed: ' + path + ': ' + repr(result))


# Stay on the default event loop: on older Pythons the module-level semaphore
# binds to it at creation time.
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Hand each downloads/<course>/videos.txt (one download URL per line) to
# aria2c, saving the files into that same course directory.

# Exit the script on signal 2 (SIGINT).
trap "exit" 2

# Iterate with a glob instead of parsing `ls` output, so directory names
# containing whitespace or glob characters are not split or expanded.
for dir in downloads/*/; do
    # With no matching directory the pattern stays literal; skip it.
    [ -d "$dir" ] || continue
    path=$(basename "$dir")
    list="downloads/$path/videos.txt"
    # A course directory can exist without a link list; nothing to fetch then.
    if [ ! -f "$list" ]; then
        echo "skipping $path (no videos.txt)"
        continue
    fi
    echo "downloading $path"
    aria2c -d "downloads/$path" -i "$list"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment