Last active
June 20, 2020 06:17
-
-
Save mahdi-malv/da73cd68a55a4e81c7386c3013bed670 to your computer and use it in GitHub Desktop.
Download a course from caster.io using scrapy and youtube-dl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Prerequisites: | |
1. Scrapy: python3 -m pip install scrapy | |
2. Youtube-dl: python3 -m pip install youtube-dl | |
(Use conda or miniconda for windows to install scrapy. Or use WSL and install python3 on it) | |
It's also possible to modify this to get other courses from caster.io or even other sites. | |
""" | |
from scrapy.spiders import Spider | |
from scrapy.crawler import CrawlerProcess | |
import youtube_dl | |
# Shared youtube-dl downloader, created with default options.
ydl = youtube_dl.YoutubeDL()


class CasterKotlin(Spider):
    """Spider that collects a Caster.io course's lesson links and downloads them.

    The course page listed in ``start_urls`` is fetched once; every lesson
    link found on it is saved to a text file and then handed to youtube-dl.
    """

    name = "caster"
    # Donn Felker's Kotlin course on Caster.io
    start_urls = ["https://caster.io/courses/kotlin-programming-language"]

    def parse(self, response):
        """Extract the lesson links from the course page, save and download them.

        Args:
            response: The Scrapy response for the course page.

        Side effects:
            Writes one lesson URL per line to ``caster-links.txt`` in the
            current working directory, then passes the URLs to the
            module-level ``ydl`` downloader.
        """
        # Get the list of lessons (href of every lesson card on the page).
        hrefs = response.css(
            'a[class="cioc-card cioc-cardgroup__item cioc-lessoncard"]::attr(href)'
        ).getall()
        # Resolve against the page URL: youtube-dl needs absolute URLs, and
        # urljoin leaves links that are already absolute unchanged.
        lessons = [response.urljoin(href) for href in hrefs]

        filename = 'caster-links.txt'
        with open(filename, 'w', encoding='utf-8') as f:
            f.writelines(link + "\n" for link in lessons)
        self.log('Links are saved at %s' % filename)

        if not lessons:
            # Most likely the page markup changed and the selector no longer matches.
            self.logger.warning('No lesson links found on %s; nothing to download', response.url)
            return

        # Get them via youtube-dl. The call is synchronous and blocks the
        # crawler while downloading; that is fine here because the course
        # page is the only request this spider makes.
        self.log("Attempting to download links")
        ydl.download(lessons)
# Run the spider in-process rather than through the `scrapy` command line.
# Requests are sent with a fixed, browser-like User-Agent header.
process = CrawlerProcess(settings={
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
})
process.crawl(CasterKotlin)
# Starts the crawl; per the Scrapy docs this call blocks until crawling is finished.
process.start()
""" | |
Finally run the scripts using `python3 caster.py` command. | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment