Created
January 17, 2014 05:55
-
-
Save dearmark/8469006 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import sys
import urllib

import requests
# Cookie jar passed to requests.get() by get_page().  The session id can be
# supplied through the PHPSESSID environment variable; when that is unset the
# id originally hard-coded in this script is used as a fallback.
# NOTE(review): the fallback id is presumably an expired session -- confirm
# and consider dropping it so no credential lives in the source.
cookie = {
    'PHPSESSID': os.environ.get('PHPSESSID', 'phulssv3eg4bijf3cb423beve4'),
}
def get_lesson_urls(html):
    """Return the lesson URLs found in a page.

    Scans *html* for absolute ``http://`` / ``https://`` URLs and keeps the
    ones whose text contains the substring ``'lesson'``.

    Args:
        html: Page markup (text) to scan.

    Returns:
        A list of matching URL strings in the order they occur in *html*.
        Duplicates are not removed.
    """
    # Raw strings so that ``\(`` and ``\)`` reach the regex engine untouched
    # instead of being (invalid) string-literal escapes.
    # NOTE: ``$-_`` inside the character class is a *range* (0x24-0x5F), so
    # besides the listed punctuation it also admits characters such as
    # ``'``, ``<`` and ``>``.  Kept as-is to preserve matching behaviour.
    url_pattern = (
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]'
        r'|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    urls = re.findall(url_pattern, html)
    return [url for url in urls if 'lesson' in url]
def get_page(url, timeout=30):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``cookie`` dictionary.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument.  Without one, requests waits indefinitely for an
            unresponsive server.

    Returns:
        The decoded response body (``Response.text``).
    """
    response = requests.get(url, cookies=cookie, timeout=timeout)
    return response.text
def get_video_url(html):
    """Extract and URL-decode the video download link from a lesson page.

    Looks for the first ``download_url':'...'`` entry in *html* and returns
    the quoted value with percent-escapes decoded.

    Args:
        html: Lesson page markup (text).

    Returns:
        The decoded download URL.

    Raises:
        ValueError: If *html* contains no ``download_url`` entry.
    """
    # ``unquote`` lives in ``urllib`` on Python 2 and ``urllib.parse`` on
    # Python 3; resolve it locally so the function works on either.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    match = re.search(r"download_url':'(.+?)'", html)
    if match is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("no download_url entry found in page HTML")
    return unquote(match.group(1))
def main():
    """Print the video download URL of every lesson in a course.

    Reads the course URL from the first command-line argument, fetches that
    page, prints the list of lesson URLs found on it, then fetches each
    lesson page in turn and prints its video download URL.

    Exits with status -1 (after a message on stderr) when no course URL was
    given on the command line.
    """
    try:
        course_url = sys.argv[1]
    except IndexError:
        # Diagnostics go to stderr so stdout carries only the URL listing.
        sys.stderr.write('The first parameter must be course URL\n')
        # sys.exit rather than the bare ``exit`` helper, which is only
        # installed by the ``site`` module.
        sys.exit(-1)

    # Single-argument print(...) calls produce the same output on
    # Python 2 and Python 3.
    print('Course URL: %s' % course_url)
    print('Getting all course URLs')
    lesson_urls = get_lesson_urls(get_page(course_url))
    print(lesson_urls)

    for lesson_url in lesson_urls:
        lesson_html = get_page(lesson_url)
        print(get_video_url(lesson_html))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment