BadUncleX · April 11, 2018 05:43
diff --git a/00 [python 抓取 coursera 字幕 ].md b/00 [python 抓取 coursera 字幕 ].md
diff --git a/downloadcourserasubtitle.py b/downloadcourserasubtitle.py
 # Converted from Python 2 to Python 3 with 2to3.
 # Download all subtitles of the videos on Coursera for the Machine Learning
 # course by Andrew Ng.
 # Author: Hebi
 # Note: subtitles are saved into the 'subtitle-cn' folder in the current
 # working directory; the folder is created automatically when missing.

 import urllib.request
 import urllib.error
 import urllib.parse
 import os
 import base64


 url = 'https://class.coursera.org/ml-008/lecture/subtitles?q='
 url2 = '_zh&format=txt'
 #url2 = '_en&format=txt'
 output_dir = 'subtitle-cn'

 # Create the target folder up front so open() below cannot fail on a
 # missing directory.
 os.makedirs(output_dir, exist_ok=True)

 print("Downloaded Subtitle No. ", end=' ')
 for i in range(1, 115):
     # Only the network access lives inside the try: those are the calls the
     # handler below is meant to cover.
     try:
         # The context manager closes the HTTP response even if read() fails.
         with urllib.request.urlopen(url + str(i) + url2) as remoteurl:
             disposition = remoteurl.info()['Content-Disposition']
             page = remoteurl.read()
     except (urllib.error.HTTPError, urllib.error.URLError):
         print('\nProblem Downloading file: ', i)
         print('OR Connection Error')
         continue

     # The file name is taken from the second ';'-separated field of the
     # Content-Disposition header (expected form: attachment; filename="...").
     # The header lookup yields None when the header is absent; such a
     # response is presumably not a subtitle file, so it is reported and
     # skipped instead of crashing the whole run.
     cd = (disposition or '').split(';')
     if len(cd) < 2 or '=' not in cd[1]:
         print('\nProblem Downloading file: ', i)
         print('OR Connection Error')
         continue
     cd2 = cd[1].split('=')[1].strip("\"'")
     # Percent-decode the name and neutralise path separators so the file
     # always lands directly inside output_dir.
     filename = urllib.parse.unquote(cd2).replace("/", "_")
     print("filename:", filename)

     # read() returns bytes, so the file must be opened in binary mode;
     # writing bytes to a text-mode file raises TypeError on Python 3.
     with open(os.path.join(output_dir, filename), 'wb') as f:
         f.write(page)
     print(i, end=' ')

 print('\nDownload Finished\nHappy Coding!!!')
	# Converted from Python 2 to Python 3 with 2to3.
	# Download all subtitles of the videos on Coursera for the Machine Learning
	# course by Andrew Ng.
	# Author: Hebi
	# Note: subtitles are saved into the 'subtitle-cn' folder in the current
	# working directory; the folder is created automatically when missing.

	import urllib.request
	import urllib.error
	import urllib.parse
	import os
	import base64


	url = 'https://class.coursera.org/ml-008/lecture/subtitles?q='
	url2 = '_zh&format=txt'
	#url2 = '_en&format=txt'
	output_dir = 'subtitle-cn'

	# Create the target folder up front so open() below cannot fail on a
	# missing directory.
	os.makedirs(output_dir, exist_ok=True)

	print("Downloaded Subtitle No. ", end=' ')
	for i in range(1, 115):
	    # Only the network access lives inside the try: those are the calls the
	    # handler below is meant to cover.
	    try:
	        # The context manager closes the HTTP response even if read() fails.
	        with urllib.request.urlopen(url + str(i) + url2) as remoteurl:
	            disposition = remoteurl.info()['Content-Disposition']
	            page = remoteurl.read()
	    except (urllib.error.HTTPError, urllib.error.URLError):
	        print('\nProblem Downloading file: ', i)
	        print('OR Connection Error')
	        continue

	    # The file name is taken from the second ';'-separated field of the
	    # Content-Disposition header (expected form: attachment; filename="...").
	    # The header lookup yields None when the header is absent; such a
	    # response is presumably not a subtitle file, so it is reported and
	    # skipped instead of crashing the whole run.
	    cd = (disposition or '').split(';')
	    if len(cd) < 2 or '=' not in cd[1]:
	        print('\nProblem Downloading file: ', i)
	        print('OR Connection Error')
	        continue
	    cd2 = cd[1].split('=')[1].strip("\"'")
	    # Percent-decode the name and neutralise path separators so the file
	    # always lands directly inside output_dir.
	    filename = urllib.parse.unquote(cd2).replace("/", "_")
	    print("filename:", filename)

	    # read() returns bytes, so the file must be opened in binary mode;
	    # writing bytes to a text-mode file raises TypeError on Python 3.
	    with open(os.path.join(output_dir, filename), 'wb') as f:
	        f.write(page)
	    print(i, end=' ')

	print('\nDownload Finished\nHappy Coding!!!')