@navinpai
Last active August 29, 2015 14:07
Download all Naruto Shippuden episodes from http://www.naruget.net/naruto-shippuden-episodes/ (380 episodes, ~25 GB as of 8-10-14)
#!/usr/bin/python
import urllib2
from bs4 import BeautifulSoup
import requests
import re
import os

# Episode page URLs, filled by main() and consumed by download_episodes()
episodeArray = []


def download_file(filename, url):
    # Plain requests-based download, kept here commented out as a fallback:
    '''
    print "Downloading Episode: ", filename
    local_filename = url.split('/')[-1]
    r = requests.get(url, stream=True)
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    '''
    # Using aria2 here instead of requests because aria2 is an EPIC download manager
    # (-x 10: up to 10 connections per server, -j 5: up to 5 concurrent downloads)
    os.system('aria2c -x 10 -j 5 -o ' + filename + ' ' + url)


def get_episode(episodenumber, link):
    # Fetch the player page and pass the direct <source> URL to the downloader
    r = requests.get(link)
    source = re.findall(r'source src="(.*?)"', r.text)
    download_file(str(episodenumber) + '.mp4', source[0])


def download_episodes():
    # The listing is newest-first; reverse it so episodes download in airing order
    episodeArray.reverse()
    for episode in episodeArray:
        r = requests.get(episode)
        # The player iframe is embedded via a JavaScript unescape("...") call
        link = re.findall(r'\+unescape\("(.*?)"\)', r.text)
        iframelink = urllib2.unquote(link[0].encode("utf8"))
        source = re.findall(r'src="(.*?)"', iframelink)
        # Episode number is taken from the page slug (text after the first '-')
        get_episode(r.url.split('/')[-2].split('-', 1)[1], source[0])


def main():
    r = requests.get('http://www.naruget.net/naruto-shippuden-episodes/')
    soup = BeautifulSoup(r.text, 'html.parser')
    # The sidebar div with id 'side-a' lists one link per episode page
    episodeList = soup.find('div', attrs={'id': 'side-a'}).findAll('a')
    for i in episodeList:
        episodeArray.append(i['href'])
    download_episodes()


if __name__ == "__main__":
    main()
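
If aria2c is not on the system, the requests logic left commented out inside download_file() can stand on its own. Below is a minimal sketch of that fallback, assuming only the requests package; the function name download_file_requests is illustrative and not part of the original script.

import requests

def download_file_requests(filename, url):
    # Illustrative fallback; mirrors the commented-out block in download_file().
    # Stream the response and write it to disk in 1 KB chunks so the whole
    # video is never held in memory at once.
    r = requests.get(url, stream=True)
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)

This trades aria2's multi-connection speed for having no external dependency; to use it, call it from get_episode() in place of download_file().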