Skip to content

Instantly share code, notes, and snippets.

@TheSkorm
Last active September 30, 2015 00:27
Show Gist options
  • Select an option

  • Save TheSkorm/1692335 to your computer and use it in GitHub Desktop.

Select an option

Save TheSkorm/1692335 to your computer and use it in GitHub Desktop.
YouTube Scraper
import gdata.youtube
import gdata.youtube.service
import Queue
import threading
import string
# Whitelist of characters: "-_.() " plus ASCII letters and digits.
# NOTE(review): presumably meant for sanitising file names; nothing in the
# visible code references it -- confirm before removing.
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
from urllib import urlopen, unquote
import urllib
from urlparse import parse_qs, urlparse
# Module-level YouTube service handle. Search() creates its own local
# instance under the same name, so the visible code never reads this one.
yt_service = gdata.youtube.service.YouTubeService()
from pytube import YouTube
class AppURLopener(urllib.FancyURLopener):
    """FancyURLopener subclass that presents a desktop Chrome user agent."""

    # urllib uses the opener's ``version`` attribute as its user-agent string.
    version = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/535.7 (KHTML, like Gecko) "
        "Chrome/16.0.912.77 Safari/535.7"
    )


# Install the custom opener as urllib's module-wide default so the
# module-level helpers (urlopen / urlretrieve) go through it.
urllib._urlopener = AppURLopener()
def Search(search_terms):
    """Run a YouTube video search and return the resulting feed.

    Args:
        search_terms: Text assigned to the query's ``vq`` field.

    Returns:
        Whatever ``YouTubeService.YouTubeQuery`` returns for the query; the
        caller in this file reads its ``entry`` list.
    """
    video_query = gdata.youtube.service.YouTubeVideoQuery()
    video_query.vq = search_terms
    video_query.orderby = 'relevance'
    # 'include' asks the API not to filter restricted ("racy") results.
    video_query.racy = 'include'

    # A fresh service object is created for every call.
    client = gdata.youtube.service.YouTubeService()
    return client.YouTubeQuery(video_query)
##def PrintEntryDetails(entry):
## print 'Video title: %s' % entry.media.title.text
## print 'Video published on: %s ' % entry.published.text
## print 'Video description: %s' % entry.media.description.text
## print 'Video category: %s' % entry.media.category[0].text
## print 'Video tags: %s' % entry.media.keywords.text
## print 'Video watch page: %s' % entry.media.player.url
## print 'Video flash player URL: %s' % entry.GetSwfUrl()
## print 'Video duration: %s' % entry.media.duration.seconds
##
##
#### print 'Video rating: %s' % entry.rating.average
##
## # show alternate formats
## for alternate_format in entry.media.content:
## if 'isDefault' not in alternate_format.extension_attributes:
## print 'Alternate format: %s | url: %s ' % (alternate_format.type,
#### alternate_format.url)
####
## # show thumbnails
## for thumbnail in entry.media.thumbnail:
## print 'Thumbnail url: %s' % thumbnail.url
def Download_mp4(id):
    """Download the first MP4 rendition of a YouTube video via pytube.

    The watch URL is printed first so progress is visible when several
    worker threads are running.

    Args:
        id: YouTube video id, i.e. the ``v=`` value of a watch URL. The name
            shadows the builtin but is kept so existing callers keep working.

    Returns:
        None. The file is written by ``video.download()``; when pytube lists
        no MP4 rendition a notice is printed and nothing is downloaded.
    """
    watch_url = 'http://www.youtube.com/watch?v=' + id
    print(watch_url)

    yt = YouTube()
    yt.url = watch_url

    mp4_videos = yt.filter(extension="mp4")
    if not mp4_videos:
        # Indexing [0] on an empty result would raise IndexError; report the
        # video and move on instead.
        print('No mp4 stream available for ' + watch_url)
        return

    mp4_videos[0].download()
def SearchnDownload(term):
    """Search YouTube for *term* and download the top hit as MP4.

    Args:
        term: Free-text search string.

    Returns:
        None. Blank terms are skipped without contacting the service, and a
        search that yields no entries prints a notice instead of raising
        IndexError.
    """
    if not term or not term.strip():
        # Blank lines in the input list are not searches; skip them rather
        # than sending an empty query.
        return

    feed = Search(term)
    if not feed.entry:
        print('No results for: ' + term)
        return

    # The last "/"-separated segment of the entry id text is used as the
    # video id.
    video_id = feed.entry[0].id.text.split("/")[-1]
    Download_mp4(video_id)
class downloader(threading.Thread):
    """Worker thread that runs SearchnDownload on every term taken from a queue.

    The thread loops forever, so callers are expected to mark it as a daemon
    and wait for completion with ``queue.join()``.
    """

    def __init__(self, queue):
        """Store the shared work queue.

        Args:
            queue: Queue of search terms; must provide ``get`` and
                ``task_done``.
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process queued terms until the interpreter exits."""
        # Imported here so this block does not need a new file-level import.
        import traceback

        while True:
            video = self.queue.get()
            try:
                SearchnDownload(video)
            except Exception:
                # One failing term must not kill the worker: report it and
                # keep serving the remaining items.
                traceback.print_exc()
            finally:
                # Always acknowledge the item; otherwise queue.join() in the
                # main thread would block forever after a failure.
                self.queue.task_done()
# Search terms to download, one per line, pasted between the triple quotes.
a = """"""

queue = Queue.Queue()

# Start a pool of ten daemon workers; being daemons, they do not keep the
# process alive once the main thread finishes.
for _ in range(10):
    worker = downloader(queue)
    worker.daemon = True
    worker.start()

for line in a.splitlines():
    term = line.strip()
    # Skip blank lines (including the case where ``a`` is empty) so no empty
    # search is ever queued.
    if term:
        queue.put(term)

# Block until every queued term has been acknowledged with task_done().
queue.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment