Last active
September 30, 2015 00:27
-
-
Save TheSkorm/1692335 to your computer and use it in GitHub Desktop.
YouTube Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gdata.youtube | |
| import gdata.youtube.service | |
| import Queue | |
| import threading | |
| import string | |
| valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) | |
| from urllib import urlopen, unquote | |
| import urllib | |
| from urlparse import parse_qs, urlparse | |
| yt_service = gdata.youtube.service.YouTubeService() | |
| from pytube import YouTube | |
class AppURLopener(urllib.FancyURLopener):
    """URL opener that identifies itself with a desktop Chrome User-Agent."""

    # FancyURLopener reports this class attribute as its user agent string.
    version = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/535.7 (KHTML, like Gecko) "
        "Chrome/16.0.912.77 Safari/535.7"
    )


# Install an instance as urllib's module-level opener.
urllib._urlopener = AppURLopener()
def Search(search_terms):
    """Query YouTube for videos matching *search_terms*.

    The query is ordered by relevance and sets ``racy`` to ``'include'``.
    A new ``YouTubeService`` client is created for every call.

    Returns whatever ``YouTubeService.YouTubeQuery`` returns for the query
    (callers in this file read its ``entry`` list).
    """
    video_query = gdata.youtube.service.YouTubeVideoQuery()
    video_query.vq = search_terms
    video_query.orderby = 'relevance'
    video_query.racy = 'include'

    client = gdata.youtube.service.YouTubeService()
    return client.YouTubeQuery(video_query)
| ##def PrintEntryDetails(entry): | |
| ## print 'Video title: %s' % entry.media.title.text | |
| ## print 'Video published on: %s ' % entry.published.text | |
| ## print 'Video description: %s' % entry.media.description.text | |
| ## print 'Video category: %s' % entry.media.category[0].text | |
| ## print 'Video tags: %s' % entry.media.keywords.text | |
| ## print 'Video watch page: %s' % entry.media.player.url | |
| ## print 'Video flash player URL: %s' % entry.GetSwfUrl() | |
| ## print 'Video duration: %s' % entry.media.duration.seconds | |
| ## | |
| ## | |
| #### print 'Video rating: %s' % entry.rating.average | |
| ## | |
| ## # show alternate formats | |
| ## for alternate_format in entry.media.content: | |
| ## if 'isDefault' not in alternate_format.extension_attributes: | |
| ## print 'Alternate format: %s | url: %s ' % (alternate_format.type, | |
| #### alternate_format.url) | |
| #### | |
| ## # show thumbnails | |
| ## for thumbnail in entry.media.thumbnail: | |
| ## print 'Thumbnail url: %s' % thumbnail.url | |
def Download_mp4(id):
    """Download the first mp4 variant of the YouTube video with this id.

    The watch-page URL is printed before the download starts. The output
    location is whatever ``video.download()`` picks by default
    (NOTE(review): presumably the current directory -- confirm against the
    installed pytube version).

    Returns None.
    """
    watch_url = 'http://www.youtube.com/watch?v=' + id
    print(watch_url)

    yt = YouTube()
    yt.url = watch_url

    # Take the first entry pytube reports for the mp4 extension.
    mp4_videos = yt.filter(extension="mp4")
    mp4_videos[0].download()
    return
def SearchnDownload (term):
    """Search YouTube for *term* and download the top result as mp4.

    The video id is the last path segment of the first feed entry's id
    text. If the search returns no entries, a message is printed and
    nothing is downloaded (previously this raised IndexError).

    Returns None.
    """
    feed = Search(term)
    if not feed.entry:
        # Nothing matched: report it and skip, rather than indexing an
        # empty result list.
        print('No YouTube results for %r' % (term,))
        return

    video_id = feed.entry[0].id.text.split("/")[-1]
    Download_mp4(video_id)
class downloader(threading.Thread):
    """Worker thread that downloads every search term taken from a queue.

    Each item pulled from *queue* is passed to ``SearchnDownload``. The
    thread loops forever and is expected to be run as a daemon.
    """

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        # Local import so this class does not rely on the file's import block.
        import traceback

        while True:
            video = self.queue.get()
            try:
                SearchnDownload(video)
            except Exception:
                # One failed term must not kill the worker; report it and
                # carry on with the next item.
                traceback.print_exc()
            finally:
                # Always mark the item done, otherwise queue.join() in the
                # main thread would block forever after a failure.
                self.queue.task_done()
# Search terms to download, one per line. Paste the list between the quotes.
a=""""""

queue = Queue.Queue()

# Start a fixed pool of daemon workers so the process can exit once the
# queue has been drained.
for i in range(10):
    t = downloader(queue)
    t.daemon = True
    t.start()

# Enqueue each non-blank line; blank lines (including the single empty
# string produced when ``a`` is empty) would otherwise be searched for as
# empty terms.
for line in a.splitlines():
    term = line.strip()
    if term:
        queue.put(term)

# Block until every queued term has been marked done by a worker.
queue.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment