Created
January 7, 2015 15:46
-
-
Save dmiro/b14a55818d2beb55820a to your computer and use it in GitHub Desktop.
Ejemplo: obtener los 40 primeros resultados de una búsqueda en YouTube del texto "la guerra del opio"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: fetch two pages of YouTube video search results for the query
# "la guerra del opio" and print the details scraped from each result entry.
from __future__ import print_function

try:
    from urllib import urlencode  # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

from pyquery import PyQuery as pq


def _duration_to_seconds(text):
    """Convert a colon-separated duration such as "1:02:03" to seconds.

    Each component to the left is worth 60 times the one to its right.
    Returns 0 when *text* is empty or None, so entries without a visible
    duration get a defined value instead of reusing a previous one.
    """
    if not text:
        return 0
    seconds = 0
    for part in text.split(':'):
        seconds = seconds * 60 + int(part)
    return seconds


# NOTE(review): pages are requested as 0 and 1 — confirm the site's `page`
# parameter is 0-based, otherwise the first page may be fetched twice.
for page in range(2):
    params = urlencode({'filters': 'video', 'page': page, 'search_query': 'la guerra del opio'})
    jq = pq(url="http://www.youtube.com/results?%s" % params,
            headers={"user-agent": "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20140129 Firefox/24.0"})
    # Turn relative hrefs into full URLs before reading them.
    jq.make_links_absolute("http://www.youtube.com")
    for video in jq("ol.item-section").children().items():
        link = video.find("a.yt-uix-tile-link")
        url = link.attr("href")
        title = link.text()
        duration = video.find("span.video-time").text()
        # Computed for every entry: previously an entry with no duration
        # raised NameError or printed the preceding entry's seconds.
        tsecs = _duration_to_seconds(duration)
        description = video.find("div.yt-lockup-description").text()
        image = video.find("div.yt-thumb img")
        # Prefer the "data-thumb" attribute and fall back to "src".
        thumb = image.attr("data-thumb") or image.attr("src")
        if thumb:
            # Drop leading slashes (e.g. a protocol-relative "//host/..." prefix).
            thumb = thumb.lstrip("/")
        meta = video.find("ul.yt-lockup-meta-info").text()
        print("url:", url)
        print("title:", title)
        print("time:", duration)
        print("time in seconds", tsecs)
        print("description:", description)
        print("thumbnail:", thumb)
        print("meta:", meta)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment