Skip to content

Instantly share code, notes, and snippets.

@dmiro
Created January 7, 2015 15:46
Show Gist options
  • Select an option

  • Save dmiro/b14a55818d2beb55820a to your computer and use it in GitHub Desktop.

Select an option

Save dmiro/b14a55818d2beb55820a to your computer and use it in GitHub Desktop.
Ejemplo: obtener los 40 primeros resultados de una búsqueda en YouTube del texto "la guerra del opio"
from pyquery import PyQuery as pq
from urllib import urlencode
# Scrape two pages of YouTube video search results for a fixed query and
# print the URL, title, duration, description, thumbnail and meta text of
# every entry found in the result list.

SEARCH_QUERY = 'la guerra del opio'
BASE_URL = "http://www.youtube.com"
REQUEST_HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20140129 Firefox/24.0",
}


def _duration_to_seconds(text):
    """Convert a clock string such as "1:02:03" or "4:05" to seconds.

    Returns None when *text* is empty or when any colon-separated field is
    not an integer, so the caller never reports a value left over from a
    previous entry.
    """
    if not text:
        return None
    try:
        fields = [int(part) for part in text.split(':')]
    except ValueError:
        return None
    total = 0
    # Fields run from most to least significant, each worth 60x the next.
    for value in fields:
        total = total * 60 + value
    return total


# NOTE(review): range(2) requests page=0 and page=1 -- confirm the site's
# page numbering is 0-based, otherwise the two requests may overlap.
for page in range(2):
    params = urlencode({'filters': 'video', 'page': page, 'search_query': SEARCH_QUERY})
    jq = pq(url="%s/results?%s" % (BASE_URL, params), headers=REQUEST_HEADERS)
    jq.make_links_absolute(BASE_URL)
    for video in jq("ol.item-section").children().items():
        link = video.find("a.yt-uix-tile-link")
        url = link.attr("href")
        title = link.text()
        duration = video.find("span.video-time").text()
        # Recomputed for every entry; None when the entry shows no duration.
        tsecs = _duration_to_seconds(duration)
        description = video.find("div.yt-lockup-description").text()
        image = video.find("div.yt-thumb img")
        # Prefer data-thumb and fall back to src (presumably data-thumb
        # carries the real URL for lazily loaded images -- TODO confirm).
        thumb = image.attr("data-thumb") or image.attr("src")
        if thumb:
            # lstrip() takes a set of characters, so this removes every
            # leading slash (e.g. the "//" of a protocol-relative URL).
            thumb = thumb.lstrip("/")
        meta = video.find("ul.yt-lockup-meta-info").text()
        # Single-argument print(...) emits the same text under Python 2
        # (which this file's imports target) and also parses on Python 3.
        print("")
        print("url: %s" % url)
        print("title: %s" % title)
        print("time: %s" % duration)
        print("time in seconds %s" % tsecs)
        print("description: %s" % description)
        print("thumbnail: %s" % thumb)
        print("meta: %s" % meta)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment