Skip to content

Instantly share code, notes, and snippets.

@iKlotho
Created September 21, 2015 09:49
Show Gist options
  • Save iKlotho/140cc8fd4535ea1285a3 to your computer and use it in GitHub Desktop.
Save iKlotho/140cc8fd4535ea1285a3 to your computer and use it in GitHub Desktop.
# -*- coding: iso-8859-1 -*-
import requests, time, re, sys, json, urllib2
from downloadDM import downloadDM
from BeautifulSoup import BeautifulSoup
class lequipeParse(downloadDM):
    """Collect video pages from video.lequipe.fr and hand them to the downloader.

    Instantiating the class runs the whole pipeline immediately: it gathers
    the video page links for one day of the month, builds one metadata record
    per video (title, description, tags, Dailymotion link), passes every
    record through ``_findLinks`` and ``_downloadLinks`` and finally dumps
    the records to ``deneme.json``.

    NOTE(review): ``_findLinks`` and ``_downloadLinks`` are not defined in
    this class; presumably they are inherited from ``downloadDM`` -- confirm.
    """

    def __init__(self, day='21'):
        """Run the scrape / download pipeline and write ``deneme.json``.

        day -- day of the month to keep. It is compared with the first two
               characters of each listing's date text. The default '21' is
               the value that used to be hard-coded; pass
               ``time.strftime("%d")`` to process today's videos.
        """
        # NOTE(review): downloadDM.__init__ is not called here (it was not
        # called originally either) -- confirm the base class needs no setup.
        self.base_url = "http://video.lequipe.fr/morevideos/48/1"
        self.main_url = "http://video.lequipe.fr"
        self.dm_url = "http://www.dailymotion.com/video"
        # Zero-pad so that both 5 and '5' become '05'.
        # NOTE(review): assumes the site prints zero-padded days -- confirm.
        self.day = str(day).zfill(2)
        self.links = []
        # proxies / headers / orginalVideoLinks are not read anywhere in this
        # class; they are kept because the base class may rely on them.
        self.proxies = {'https': 'https://94.23.196.68:3128'}
        self.headers = {
            'User-agent':
                'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) '
                'Gecko/20100101 Firefox/40.0',
        }
        self.jsonCollect = []
        self.orginalVideoLinks = []
        # Template for one video record; it is copied per video, never shared.
        self.dmCollect = {'title': '', 'description': '', 'link': '',
                          'tags': '', 'file_id': ''}

        self.collectLinks()
        self.getDailymotionLinks()
        self.getDownloadLinks()
        self.downloadAndJsonIt()

        with open('deneme.json', 'w') as f:
            # Write real JSON instead of the Python repr of the list.
            # default=str deliberately keeps the old "stringify anything"
            # behaviour for values json cannot encode natively (the type of
            # 'file_id' is decided by _downloadLinks, which is not visible
            # here), so the dump cannot fail after all downloads are done.
            json.dump(self.jsonCollect, f, default=str)

    @staticmethod
    def _ascii(text):
        """Return ``text`` as a native str with every non-ASCII character dropped."""
        # The encode/decode round trip yields a plain str on Python 2 and
        # Python 3 alike.
        return str(text.encode('ascii', 'ignore').decode('ascii'))

    @staticmethod
    def _videoPath(iframe_src):
        """Return the last path segment of ``iframe_src`` as '/<segment>'.

        Any query string is removed first. A URL without '?' keeps its full
        last segment (the old slice silently dropped its final character).
        """
        path = iframe_src.split('?', 1)[0]
        return '/' + path.rpartition('/')[2]

    def collectLinks(self):
        """Fill ``self.links`` with the hrefs of the listing items dated ``self.day``."""
        # Runtime message is Turkish for "Collecting links". Written as a
        # single parenthesised argument so it prints the same on Python 2.
        print("Linkler toplaniyor")
        r = requests.get(self.base_url)
        soup = BeautifulSoup(r.content)
        for item in soup.findAll('li', 'items_last_vids'):
            # Only the first two characters of the date text are compared.
            if item.a.find('div', 'date').text[:2] == self.day:
                self.links.append(item.a['href'])

    def getDailymotionLinks(self):
        """Append one metadata record per collected link to ``self.jsonCollect``."""
        for link in self.links:
            response = requests.get(self.main_url + link)
            soup = BeautifulSoup(response.content)
            iframe_src = str(soup.findAll('div', id='laVideo')[0].iframe['src'])
            tag_links = soup.findAll('div', 'brique briqM')[0].findAll('a')

            # A fresh dict per video: appending the shared self.dmCollect made
            # every list entry alias one object holding the last video's data.
            record = dict(self.dmCollect)
            record['link'] = str(self.dm_url + self._videoPath(iframe_src))
            record['title'] = self._ascii(
                soup.findAll('div', 'haut borderbas')[0].h1.text)
            record['description'] = self._ascii(
                soup.findAll('p', 'desc')[0].text)
            # Same layout as before: each tag preceded by a space, plus one
            # trailing space (a single space when there are no tags).
            record['tags'] = ''.join(
                ' ' + self._ascii(a.text) for a in tag_links) + ' '
            self.jsonCollect.append(record)

    def getDownloadLinks(self):
        """Replace each record's 'link' with the result of ``_findLinks`` on it."""
        for record in self.jsonCollect:
            record['link'] = self._findLinks(record['link'])

    def downloadAndJsonIt(self):
        """Store the result of ``_downloadLinks(title, link)`` as each record's 'file_id'."""
        for record in self.jsonCollect:
            record['file_id'] = self._downloadLinks(record['title'],
                                                    record['link'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment