Last active
May 8, 2016 05:14
-
-
Save sauravtom/8666097 to your computer and use it in GitHub Desktop.
Scraper behind the YouTube channel 9vine: http://youtu.be/G7Z__m02eFQ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os
import shutil
import subprocess
import sys
import time
import urllib
import urllib2

from bs4 import BeautifulSoup

from local_settings import username, password
# Twitter handles that post Vine links. gen_url() reads the entry at
# index 2 ('ScienceVines'), so the order of this list matters.
chanells = [
    'TheFunnyVines',
    'vinehumor',
    'ScienceVines',
    'TheFunnyVine',
    'The_Best_Vines',
    'BestOfVines',
]
def filter(s):
    """Return *s* with surrounding whitespace removed and non-ASCII characters dropped.

    Every character whose code point is 128 or above is discarded; the
    remaining characters keep their original order.

    NOTE: this name shadows the builtin ``filter`` for the whole module.
    """
    trimmed = s.strip()
    ascii_chars = [ch for ch in trimmed if ord(ch) < 128]
    return "".join(ascii_chars)
def gen_url(channel=None, limit=11):
    """Scrape a Twitter profile page and collect the vine.co links in its tweets.

    Args:
        channel: Twitter handle to scrape. Defaults to ``chanells[2]``, the
            handle this function used before the parameter existed.
        limit: Maximum number of links to return (default 11, the previous
            hard-coded cap).

    Returns:
        A list of display URLs (the text of each tweet's ``js-display-url``
        span) that contain ``'vine.co'``, in page order, at most *limit* long.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the profile page cannot be
        fetched; that failure is not handled here.
    """
    if channel is None:
        channel = chanells[2]
    page_url = "https://twitter.com/%s" % channel

    # The urllib2 response is not a context manager, so close it explicitly.
    response = urllib2.urlopen(page_url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        response.close()

    links = []
    for tweet in soup.find_all('p', {'class': 'js-tweet-text'}):
        link_span = tweet.find('span', {'class': 'js-display-url'})
        if link_span is None:
            # Tweet carries no link at all; skip it instead of raising and
            # printing an AttributeError for every such tweet.
            continue
        display_url = link_span.get_text()
        if 'vine.co' in display_url:
            links.append(display_url)
    return links[:limit]
def gen_info(url):
    """Fetch a Vine page and extract its video URL, author and description.

    Args:
        url: Vine link without a scheme; ``'https://'`` is prepended before
            the page is requested.

    Returns:
        A dict with the keys ``'url'`` (the full https URL), ``'video'``
        (``src`` of the first ``<source>`` tag), ``'user'`` (text of
        ``<p class="username">``) and ``'tweet'`` (text of
        ``<p class="description">``). Any field that cannot be extracted is
        set to the placeholder ``'XXXXX'``. Returns ``None`` when the page
        cannot be fetched or parsed.
    """
    placeholder = 'XXXXX'
    url = 'https://' + url

    try:
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response.read())
        finally:
            response.close()
    except Exception:
        # Best-effort: an unreachable page yields None. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return None

    info = {'url': url}
    extractors = (
        ('video', lambda: soup.find("source").get("src")),
        ('user', lambda: soup.find('p', {'class': 'username'}).get_text()),
        ('tweet', lambda: soup.find('p', {'class': 'description'}).get_text()),
    )
    for key, extract in extractors:
        try:
            value = extract()
        except Exception as e:
            # Typically AttributeError because find() returned None.
            print(e)
            value = placeholder
        if value is None:
            # e.g. a <source> tag without a src attribute.
            value = placeholder
        info[key] = value
    return info
def post_to_youtube(title, description, tags):
    """Upload ``video.avi`` with the ``youtube-upload`` CLI, then delete ``dump/``.

    Args:
        title: Video title.
        description: Video description. It is passed through verbatim; the
            old replacement of double quotes existed only to survive shell
            quoting and is no longer needed.
        tags: Comma-separated keywords.

    The uploader's exit status is printed. The ``dump`` directory is removed
    afterwards whether or not the upload succeeded.

    NOTE(review): the password is still passed on the command line, where
    other local users may be able to see it in the process list. Confirm
    whether ``youtube-upload`` offers a safer credential mechanism.
    """
    # Arguments are passed as a list with no shell involved, so scraped text
    # in the title/description cannot inject shell commands.
    cmd = [
        'youtube-upload',
        'video.avi',
        '--email=%s' % username,
        '--password=%s' % password,
        '--title=%s' % title,
        '--description=%s' % description,
        '--category=Comedy',
        '--keywords=%s' % tags,
    ]
    try:
        status = subprocess.call(cmd)
    except OSError as e:
        # youtube-upload is not installed or not executable. Report it and
        # still fall through to the cleanup, as the shell-based version did.
        print(e)
        status = 127
    print(status)

    # to clean the directory
    shutil.rmtree('dump', ignore_errors=True)
def main():
    """Download the latest Vines, stitch them into one video and upload it.

    Steps:
      1. Collect Vine links with gen_url() and page details with gen_info().
      2. Download up to ten clips into ``dump/<index>.mp4``.
      3. Write the numbered description plus tags to ``dump/info.txt``.
      4. Concatenate the clips into ``video.avi`` with ``melt``.
      5. Upload the result through post_to_youtube().

    Returns None.
    """
    tags = 'funny,hysterical,hot,video,toddler,vine,Laughing,Cute,Best'

    # post_to_youtube() deletes dump/ at the end of every run, so it has to
    # be recreated before anything is downloaded into it.
    if not os.path.isdir('dump'):
        os.makedirs('dump')

    main_arr = [gen_info(url) for url in gen_url()]

    # gen_info() returns None for pages it could not fetch and uses the
    # placeholder 'XXXXX' when no video source was found; neither can be
    # downloaded, so drop them before numbering the clips.
    vines = [
        vine for vine in main_arr
        if vine is not None and vine['video'] not in (None, 'XXXXX')
    ]
    if not vines:
        print('No downloadable vines found; nothing to upload.')
        return

    entries = []
    # enumerate() gives each clip its own index even when two entries are
    # equal; list.index() would return the first match and overwrite files.
    for index, vine in enumerate(vines[:10]):
        print(vine['video'])
        urllib.urlretrieve(vine['video'], "dump/%s.mp4" % index)
        entries.append(
            "%d: %s -by user %s %s\n\n" % (
                index,
                filter(vine['tweet']),
                filter(vine['user']),
                filter(vine['url']),
            )
        )
    description = "".join(entries)

    with open('dump/info.txt', 'a') as myFile:
        myFile.write('%s \n\n %s' % (description, tags))

    print(main_arr)
    # Needs a shell so that the dump/*.mp4 glob is expanded; the command is
    # a fixed string with no scraped input in it.
    print(os.system("melt dump/*.mp4 -consumer avformat:video.avi acodec=libmp3lame vcodec=libx264 brate=5000k s=854x480"))

    title = 'Best Vines Compilation: %s' % (time.strftime("%d %B %Y"))
    post_to_youtube(title, description, tags)
if __name__ == '__main__':
    # main() returns None, so printing its result only emitted a stray "None".
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
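After downloading this file, create a new file named "local_settings.py" in the same directory and define two variables in it, `username` and `password`, holding the credentials of the Google account associated with your YouTube channel. The script loads them with `from local_settings import username, password`, so they must be plain module-level names rather than entries in a dictionary.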