Last active
August 26, 2016 20:13
-
-
Save hex128/2886c2ded8e922331c7a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# -*- coding: utf-8 -*- | |
from urllib2 import urlopen, HTTPError | |
from bs4 import BeautifulSoup | |
from json import dumps | |
from sys import stdout, exit, stdin | |
from codecs import getwriter, getreader | |
from signal import signal, SIGINT | |
from re import compile | |
def youtube(username):
    """Fetch realtime channel stats for *username* via the gdata XML API.

    Returns a dict with upload count, subscriber count and total upload views.
    Raises HTTPError if the gdata feed cannot be fetched.
    """
    feed = urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read()
    doc = BeautifulSoup(feed, "lxml")
    # The uploads count lives on the gd:feedLink element for the uploads feed.
    uploads = doc.find("gd:feedlink", {
        "rel": "http://gdata.youtube.com/schemas/2007#user.uploads"
    })
    statistics = doc.find("yt:statistics")
    return {
        "videos_uploaded": int(uploads["counthint"]),
        "channel_subscribers": int(statistics["subscribercount"]),
        "total_video_views": int(statistics["totaluploadviews"])
    }
def main():
    """Read YouTube user URLs from stdin and emit one JSON stats line each.

    Accepts one http(s)://www.youtube.com/user/<name> URL per line, stopping
    at EOF or the first blank line.  Channels whose gdata feed returns an
    HTTP error are silently skipped (best-effort streaming).
    """
    sout = getwriter("utf8")(stdout)
    sin = getreader("utf8")(stdin)
    # Fixed: raw string with escaped dots.  The original pattern let "."
    # match any character, so e.g. "https://wwwXyoutubeYcom/user/x" passed.
    regexp = compile(r"^https?://www\.youtube\.com/user/\w+$")
    while 1:
        line = sin.readline().strip()
        if not line:
            break
        if regexp.match(line):
            # The username is the last path component of the URL.
            username = line.rsplit("/", 1)[1]
            try:
                sout.write(dumps({"id": line, "realtime": youtube(username)}) + "\n")
            except HTTPError:
                # Channel gone or API refused -- skip this line.
                pass
if __name__ == "__main__":
    # Exit quietly on Ctrl-C instead of printing a KeyboardInterrupt trace.
    def handle_sigint(signum, frame):
        exit(0)
    signal(SIGINT, handle_sigint)
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# -*- coding: utf-8 -*- | |
from urllib2 import Request, urlopen, HTTPError | |
from bs4 import BeautifulSoup | |
from time import mktime, strptime | |
from json import dumps | |
from sys import stdout, exit | |
from codecs import getwriter | |
from signal import signal, SIGINT | |
def parse_users(url):
    """Scrape channel usernames from a SocialBlade top-list page at *url*."""
    # SocialBlade returns 403 if urllib is detected, so we spoof UA
    request = Request(url, None, {
        "User-agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"
    })
    page = BeautifulSoup(urlopen(request).read(), "lxml")
    container = page.find("div", {"id": "BodyContainer"})
    # Every anchor inside the monthly-stats tables is a channel name.
    return [anchor.text
            for table in container.find_all("div", {"class": "TableMonthlyStats"})
            for anchor in table.find_all("a")]
def unique(source):
    """Return the items of *source* deduplicated, keeping first-seen order."""
    seen = set()
    result = []
    for item in source:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
def youtube_links(username):
    """Collect the external link hrefs shown on a channel's About page."""
    page = urlopen("https://www.youtube.com/user/%s/about" % username).read()
    soup = BeautifulSoup(page, "lxml")
    column = soup.find("div", {"class": "branded-page-v2-col-container"})
    items = column.find_all("li", {"class": "channel-links-item"})
    return [item.find("a")["href"] for item in items]
def youtube(username):
    """Build a channel-profile dict for *username* from its gdata feed.

    Raises HTTPError if the feed cannot be fetched.
    """
    feed = urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read()
    doc = BeautifulSoup(feed, "lxml")
    channel_url = "https://www.youtube.com/user/" + username
    statistics = doc.find("yt:statistics")
    # Drop the fractional-seconds/zone suffix (chars past index 19), then
    # convert the ISO timestamp to a Unix epoch integer.
    joined = int(mktime(strptime(doc.find("published").text[:19], "%Y-%m-%dT%H:%M:%S")))
    return {
        "id": channel_url,
        "name": doc.find("title").text,
        "channel_subscribers": int(statistics["subscribercount"]),
        "total_video_views": int(statistics["totaluploadviews"]),
        "youtube_url": channel_url,
        "logo": doc.find("media:thumbnail")["url"],
        "joined_at": joined,
        "description": doc.find("content").text,
        "links": youtube_links(username)
    }
def main(country="RU"):
    """Dump JSON profiles for SocialBlade's top channels of *country*.

    Generalized: the country code (default "RU", preserving the original
    behavior) is now a parameter instead of being hard-coded three times.
    Writes one UTF-8 JSON object per line to stdout; channels whose gdata
    feed returns an HTTP error are skipped.
    """
    sout = getwriter("utf8")(stdout)
    base = "http://socialblade.com/youtube/top/country/" + country
    # Three overlapping top lists; unique() keeps only the first occurrence.
    users = parse_users(base) + parse_users(base + "/mostsubscribed") + parse_users(base + "/mostviewed")
    for user in unique(users):
        try:
            sout.write(dumps(youtube(user), ensure_ascii=False) + "\n")
        except HTTPError:
            # Best-effort: skip channels the API refuses.
            pass
if __name__ == "__main__":
    # Exit quietly on Ctrl-C instead of printing a KeyboardInterrupt trace.
    def handle_sigint(signum, frame):
        exit(0)
    signal(SIGINT, handle_sigint)
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# -*- coding: utf-8 -*- | |
from urllib2 import Request, urlopen, HTTPError | |
from bs4 import BeautifulSoup | |
from time import mktime, strptime | |
from json import dumps | |
from sys import stdout, exit | |
from codecs import getwriter | |
from signal import signal, SIGINT | |
def parse_users(url):
    """Scrape channel usernames from the SocialBlade top-list page *url*."""
    # SocialBlade returns 403 if urllib is detected, so we spoof UA
    request = Request(url, None, {
        "User-agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"
    })
    document = BeautifulSoup(urlopen(request).read(), "lxml")
    body = document.find("div", {"id": "BodyContainer"})
    usernames = []
    for table in body.find_all("div", {"class": "TableMonthlyStats"}):
        # Each anchor in the stats tables carries one channel name.
        usernames.extend(anchor.text for anchor in table.find_all("a"))
    return usernames
def unique(source):
    """Deduplicate *source*, preserving the order of first appearance."""
    seen = set()
    out = []
    for element in source:
        if element in seen:
            continue
        seen.add(element)
        out.append(element)
    return out
def youtube_links(username):
    """Return the external link hrefs from a channel's About page."""
    markup = urlopen("https://www.youtube.com/user/%s/about" % username).read()
    document = BeautifulSoup(markup, "lxml")
    wrapper = document.find("div", {"class": "branded-page-v2-col-container"})
    hrefs = []
    for entry in wrapper.find_all("li", {"class": "channel-links-item"}):
        hrefs.append(entry.find("a")["href"])
    return hrefs
def youtube(username):
    """Assemble a channel-profile dict for *username* from its gdata feed.

    Raises HTTPError if the feed cannot be fetched.
    """
    document = BeautifulSoup(
        urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read(),
        "lxml")
    url = "https://www.youtube.com/user/" + username
    stats = document.find("yt:statistics")
    # Truncate the ISO timestamp at 19 chars (strip fraction/zone) and
    # convert it to a Unix epoch integer.
    joined_at = int(mktime(strptime(document.find("published").text[:19], "%Y-%m-%dT%H:%M:%S")))
    return {
        "id": url,
        "name": document.find("title").text,
        "channel_subscribers": int(stats["subscribercount"]),
        "total_video_views": int(stats["totaluploadviews"]),
        "youtube_url": url,
        "logo": document.find("media:thumbnail")["url"],
        "joined_at": joined_at,
        "description": document.find("content").text,
        "links": youtube_links(username)
    }
def main(country="TR"):
    """Dump JSON profiles for SocialBlade's top channels of *country*.

    Generalized: the country code (default "TR", preserving the original
    behavior) is now a parameter instead of being hard-coded three times.
    Writes one UTF-8 JSON object per line to stdout; channels whose gdata
    feed returns an HTTP error are skipped.
    """
    sout = getwriter("utf8")(stdout)
    base = "http://socialblade.com/youtube/top/country/" + country
    # Three overlapping top lists; unique() keeps only the first occurrence.
    users = parse_users(base) + parse_users(base + "/mostsubscribed") + parse_users(base + "/mostviewed")
    for user in unique(users):
        try:
            sout.write(dumps(youtube(user), ensure_ascii=False) + "\n")
        except HTTPError:
            # Best-effort: skip channels the API refuses.
            pass
if __name__ == "__main__":
    # Install a SIGINT handler so Ctrl-C exits cleanly without a traceback.
    signal(SIGINT, lambda signum, frame: exit(0))
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment