Skip to content

Instantly share code, notes, and snippets.

@hex128
Last active August 26, 2016 20:13
Show Gist options
  • Save hex128/2886c2ded8e922331c7a to your computer and use it in GitHub Desktop.
#!/usr/bin/python2
# -*- coding: utf-8 -*-
from urllib2 import urlopen, HTTPError
from bs4 import BeautifulSoup
from json import dumps
from sys import stdout, exit, stdin
from codecs import getwriter, getreader
from signal import signal, SIGINT
from re import compile
def youtube(username):
    """Fetch realtime channel stats for *username* from the YouTube GData feed.

    Returns a dict with ``videos_uploaded``, ``channel_subscribers`` and
    ``total_video_views`` (all ints).  Propagates HTTPError on a failed
    feed request.
    """
    feed = urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read()
    doc = BeautifulSoup(feed, "lxml")
    uploads = doc.find("gd:feedlink", {
        "rel": "http://gdata.youtube.com/schemas/2007#user.uploads"
    })
    statistics = doc.find("yt:statistics")
    return {
        "videos_uploaded": int(uploads["counthint"]),
        "channel_subscribers": int(statistics["subscribercount"]),
        "total_video_views": int(statistics["totaluploadviews"]),
    }
def main():
    """Read channel URLs from stdin, one per line, and emit JSON stats lines.

    Stops at the first empty line / EOF.  Lines that do not look like
    ``https://www.youtube.com/user/<name>`` are skipped, as are channels
    whose feed request fails with an HTTP error.
    """
    sout = getwriter("utf8")(stdout)
    sin = getreader("utf8")(stdin)
    # BUG FIX: the original pattern was a non-raw string with unescaped
    # dots, so "." matched ANY character and e.g. "wwwXyoutubeXcom" URLs
    # passed validation.  Use a raw string and escape the literal dots.
    regexp = compile(r"^https?://www\.youtube\.com/user/\w+$")
    while True:
        line = sin.readline().strip()
        if not line:
            break
        if regexp.match(line):
            # The username is the final path segment of the channel URL.
            username = line.rsplit("/", 1)[1]
            try:
                sout.write(dumps({"id": line, "realtime": youtube(username)}) + "\n")
            except HTTPError:
                # Best effort: skip channels the API refuses to serve.
                pass
if __name__ == "__main__":
    # Exit quietly on Ctrl+C instead of dumping a KeyboardInterrupt trace.
    def handle_sigint(signum, frame):
        exit(0)

    signal(SIGINT, handle_sigint)
    main()
#!/usr/bin/python2
# -*- coding: utf-8 -*-
from urllib2 import Request, urlopen, HTTPError
from bs4 import BeautifulSoup
from time import mktime, strptime
from json import dumps
from sys import stdout, exit
from codecs import getwriter
from signal import signal, SIGINT
def parse_users(url):
    """Scrape a SocialBlade top-list page and return the channel names on it."""
    # SocialBlade returns 403 if urllib is detected, so we spoof the UA.
    request = Request(url, None, {
        "User-agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"
    })
    page = BeautifulSoup(urlopen(request).read(), "lxml")
    stats_tables = page.find("div", {"id": "BodyContainer"}).find_all(
        "div", {"class": "TableMonthlyStats"})
    # Every anchor inside a monthly-stats table is one channel name.
    return [anchor.text
            for table in stats_tables
            for anchor in table.find_all("a")]
def unique(source):
    """Return *source*'s items with duplicates dropped, first-seen order kept."""
    seen = set()
    result = []
    for item in source:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
def youtube_links(username):
    """Collect the external link URLs listed on the channel's About page."""
    page = urlopen("https://www.youtube.com/user/%s/about" % username).read()
    about = BeautifulSoup(page, "lxml")
    column = about.find("div", {"class": "branded-page-v2-col-container"})
    items = column.find_all("li", {"class": "channel-links-item"})
    return [item.find("a")["href"] for item in items]
def youtube(username):
    """Build a channel-profile dict for *username* from the GData feed.

    Propagates HTTPError if the feed (or the About-page scrape in
    ``youtube_links``) fails.
    """
    feed = urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read()
    doc = BeautifulSoup(feed, "lxml")
    channel_url = "https://www.youtube.com/user/" + username
    statistics = doc.find("yt:statistics")
    # Keep only the 19-char "YYYY-MM-DDTHH:MM:SS" prefix of <published>,
    # dropping any fractional-second / zone suffix before parsing.
    joined = int(mktime(strptime(doc.find("published").text[:19],
                                 "%Y-%m-%dT%H:%M:%S")))
    return {
        "id": channel_url,
        "name": doc.find("title").text,
        "channel_subscribers": int(statistics["subscribercount"]),
        "total_video_views": int(statistics["totaluploadviews"]),
        "youtube_url": channel_url,
        "logo": doc.find("media:thumbnail")["url"],
        "joined_at": joined,
        "description": doc.find("content").text,
        "links": youtube_links(username),
    }
def main(country="RU"):
    """Dump stats for SocialBlade's top channels of *country* as JSON lines.

    The country code is parameterized (default "RU" preserves the original
    behavior).  Channels whose GData feed request fails with an HTTP error
    are skipped.
    """
    sout = getwriter("utf8")(stdout)
    # Generalized: build the three top-list URLs from one base instead of
    # repeating the hard-coded country code three times.
    base = "http://socialblade.com/youtube/top/country/" + country
    users = []
    for suffix in ("", "/mostsubscribed", "/mostviewed"):
        users += parse_users(base + suffix)
    for user in unique(users):
        try:
            sout.write(dumps(youtube(user), ensure_ascii=False) + "\n")
        except HTTPError:
            # Best effort: a dead or renamed channel should not abort the run.
            pass
if __name__ == "__main__":
    # Exit quietly on Ctrl+C instead of dumping a KeyboardInterrupt trace.
    def handle_sigint(signum, frame):
        exit(0)

    signal(SIGINT, handle_sigint)
    main()
#!/usr/bin/python2
# -*- coding: utf-8 -*-
from urllib2 import Request, urlopen, HTTPError
from bs4 import BeautifulSoup
from time import mktime, strptime
from json import dumps
from sys import stdout, exit
from codecs import getwriter
from signal import signal, SIGINT
def parse_users(url):
    """Return every channel name listed on a SocialBlade top-list page."""
    # urllib's default User-Agent gets a 403 from SocialBlade; send a
    # browser-like one instead.
    spoofed = Request(url, None, {
        "User-agent": "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"
    })
    document = BeautifulSoup(urlopen(spoofed).read(), "lxml")
    container = document.find("div", {"id": "BodyContainer"})
    names = []
    for table in container.find_all("div", {"class": "TableMonthlyStats"}):
        for anchor in table.find_all("a"):
            names.append(anchor.text)
    return names
def unique(source):
    """De-duplicate *source*, preserving the order of first occurrence."""
    seen = set()
    out = []
    for element in source:
        if element in seen:
            continue
        seen.add(element)
        out.append(element)
    return out
def youtube_links(username):
    """Scrape the external links advertised on the channel's About page."""
    about_url = "https://www.youtube.com/user/%s/about" % username
    about_page = BeautifulSoup(urlopen(about_url).read(), "lxml")
    wrapper = about_page.find("div", {"class": "branded-page-v2-col-container"})
    hrefs = []
    for entry in wrapper.find_all("li", {"class": "channel-links-item"}):
        hrefs.append(entry.find("a")["href"])
    return hrefs
def youtube(username):
    """Assemble the channel-profile dict from the user's GData feed.

    Propagates HTTPError on a failed feed (or About-page) request.
    """
    profile_url = "https://www.youtube.com/user/" + username
    raw = urlopen("https://gdata.youtube.com/feeds/api/users/" + username).read()
    feed = BeautifulSoup(raw, "lxml")
    stats = feed.find("yt:statistics")
    # Trim <published> to its first 19 chars ("YYYY-MM-DDTHH:MM:SS") so
    # strptime is not tripped up by a fractional-second / zone suffix.
    created = feed.find("published").text[:19]
    return {
        "id": profile_url,
        "name": feed.find("title").text,
        "channel_subscribers": int(stats["subscribercount"]),
        "total_video_views": int(stats["totaluploadviews"]),
        "youtube_url": profile_url,
        "logo": feed.find("media:thumbnail")["url"],
        "joined_at": int(mktime(strptime(created, "%Y-%m-%dT%H:%M:%S"))),
        "description": feed.find("content").text,
        "links": youtube_links(username),
    }
def main(country="TR"):
    """Dump stats for SocialBlade's top channels of *country* as JSON lines.

    The country code is parameterized (default "TR" preserves the original
    behavior).  Channels whose GData feed request fails with an HTTP error
    are skipped.
    """
    sout = getwriter("utf8")(stdout)
    # Generalized: derive the three top-list URLs from one base instead of
    # repeating the hard-coded country code three times.
    base = "http://socialblade.com/youtube/top/country/" + country
    users = []
    for suffix in ("", "/mostsubscribed", "/mostviewed"):
        users += parse_users(base + suffix)
    for user in unique(users):
        try:
            sout.write(dumps(youtube(user), ensure_ascii=False) + "\n")
        except HTTPError:
            # Best effort: a dead or renamed channel should not abort the run.
            pass
if __name__ == "__main__":
    # Exit quietly on Ctrl+C instead of dumping a KeyboardInterrupt trace.
    def handle_sigint(signum, frame):
        exit(0)

    signal(SIGINT, handle_sigint)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment