Bigpet · August 29, 2015 14:05
diff --git a/twitch_past_broadcast_downloader.py b/twitch_past_broadcast_downloader.py
 import requests
 import sys
 import json
 import re
 import os
 import string
 import argparse

 # Root of the HTTP API; download_broadcast() prefixes its endpoint path with it.
 BASE_URL = 'http://api.justin.tv'
 # File that download_broadcast() appends one metadata line to per video part.
 LOGFILE_NAME = 'log.csv'

 def safe_filename(title):
     """Return 'title' reduced to characters that are safe in a filename.

     Only ASCII letters, digits, the space and the punctuation ``-_.()``
     survive; every other character is dropped (not replaced).
     """
     whitelist = "".join(("-_.() ", string.ascii_letters, string.digits))
     kept = filter(lambda ch: ch in whitelist, title)
     return "".join(kept)

 def download_file(url, local_filename):
     """Stream the resource at 'url' into the file 'local_filename'.

     The body is fetched in 1 KiB chunks and written to disk as it arrives,
     while a running megabyte counter is rewritten in place on stdout.

     Args:
         url: address of the file to fetch.
         local_filename: path of the file to create (overwritten if present).

     Raises:
         requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
             status, so an error page is never saved as the download.
     """
     print("downloading {0}".format(local_filename))
     CS = 1024
     done = 0
     r = requests.get(url, stream=True)
     try:
         # Fail before creating the file when the server reports an error.
         r.raise_for_status()
         with open(local_filename, 'wb') as f:
             for chunk in r.iter_content(chunk_size=CS):
                 if not chunk:  # filter out keep-alive new chunks
                     continue
                 f.write(chunk)
                 # Count the bytes actually received: a chunk may be shorter
                 # than CS (the last one always can be).
                 done += len(chunk)
                 sys.stdout.write("\r{0:>7.2f} MB".format(done / float(1024 ** 2)))
                 # No newline is written, so flush to make the counter visible.
                 sys.stdout.flush()
     finally:
         # A streamed response keeps its connection until it is closed.
         r.close()

     print("done\n")

 def download_broadcast(id_):
     """Download all video parts for broadcast 'id_'.

     Requests the ``by_archive`` endpoint under BASE_URL, then, for every part
     in order of its 'start_timestamp', appends one metadata line to
     LOGFILE_NAME and saves the part with download_file().

     Args:
         id_: broadcast/archive id substituted into the API URL.

     Raises:
         Exception: if the API answers with a status other than 200, or with
             an empty list of parts.
         SystemExit: with status 1 if the response body is not valid JSON.
     """
     pattern = '{base}/api/broadcast/by_archive/{id_}.json?onsite=true'
     url = pattern.format(base=BASE_URL, id_=id_)

     r = requests.get(url)
     if r.status_code != 200:
         raise Exception("API returned {0}".format(r.status_code))
     try:
         chunks = r.json()
     except ValueError as e:
         print("API did not return valid JSON: {}".format(e))
         print("{}".format(r.text))
         # sys.exit() rather than the interactive-only quit() helper; the
         # non-zero status tells the calling shell that the run failed.
         sys.exit(1)
     if not chunks:
         raise Exception("API returned no video parts for broadcast {0}".format(id_))
     print(chunks)

     # '!s' converts each value with str() before padding, so values that do
     # not accept a format spec (e.g. None from a JSON null) cannot raise.
     #I know that this is not a typical csv format (I should not include column names each line)
     log_format = (
         "game, {!s:<30}, vodid, {!s:<10}, start_timestamp, {!s:<10}, "
         "length, {!s:<5}, filesize, {!s:<10}, link_url, {!s:<50}, "
         "start_time, {!s:<25}, title, {!s:<500}\r\n"
     )
     log_keys = ('meta_game', 'broadcast_id', 'start_timestamp', 'length',
                 'file_size', 'link_url', 'start_time', 'title')

     for nr, chunk in enumerate(sorted(chunks, key=lambda x: x['start_timestamp'])):
         video_url = chunk['video_file_url']
         # Local name = basename of the URL with the part index inserted
         # before the extension.
         (name, ext) = os.path.splitext(os.path.split(video_url)[1])
         filename = "{0}_{1}{2}".format(name, str(nr), ext)
         with open(LOGFILE_NAME, "a") as logfile:
             logfile.write(log_format.format(*[chunk[key] for key in log_keys]))
         download_file(video_url, filename)


 if __name__=="__main__":
     # Script entry point: take the video id as the single positional
     # argument and download that broadcast.
     cli = argparse.ArgumentParser()
     cli.add_argument('video_id', help='twitch video id')
     download_broadcast(cli.parse_args().video_id)
	import requests
	import sys
	import json
	import re
	import os
	import string
	import argparse

	# Root of the HTTP API; download_broadcast() prefixes its endpoint path with it.
	BASE_URL = 'http://api.justin.tv'
	# File that download_broadcast() appends one metadata line to per video part.
	LOGFILE_NAME = 'log.csv'

	def safe_filename(title):
		"""Return a valid filename for the 'title' string.

		Characters other than ASCII letters, digits, the space and the
		punctuation ``-_.()`` are dropped (not replaced).
		"""
		allowed = "-_.() " + string.ascii_letters + string.digits
		return "".join(c for c in title if c in allowed)

	def download_file(url, local_filename):
		"""Stream the resource at 'url' into the file 'local_filename'.

		The body is fetched in 1 KiB chunks and written to disk as it
		arrives, while a running megabyte counter is rewritten in place on
		stdout.

		Args:
			url: address of the file to fetch.
			local_filename: path of the file to create (overwritten if present).

		Raises:
			requests.exceptions.HTTPError: if the server answers with a
				4xx/5xx status, so an error page is never saved as the download.
		"""
		print("downloading {0}".format(local_filename))
		CS = 1024
		done = 0
		r = requests.get(url, stream=True)
		try:
			# Fail before creating the file when the server reports an error.
			r.raise_for_status()
			with open(local_filename, 'wb') as f:
				for chunk in r.iter_content(chunk_size=CS):
					if not chunk:  # filter out keep-alive new chunks
						continue
					f.write(chunk)
					# Count the bytes actually received: a chunk may be
					# shorter than CS (the last one always can be).
					done += len(chunk)
					sys.stdout.write("\r{0:>7.2f} MB".format(done / float(1024 ** 2)))
					# No newline is written, so flush to show the counter.
					sys.stdout.flush()
		finally:
			# A streamed response keeps its connection until it is closed.
			r.close()

		print("done\n")

	def download_broadcast(id_):
		"""Download all video parts for broadcast 'id_'.

		Requests the ``by_archive`` endpoint under BASE_URL, then, for every
		part in order of its 'start_timestamp', appends one metadata line to
		LOGFILE_NAME and saves the part with download_file().

		Args:
			id_: broadcast/archive id substituted into the API URL.

		Raises:
			Exception: if the API answers with a status other than 200, or
				with an empty list of parts.
			SystemExit: with status 1 if the response body is not valid JSON.
		"""
		pattern = '{base}/api/broadcast/by_archive/{id_}.json?onsite=true'
		url = pattern.format(base=BASE_URL, id_=id_)

		r = requests.get(url)
		if r.status_code != 200:
			raise Exception("API returned {0}".format(r.status_code))
		try:
			chunks = r.json()
		except ValueError as e:
			print("API did not return valid JSON: {}".format(e))
			print("{}".format(r.text))
			# sys.exit() rather than the interactive-only quit() helper; the
			# non-zero status tells the calling shell that the run failed.
			sys.exit(1)
		if not chunks:
			raise Exception("API returned no video parts for broadcast {0}".format(id_))
		print(chunks)

		# '!s' converts each value with str() before padding, so values that
		# do not accept a format spec (e.g. None from a JSON null) cannot raise.
		#I know that this is not a typical csv format (I should not include column names each line)
		log_format = (
			"game, {!s:<30}, vodid, {!s:<10}, start_timestamp, {!s:<10}, "
			"length, {!s:<5}, filesize, {!s:<10}, link_url, {!s:<50}, "
			"start_time, {!s:<25}, title, {!s:<500}\r\n"
		)
		log_keys = ('meta_game', 'broadcast_id', 'start_timestamp', 'length',
			'file_size', 'link_url', 'start_time', 'title')

		for nr, chunk in enumerate(sorted(chunks, key=lambda x: x['start_timestamp'])):
			video_url = chunk['video_file_url']
			# Local name = basename of the URL with the part index inserted
			# before the extension.
			(name, ext) = os.path.splitext(os.path.split(video_url)[1])
			filename = "{0}_{1}{2}".format(name, str(nr), ext)
			with open(LOGFILE_NAME, "a") as logfile:
				logfile.write(log_format.format(*[chunk[key] for key in log_keys]))
			download_file(video_url, filename)


	if __name__=="__main__":
		# Script entry point: take the video id as the single positional
		# argument and download that broadcast.
		parser = argparse.ArgumentParser()
		parser.add_argument('video_id', help='twitch video id')
		args = parser.parse_args()
		download_broadcast(args.video_id)