# Last active March 23, 2016 07:34
# Yahoo!7 Plus7 HLS URL Extractor
#!/usr/bin/env python
# Usage: [-h] [--dump-json] [--list-renditions] [--bitrate BITRATE] URL
# To download the HLS stream using ffmpeg, use the following command:
# ffmpeg -i <URL> -acodec copy -vcodec copy -bsf:a aac_adtstoasc <OUTPUT_FILE>
import sys, os, re, argparse, urllib2, json
# Constants
# Fallback identifiers: main() uses these as-is when the command-line argument
# is a bare numeric reference ID; PLAYER_KEY is replaced by the value scraped
# from the page when a URL is given instead.
# NOTE(review): get_video_data() also reads API_ENDPOINT, which is not defined
# in the visible part of this file — confirm it is declared alongside these.
PLAYER_KEY = "AQ~~,AAACKW9LG-E~,UJVysOuwpLTTi79ymVZHZfuHtACyL2xM" # player key is contained within each embedding page
PUB_ID = "2376984108001" # publisher ID can be extracted from AMF response
def get_video_data_from_url(url):
    """Fetch a player page and extract the embed parameters from its markup.

    Downloads ``url`` and searches the body for the ``<param>`` tags named
    ``@videoPlayer`` and ``playerKey``.

    Args:
        url: Address of the page containing the player.

    Returns:
        A ``(refId, playerKey)`` tuple of strings. ``refId`` is the run of
        digits that follows ``ref:`` in the ``@videoPlayer`` value.

    Raises:
        ValueError: If either ``<param>`` tag is missing from the page.
    """
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # The patterns below are text patterns; decode if the body arrived as
    # raw bytes (a no-op when the body is already a native str).
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")

    ref_match = re.search(
        r"<param\s+name=['\"]@videoPlayer['\"]\s+value=['\"]ref:(\d+?)['\"][^>]+/?>",
        data,
    )
    key_match = re.search(
        r"<param\s+name=['\"]playerKey['\"]\s+value=['\"](.+?)['\"][^>]+/?>",
        data,
    )
    if ref_match is None or key_match is None:
        # Fail with a readable message instead of AttributeError on None.
        raise ValueError("Could not find the player parameters in %s" % url)

    return (ref_match.group(1), key_match.group(1))
def get_json(url):
    """Download ``url`` and return its body decoded as JSON.

    Args:
        url: Address to fetch.

    Returns:
        The Python value produced by ``json.loads`` for the response body
        (a JSON ``null`` body yields ``None``).
    """
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        # Always release the connection, including when parsing fails.
        response.close()
def get_video_data(playerKey, pubId, refId):
    """Request the video record for a single reference ID.

    Appends the three identifiers as query parameters to the module-level
    ``API_ENDPOINT`` and returns whatever ``get_json`` decodes from that URL.

    Args:
        playerKey: Player key to send as ``playerKey``.
        pubId: Publisher ID to send as ``pubId``.
        refId: Reference ID to send as ``refId``.
    """
    # NOTE(review): API_ENDPOINT is not defined in the visible part of this
    # file — confirm it exists at module level.
    query_values = (API_ENDPOINT, playerKey, pubId, refId)
    request_url = "%s&playerKey=%s&pubId=%s&refId=%s" % query_values
    return get_json(request_url)
def _describe_rendition(position, rendition):
    # Build the one-line summary printed for each entry by --list-renditions.
    if rendition['audioOnly']:
        picture = "(audio only)"
    else:
        picture = "%ix%i" % (rendition['frameWidth'], rendition['frameHeight'])
    # Floor division keeps the whole-number results on both Python 2 and 3.
    return "%i. %ikbps \t%s \t%iMB" % (
        position,
        rendition['encodingRate'] // 1024,
        picture,
        rendition['size'] // 1024 // 1024,
    )


def main():
    """Parse the command line and print the requested stream information.

    The positional ``URL`` argument is first tried as a numeric reference ID;
    if it is not a number it is treated as a page URL and the reference ID and
    player key are scraped from that page.

    Depending on the flags, prints the raw video data as JSON, a list of the
    available renditions, the URL of the rendition closest to ``--bitrate``,
    or (by default) the ``FLVFullLengthURL`` of the video.

    Returns:
        0 on success (including ``--dump-json``, even when the data is
        ``None``), or 1 when no video data was returned.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('URL', help='Either the reference ID of the video, if known, or the URL of the page containing the player')
    argparser.add_argument('--dump-json', help='Dump the video data JSON', action='store_true')
    argparser.add_argument('--list-renditions', help='List available renditions', action='store_true')
    argparser.add_argument('--bitrate', help='Preferred bitrate, in kbps', type=int)
    args = argparser.parse_args()

    # Default to supplied constants
    playerKey = PLAYER_KEY
    pubId = PUB_ID

    # Try to use the supplied parameter as a number; if that fails, grab the
    # required info from the URL given.
    try:
        refId = int(args.URL)
    except ValueError:
        (refId, playerKey) = get_video_data_from_url(args.URL)

    # Fetch the video data by the supplied reference id
    data = get_video_data(playerKey=playerKey, refId=refId, pubId=pubId)

    # NOTE: single-argument print(...) behaves identically under the
    # Python 2 print statement and the Python 3 print function.

    # If we were asked to dump the data only, dump it as JSON and stop.
    if args.dump_json:
        print(json.dumps(data, indent=2, separators=(',', ': ')))
        return 0

    # Must be checked before any data[...] access below.
    if data is None:
        print("Video not available")
        return 1

    if args.list_renditions:
        renditions = sorted(data["IOSRenditions"], key=lambda rendition: rendition['encodingRate'])
        for position, rendition in enumerate(renditions, 1):
            print(_describe_rendition(position, rendition))
        # Listing succeeded, so report success.
        return 0

    if args.bitrate:
        # Pick the rendition whose rate (in kbps) is closest to the request;
        # min() returns the first such entry, same as sorted(...)[0].
        rendition = min(
            data["IOSRenditions"],
            key=lambda rendition: abs(args.bitrate - (rendition['encodingRate'] // 1024)),
        )
        print(rendition["defaultURL"])
    else:
        # No preference given: fall back to the FLVFullLengthURL parameter.
        print(data["FLVFullLengthURL"])
    return 0
if __name__ == "__main__":
