Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save yonas/d3ab29012b55392763c48cc4ef5ff72b to your computer and use it in GitHub Desktop.
Save yonas/d3ab29012b55392763c48cc4ef5ff72b to your computer and use it in GitHub Desktop.
#!/usr/bin/env php
<?php
/*
Made by Kudusch (blog.kudusch.de, kudusch.de, @Kudusch)
---------
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <[email protected]>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
---------
# How to use
- Run the script with the URL of the page that contains the embedded video. The script searches the page source for the master.m3u8 file (e.g. php get_tshls 'http://hls.daserste.de/my-video-page').
- Wait for the script to download and merge all media files; every 10-second part is about 3 MB (when it's done, it will output the runtime).
- If necessary, convert the *.ts file to a *.mp4 file with a media converter, e.g. HandBrake.
- enjoy
*/
// Entry point: download the page given on the command line, locate its HLS
// master playlist, download every *.ts segment of the selected stream into
// ./files and merge them into "<page title>.ts" in the current directory.

// runtime
$startTime = microtime(true);

// get url from input
if (!isset($argv[1]) || $argv[1] === '') {
    echo "Usage: php get_tshls '<url of the page with the embedded video>'\n";
    exit(1);
}
$url = $argv[1];

if (!$html = getHTML($url)) {
    echo 'Could not download webpage. Exiting...';
    exit(1);
}
if (!$masterHlsUrl = getMasterHls($html)) {
    echo 'Could not find master HLS stream URL. Exiting...';
    exit(1);
}

// get stream with highest bandwidth
$streamUrl = getHighBandwidthStream($masterHlsUrl);

// get array of all links to *.ts files
$list = getHlsFiles($streamUrl);
if (empty($list)) {
    echo 'HLS list is empty. Exiting...';
    exit(1);
}

// The merge step appends to "<name>.ts", so refuse to run if that file is
// already there instead of silently producing a doubled/corrupt video.
$filename = getFilename($html);
if (file_exists($filename.'.ts')) {
    echo "Output file \"$filename.ts\" already exists. Exiting...";
    exit(1);
}

// make new directory
if (!is_dir('files') && !mkdir('files')) {
    echo 'Could not create directory "files". Exiting...';
    exit(1);
}

// Parts left over from an aborted run would otherwise end up in the merge.
$staleParts = glob('files/part.*.ts');
if ($staleParts !== false) {
    foreach ($staleParts as $stalePart) {
        unlink($stalePart);
    }
}

// download all files from array; the zero padding is at least 3 digits and
// grows with the number of parts so the names always sort in playback order
$padWidth = max(3, strlen((string) count($list)));
$n = 1;
foreach ($list as $segmentUrl) {
    $number = str_pad((string) $n, $padWidth, "0", STR_PAD_LEFT);
    echo $n." ";

    $source = fopen($segmentUrl, 'rb');
    if ($source === false) {
        // A missing segment would leave a hole in the video, so stop here.
        echo "\nCould not download part $n. Exiting...";
        exit(1);
    }
    $written = file_put_contents("files/part.".$number.".ts", $source);
    fclose($source);
    if ($written === false) {
        echo "\nCould not write part $n to disk. Exiting...";
        exit(1);
    }
    $n++;
}

// merge files and delete parts
mergeFiles('files', $filename);
if (!is_file($filename.'.ts')) {
    echo "\nCould not create \"$filename.ts\". Exiting...";
    exit(1);
}
echo "\nNew video file \"$filename.ts\" successfully created.";
echo "\nIf necessary, you can convert $filename.ts with a media converter. For example, use Handbrake to convert it to a .mp4 file.";

// echo runtime for debugging
echo("\n\nRun in ".(microtime(true)-$startTime)." seconds.");
// Derives the output file name (without extension) from the page's <title>.
// input: string (HTML source), output: string
// Returns the sanitised title, or 'movie' when the page has no usable title.
function getFilename($html) {
    // Non-greedy so that only the first <title> element is used (a greedy
    // match would run up to the last </title> on a minified page); "s" lets
    // the title span several lines, "i" accepts <TITLE>, and [^>]* accepts
    // attributes on the tag.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $matches)) {
        // Collapse line breaks/indentation inside the element to single spaces.
        $title = preg_replace('/\s+/', ' ', trim($matches[1]));
        $name = strip_punctuation($title);
        if (is_string($name) && $name !== '') {
            return $name;
        }
    }
    // No title, or nothing left after sanitising.
    return 'movie';
}
// Turns a page title into a file-name friendly string.
// input: string, output: string
// HTML entities are decoded, all punctuation is removed and every run of
// whitespace becomes a single "-" (e.g. "Hello, World!" -> "Hello-World").
function strip_punctuation($string) {
    $string = html_entity_decode($string, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    if (preg_match('//u', $string) === 1) {
        // Valid UTF-8: the "u" modifier makes \pP work on characters rather
        // than on single bytes, so multi-byte letters are left intact.
        $string = preg_replace('/\pP+/u', '', $string);
        $string = preg_replace('/\s+/u', '-', trim($string));
    } else {
        // Not valid UTF-8: only strip ASCII punctuation, byte-wise.
        $string = preg_replace('/[[:punct:]]+/', '', $string);
        $string = preg_replace('/\s+/', '-', trim($string));
    }

    // Punctuation is stripped before the separators are inserted, otherwise
    // the "-" separators would be removed again as punctuation.
    return trim($string, '-');
}
// Downloads the resource at $url with a plain HTTP GET.
// input: string (URL), output: string (response body) or false on failure
function getHTML($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Follow redirects (e.g. http -> https) instead of returning the
    // redirect response itself.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    // Treat HTTP status >= 400 as a failure rather than returning the
    // error page as if it were the requested content.
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    // Do not hang forever on an unreachable host.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
    $raw = curl_exec($ch);
    curl_close($ch);
    return $raw;
}
// Finds the first URL ending in ".m3u8" in the page source.
// input: string (HTML source), output: string (URL) or null if none is found
function getMasterHls($html) {
    // A URL cannot contain whitespace, quotes or angle brackets, so the match
    // stays inside one attribute/string value instead of running from the
    // first "http" on a line to the last ".m3u8" on it.
    if (preg_match('~https?:[^\s"\'<>]+?\.m3u8~i', $html, $matches)) {
        // URLs embedded in JSON have their slashes escaped as "\/".
        return str_replace('\\/', '/', $matches[0]);
    }
    return null;
}
// Picks the variant stream with the highest BANDWIDTH from a master playlist.
// input: string (URL of master.m3u8), output: string (absolute playlist URL)
// If the playlist cannot be downloaded or lists no variant streams, $masterUrl
// itself is returned so the caller can try it as a media playlist.
function getHighBandwidthStream($masterUrl) {
    // getHTML() is a plain HTTP GET, so it is used for playlists as well.
    $playlist = getHTML($masterUrl);
    if (!is_string($playlist) || $playlist === '') {
        return $masterUrl;
    }

    // Each variant is an "#EXT-X-STREAM-INF:<attributes>" tag followed by its
    // URI on the next non-comment line.
    $bestUri = null;
    $bestBandwidth = -1;
    $pendingBandwidth = null;
    foreach (preg_split('/\r\n|\r|\n/', $playlist) as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }
        if (strpos($line, '#EXT-X-STREAM-INF') === 0) {
            // The [:,] prefix keeps AVERAGE-BANDWIDTH= from matching.
            $pendingBandwidth = preg_match('/[:,]BANDWIDTH=(\d+)/', $line, $m) ? (int) $m[1] : 0;
            continue;
        }
        if ($line[0] === '#') {
            continue;
        }
        if ($pendingBandwidth !== null) {
            if ($pendingBandwidth > $bestBandwidth) {
                $bestBandwidth = $pendingBandwidth;
                $bestUri = $line;
            }
            $pendingBandwidth = null;
        }
    }
    if ($bestUri === null) {
        return $masterUrl;
    }

    // Resolve the variant URI against the master playlist URL.
    if (preg_match('#^https?://#i', $bestUri)) {
        return $bestUri;
    }
    // A "/" inside the query string must not be taken for a path separator.
    $base = preg_replace('/[?#].*$/', '', (string) $masterUrl);
    if (preg_match('#^(([a-z][a-z0-9+.-]*:)//[^/]+)#i', $base, $m)) {
        if (strpos($bestUri, '//') === 0) {
            // protocol-relative: reuse the scheme of the master URL
            return $m[2] . $bestUri;
        }
        if ($bestUri[0] === '/') {
            // root-relative: reuse scheme and host of the master URL
            return $m[1] . $bestUri;
        }
    }
    // relative to the directory of the master playlist
    return substr($base, 0, strrpos($base, '/') + 1) . $bestUri;
}
// Lists the *.ts segments of a media playlist in playback order.
// input: string (URL of a *.m3u8 media playlist), output: array of absolute
// segment URLs; empty if the playlist cannot be downloaded or has no *.ts
// entries.
function getHlsFiles($streamUrl) {
    // getHTML() is a plain HTTP GET, so it is used for playlists as well.
    $raw = getHTML($streamUrl);
    if (!is_string($raw) || $raw === '') {
        return array();
    }

    // Pieces of the playlist URL needed to resolve relative segment URIs.
    // A "/" inside the query string must not be taken for a path separator.
    $base = preg_replace('/[?#].*$/', '', (string) $streamUrl);
    $baseDir = substr($base, 0, strrpos($base, '/') + 1);
    $origin = null;
    $scheme = null;
    if (preg_match('#^(([a-z][a-z0-9+.-]*:)//[^/]+)#i', $base, $m)) {
        $origin = $m[1];
        $scheme = $m[2];
    }

    // extract file links
    $list = array();
    foreach (preg_split('/\r\n|\r|\n/', $raw) as $line) {
        // trim() also removes the "\r" of CRLF-terminated playlists
        $line = trim($line);
        if ($line === '' || $line[0] === '#') {
            continue;
        }
        // accept "seg.ts" as well as "seg.ts?token=..."
        if (!preg_match('/\.ts(?:[?#].*)?$/i', $line)) {
            continue;
        }
        if (preg_match('#^https?://#i', $line)) {
            $list[] = $line;
        } elseif ($scheme !== null && strpos($line, '//') === 0) {
            $list[] = $scheme . $line;
        } elseif ($origin !== null && $line[0] === '/') {
            $list[] = $origin . $line;
        } else {
            $list[] = $baseDir . $line;
        }
    }
    return $list;
}
// Concatenates all *.ts parts found in $dirName, in natural name order, onto
// "$filename.ts" and then removes the parts.
// input: string (directory with the parts), string (output name without the
// ".ts" extension); output: bool, true when every part was merged.
// The output file is opened in append mode. On failure the parts are kept.
// $dirName itself is removed only if it is empty after the parts are deleted.
function mergeFiles($dirName, $filename) {
    $entries = is_dir($dirName) ? scandir($dirName) : false;
    if ($entries === false) {
        return false;
    }

    // get all *.ts files in directory
    $parts = array();
    foreach ($entries as $entry) {
        if (preg_match('/\.ts$/', $entry) && is_file($dirName.'/'.$entry)) {
            $parts[] = $entry;
        }
    }
    if (empty($parts)) {
        return false;
    }
    // Directory listing order is not playback order; natural sorting also
    // keeps part.1000 after part.999 whatever the zero padding is.
    sort($parts, SORT_NATURAL);

    // join parts (done in PHP so that no file name passes through a shell)
    $out = fopen($filename.'.ts', 'ab');
    if ($out === false) {
        return false;
    }
    $merged = true;
    foreach ($parts as $part) {
        $in = fopen($dirName.'/'.$part, 'rb');
        if ($in === false) {
            $merged = false;
            break;
        }
        $copied = stream_copy_to_stream($in, $out);
        fclose($in);
        if ($copied === false) {
            $merged = false;
            break;
        }
    }
    fclose($out);
    if (!$merged) {
        return false;
    }

    // remove parts
    foreach ($parts as $part) {
        unlink($dirName.'/'.$part);
    }
    $remaining = scandir($dirName);
    if ($remaining !== false && count(array_diff($remaining, array('.', '..'))) === 0) {
        rmdir($dirName);
    }
    return true;
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment