Last active
April 23, 2018 21:13
-
-
Save goncalomb/63a2a19fa0715b5ba253116c9e72805b to your computer and use it in GitHub Desktop.
Oddshot archiver. It's dead now.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
// oddshot archiver
// Gonçalo Baltazar <[email protected]>
// I place this code in the public domain.
// https://creativecommons.org/publicdomain/zero/1.0/
// Names of the Oddshot channels to archive. Each one is used both in the
// channel page URL (https://oddshot.tv/c/<name>) and as the tag name in the
// list queries.
$channels_to_archive = [
    'Yogscast',
    'Sjin',
    'Duncan',
    'YogsLomadia',
    'Sips',
    'Sips_',
    'InTheLittleWood',
    'HatFilms',
    'YogscastKim',
    'Rythian',
    'Nilesy',
    'CaffCast',
    'Nysira',
    'ZoeyProasheck',
    'Turps',
    'Pyrionflax',
    'Vadact',
    'heychrissa',
    'YogPod',
    'Triforce',
];

// Both working directories live next to this script:
// DATA_DIR holds the per-channel archives, TMP_DIR holds downloads in progress.
define('DATA_DIR', __DIR__ . DIRECTORY_SEPARATOR . 'data');
define('TMP_DIR', __DIR__ . DIRECTORY_SEPARATOR . 'tmp');
/**
 * Make sure a directory exists, creating it (and any missing parents) when it
 * does not.
 *
 * @param string $path Directory path.
 * @return string The same $path, so the call can be used inline.
 */
function ensure_dir($path) {
    if (is_dir($path)) {
        return $path;
    }
    mkdir($path, 0777, true);
    return $path;
}
/**
 * Turn a URL into a string usable as a file name by replacing every '/' and
 * ':' with '_'.
 *
 * @param string $url
 * @return string
 */
function clean_url($url) {
    return str_replace(array('/', ':'), '_', $url);
}
/**
 * Fetch $url with curl, streaming the response body into the file at $path.
 *
 * Redirects are followed. When $post_data is given, the request is sent as a
 * POST with a JSON content type. A curl-level failure raises E_USER_ERROR; a
 * final HTTP status other than 200 only raises E_USER_WARNING, and whatever
 * body was received is still kept in $path.
 *
 * @param string      $url       Address to request.
 * @param string      $path      File to create or overwrite with the response body.
 * @param bool        $return    When true, also return the body as a string.
 * @param string|null $post_data Already-encoded JSON body to POST, if any.
 * @return string|null The response body when $return is true, otherwise null.
 */
function request_file($url, $path, $return=false, $post_data=null) {
    $fp = fopen($path, 'w+b');
    if ($fp === false) {
        // Fail loudly instead of handing an invalid stream to curl.
        trigger_error("cannot open {$path} for writing", E_USER_ERROR);
        return null;
    }

    $ch = curl_init();
    // NOTE(review): CURLOPT_FILE is set after CURLOPT_RETURNTRANSFER, and the
    // body is read back from $fp below rather than from curl_exec()'s return
    // value - confirm before reordering these options.
    curl_setopt_array($ch, array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => $return,
        CURLOPT_FILE => $fp,
        CURLOPT_FOLLOWLOCATION => true,
    ));
    if ($post_data) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array(
            'Content-Type: application/json',
            'Content-Length: ' . strlen($post_data),
        ));
    }

    curl_exec($ch);
    // Collect everything needed from the handle, then release it before any
    // error is raised so a fatal error does not leave it open.
    $error = curl_error($ch);
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    $body = null;
    if ($return) {
        // The response was written through $fp; read it back from the start.
        rewind($fp);
        $body = stream_get_contents($fp);
    }
    fclose($fp);

    if ($error) {
        trigger_error('curl error:' . $error, E_USER_ERROR);
    }
    if ($status != 200) {
        trigger_error("{$url} status code {$status} (!= 200)", E_USER_WARNING);
    }
    return $body;
}
# Example request bodies matching the two templates built by oddshot_list_query().
# First page (no cursor):
# {"query":"query Routes($order_0:ShotsSortByType!,$period_1:ShotsPeriodType!) {viewer {id,...Fb}} fragment F0 on Tag {name,id} fragment F1 on User {name,id} fragment F2 on Shot {id,score,viewerReaction} fragment F3 on Shot {score,viewerReaction,id,...F2} fragment F4 on Shot {shotType,id} fragment F5 on Shot {id,name} fragment F6 on Shot {id,...F5} fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id} fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7} fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}} fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9} fragment Fb on Viewer {_tag3oD5V5:tag(name:\"Yogscast\") {_shots3mOF4X:shots(first:15,order:$order_0,period:$period_1) {pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}","variables":{"order_0":"popular","period_1":"all"}}
# Follow-up page (continues after the cursor "-1,XkRuiP"):
# {"query":"query TagScreen_ViewerRelayQL($id_0:ID!,$order_1:ShotsSortByType!,$period_2:ShotsPeriodType!) {node(id:$id_0) {...Fb}} fragment F0 on Tag {name,id} fragment F1 on User {name,id} fragment F2 on Shot {id,score,viewerReaction} fragment F3 on Shot {score,viewerReaction,id,...F2} fragment F4 on Shot {shotType,id} fragment F5 on Shot {id,name} fragment F6 on Shot {id,...F5} fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id} fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7} fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}} fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9} fragment Fb on Viewer {_tag3oD5V5:tag(name:\"Yogscast\") {_shots1RCnBs:shots(order:$order_1,period:$period_2,after:\"-1,XkRuiP\",first:15) {pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}","variables":{"id_0":"Vjox","order_1":"popular","period_2":"all"}}
/**
 * Build the JSON request body that lists a channel's shots, 15 per page,
 * ordered by "popular" over the "all" period.
 *
 * Without $after the first-page ("Routes") query is produced; with $after the
 * follow-up ("TagScreen_ViewerRelayQL") query is produced, continuing after
 * that cursor. Both values are spliced into the body verbatim (no escaping).
 *
 * @param string      $channel_name Tag name to list shots for.
 * @param string|null $after        Cursor of the last shot already seen, if any.
 * @return string JSON document with "query" and "variables" members.
 */
function oddshot_list_query($channel_name, $after=null) {
    // Fragments F0..Fa are the same in both request bodies.
    $fragments = implode(' ', array(
        'fragment F0 on Tag {name,id}',
        'fragment F1 on User {name,id}',
        'fragment F2 on Shot {id,score,viewerReaction}',
        'fragment F3 on Shot {score,viewerReaction,id,...F2}',
        'fragment F4 on Shot {shotType,id}',
        'fragment F5 on Shot {id,name}',
        'fragment F6 on Shot {id,...F5}',
        'fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id}',
        'fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7}',
        'fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}}',
        'fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9}',
    )) . ' ';

    // Fragment Fb opens and closes the same way; only the shots(...) field differs.
    $fb_open = 'fragment Fb on Viewer {_tag3oD5V5:tag(name:\"' . $channel_name . '\") {';
    $fb_close = '{pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}",';

    if (!$after) {
        return '{"query":"query Routes($order_0:ShotsSortByType!,$period_1:ShotsPeriodType!) {viewer {id,...Fb}} '
            . $fragments
            . $fb_open
            . '_shots3mOF4X:shots(first:15,order:$order_0,period:$period_1) '
            . $fb_close
            . '"variables":{"order_0":"popular","period_1":"all"}}';
    }

    return '{"query":"query TagScreen_ViewerRelayQL($id_0:ID!,$order_1:ShotsSortByType!,$period_2:ShotsPeriodType!) {node(id:$id_0) {...Fb}} '
        . $fragments
        . $fb_open
        . '_shots1RCnBs:shots(order:$order_1,period:$period_2,after:\"' . $after . '\",first:15) '
        . $fb_close
        . '"variables":{"id_0":"Vjox","order_1":"popular","period_2":"all"}}';
}
/**
 * Extract one entry from the "preloaded-data" JSON embedded in an Oddshot page.
 *
 * The page carries a <script id="preloaded-data"> element holding a JSON
 * array; the entry is looked up under [1]['response']['viewer'], by the first
 * key that starts with $token (the keys carry a suffix after that prefix).
 *
 * @param string $html  Page source.
 * @param string $token Key prefix to look for, e.g. '_shot' or '_tag'.
 * @return mixed|null The matching entry, or null (with an E_USER_WARNING) when
 *                    the script element, the viewer data or a matching key is
 *                    missing.
 */
function oddshot_get_html_data($html, $token) {
    if (!preg_match('#<script id="preloaded-data" type="application/json">(.+?)</script>#', $html, $matches)) {
        trigger_error('preloaded-data script element not found', E_USER_WARNING);
        return null;
    }

    $decoded = json_decode($matches[1], true);
    if (!isset($decoded[1]['response']['viewer'])) {
        trigger_error('preloaded-data has no viewer response', E_USER_WARNING);
        return null;
    }

    foreach ($decoded[1]['response']['viewer'] as $key => $entry) {
        // Strict prefix comparison on the key as a string, so numeric-looking
        // keys and tokens cannot compare equal by type juggling.
        if (substr((string) $key, 0, strlen($token)) === (string) $token) {
            return $entry;
        }
    }

    // Report a missing key instead of falling through to an unrelated entry.
    trigger_error("preloaded-data has no entry starting with {$token}", E_USER_WARNING);
    return null;
}
/**
 * Request one page of a channel's shot list and return its edges.
 *
 * The raw response is also saved to $path. The edges sit under a different
 * root field and shots alias depending on whether a cursor was sent, mirroring
 * the two queries built by oddshot_list_query().
 *
 * @param string      $channel_name Channel (tag) name.
 * @param string      $path         File that receives the raw JSON response.
 * @param string|null $after        Cursor to continue after, or null for the first page.
 * @return array List of edges; empty (with an E_USER_WARNING) when the
 *               response does not contain the expected structure.
 */
function request_oddshot_list($channel_name, $path, $after=null) {
    $body = request_file('https://oddshot.tv/graphql', $path, true, oddshot_list_query($channel_name, $after));
    $response = json_decode($body, true);

    if ($after) {
        $root = 'node';
        $shots_alias = '_shots1RCnBs';
    } else {
        $root = 'viewer';
        $shots_alias = '_shots3mOF4X';
    }

    if (!isset($response['data'][$root]['_tag3oD5V5'][$shots_alias]['edges'])) {
        // An empty page lets the caller stop paging cleanly instead of
        // iterating over and counting null.
        trigger_error("unexpected list response for {$channel_name}", E_USER_WARNING);
        return array();
    }
    return $response['data'][$root]['_tag3oD5V5'][$shots_alias]['edges'];
}
/**
 * Collect every shot of a channel by requesting list pages until one comes
 * back empty.
 *
 * Each page's raw response is saved as list_<n>.json inside $dir, and one
 * line per shot is printed. The last cursor of a page is sent as the `after`
 * value of the next request.
 *
 * @param string $channel_name Channel (tag) name.
 * @param string $dir          Existing directory for the list_<n>.json files.
 * @return array List of shot nodes, each with 'id' replaced by the cursor
 *               minus its first three characters.
 */
function request_oddshot_list_full($channel_name, $dir) {
    $page = 0;
    $after = null;
    $result = [];
    while (true) {
        $list = request_oddshot_list($channel_name, $dir . DIRECTORY_SEPARATOR . "list_{$page}.json", $after);
        if (!$list) {
            // Empty page, or a failed request that produced null: stop here
            // rather than calling count() on a non-array.
            break;
        }
        foreach ($list as $edge) {
            echo $edge['cursor'], ' ', $edge['node']['id'], ' ' , $edge['node']['name'], "\n";
            $after = $edge['cursor'];
            $edge['node']['id'] = substr($edge['cursor'], 3); // the cursor is used for urls
            $result[] = $edge['node'];
        }
        $page++;
    }
    echo 'total ', count($result), "\n";
    return $result;
}
/**
 * Download everything belonging to one shot: its page, both thumbnails and
 * every video rendition.
 *
 * Files are first collected in a fresh directory under TMP_DIR, which is
 * renamed to <$dir>/<id> at the end. A shot whose target directory already
 * exists is skipped.
 *
 * @param array  $data Shot node from the list; 'id', 'name' and 'thumbnail' are read.
 * @param string $dir  Directory that holds one sub-directory per shot.
 * @return void
 */
function request_oddshot_video($data, $dir) {
    $id = $data['id'];
    $name = $data['name'];

    $video_dir = $dir . DIRECTORY_SEPARATOR . $id;
    if (is_dir($video_dir)) {
        echo "skipping {$id} {$name}\n";
        return;
    }

    $tmp_dir = ensure_dir(TMP_DIR . DIRECTORY_SEPARATOR . $id . '_temp_' . time());
    $tmp_prefix = $tmp_dir . DIRECTORY_SEPARATOR;
    echo "downloading {$id} {$name}\n";

    // Shot page, which embeds the rendition list.
    $page_html = request_file("https://oddshot.tv/s/{$id}", $tmp_prefix . $id . '.html', true);
    $shot = oddshot_get_html_data($page_html, '_shot');

    // Thumbnails: the one from the list entry and the one from the shot page.
    request_file($data['thumbnail'], $tmp_prefix . clean_url($data['thumbnail']), true);
    request_file($shot['thumbnail'], $tmp_prefix . clean_url($shot['thumbnail']), true);

    // Video files, requested without the '#...' fragment when there is one.
    foreach ($shot['renditions'] as $rendition) {
        $rendition_url = strstr($rendition['url'], '#', true) ?: $rendition['url'];
        echo " {$rendition_url}\n";
        request_file($rendition_url, $tmp_prefix . clean_url($rendition_url));
    }

    rename($tmp_dir, $video_dir);
}
/**
 * Archive one channel: its page, its avatar, the full shot list and every shot.
 *
 * Everything is stored under DATA_DIR/<channel>. The page, avatar and list
 * directory names carry the current timestamp; the shots go into the
 * 'videos' sub-directory.
 *
 * @param string $channel_name Channel name as used in https://oddshot.tv/c/<name>.
 * @return void
 */
function download_channel($channel_name) {
    $channel_url = "https://oddshot.tv/c/{$channel_name}";
    $channel_dir = ensure_dir(DATA_DIR . DIRECTORY_SEPARATOR . $channel_name);
    echo "downloading /c/{$channel_name}\n";
    // html
    $channel_html = request_file($channel_url, $channel_dir . DIRECTORY_SEPARATOR . $channel_name . '_' . time() . '.html', true);
    $html_data = oddshot_get_html_data($channel_html, '_tag');
    // avatar
    $avatar_path_info = pathinfo($html_data['avatar']);
    // pathinfo() reports the extension without its leading dot, and omits the
    // key when there is none, so the separator has to be added back here.
    $avatar_extension = isset($avatar_path_info['extension']) ? '.' . $avatar_path_info['extension'] : '';
    request_file($html_data['avatar'], $channel_dir . DIRECTORY_SEPARATOR . $avatar_path_info['filename'] . '_' . time() . $avatar_extension);
    echo "discovering videos\n";
    $list_dir = ensure_dir($channel_dir . DIRECTORY_SEPARATOR . 'list_' . time());
    $list = request_oddshot_list_full($channel_name, $list_dir);
    $videos_dir = ensure_dir($channel_dir . DIRECTORY_SEPARATOR . 'videos');
    foreach ($list as $video) {
        request_oddshot_video($video, $videos_dir);
    }
}
// Entry point: archive the configured channels one after another, in list order.
foreach ($channels_to_archive as $channel_to_archive) {
    download_channel($channel_to_archive);
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment