Skip to content

Instantly share code, notes, and snippets.

@goncalomb
Last active April 23, 2018 21:13
Show Gist options
  • Save goncalomb/63a2a19fa0715b5ba253116c9e72805b to your computer and use it in GitHub Desktop.
Save goncalomb/63a2a19fa0715b5ba253116c9e72805b to your computer and use it in GitHub Desktop.
Oddshot archiver. It's dead now.
#!/usr/bin/env php
<?php
// oddshot archiver
// Gonçalo Baltazar <[email protected]>
// I place this code in the public domain.
// https://creativecommons.org/publicdomain/zero/1.0/
// Names of the Oddshot channels to archive. The driver loop at the bottom of
// the script passes each entry to download_channel(), in this order.
$channels_to_archive = [
'Yogscast',
'Sjin',
'Duncan',
'YogsLomadia',
'Sips',
'Sips_',
'InTheLittleWood',
'HatFilms',
'YogscastKim',
'Rythian',
'Nilesy',
'CaffCast',
'Nysira',
'ZoeyProasheck',
'Turps',
'Pyrionflax',
'Vadact',
'heychrissa',
'YogPod',
'Triforce',
];
// Root of the finished archive, next to this script; download_channel()
// creates one sub-directory per channel inside it.
define('DATA_DIR', __DIR__ . DIRECTORY_SEPARATOR . 'data');
// Scratch area next to this script; request_oddshot_video() assembles each
// video in a temporary directory here before renaming it into place.
define('TMP_DIR', __DIR__ . DIRECTORY_SEPARATOR . 'tmp');
/**
 * Makes sure a directory exists and hands the same path back.
 *
 * Missing parent directories are created as well (recursive mkdir, mode 0777).
 *
 * @param string $path Directory that should exist after the call.
 * @return string The unchanged $path, so the call can be used inline.
 */
function ensure_dir($path) {
    // Short-circuit: mkdir() only runs when the directory is not there yet.
    is_dir($path) || mkdir($path, 0777, true);
    return $path;
}
/**
 * Turns a URL into a flat file name by replacing every '/' and ':' with '_'.
 *
 * @param string $url URL (or any string) to flatten.
 * @return string The input with both separator characters replaced.
 */
function clean_url($url) {
    return str_replace(['/', ':'], '_', $url);
}
/**
 * Downloads $url into the file at $path, optionally returning the body too.
 *
 * The response body is always streamed into $path (the file is created or
 * truncated). Redirects are followed. When $post_data is given the request is
 * sent as a POST with a JSON content type; otherwise it is a plain GET.
 *
 * Error reporting follows the script's trigger_error() convention:
 *  - the target file cannot be opened, or cURL reports a transport error:
 *    E_USER_ERROR (fatal under the default error handler);
 *  - the final HTTP status is not 200: E_USER_WARNING, and the body that was
 *    received is still kept on disk / returned.
 *
 * @param string      $url       Address to fetch.
 * @param string      $path      File the response body is written to.
 * @param bool        $return    When true, the saved body is read back and returned.
 * @param string|null $post_data Raw JSON request body; any falsy value means GET.
 * @return string|null The response body when $return is true, null otherwise
 *                     (and null if an error handler lets execution continue
 *                     after a failed open or transfer).
 */
function request_file($url, $path, $return=false, $post_data=null) {
    // Open the target first: without a valid handle there is nowhere to put
    // the body, and handing `false` to cURL / fclose() only fails later on.
    $fp = fopen($path, 'w+b');
    if ($fp === false) {
        trigger_error("cannot open {$path} for writing", E_USER_ERROR);
        return null;
    }

    $ch = curl_init();
    // Option order is kept as it was: CURLOPT_FILE comes after
    // CURLOPT_RETURNTRANSFER, and the body is read back from the file below
    // rather than taken from curl_exec().
    curl_setopt_array($ch, array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => $return,
        CURLOPT_FILE => $fp,
        CURLOPT_FOLLOWLOCATION => true
    ));
    if ($post_data) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array(
            'Content-Type: application/json',
            'Content-Length: ' . strlen($post_data))
        );
    }

    curl_exec($ch);
    // Collect everything needed from the handle, then release it before any
    // error is raised so no handle is left open on the failure paths.
    $error = curl_error($ch);
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($error !== '') {
        fclose($fp);
        trigger_error('curl error:' . $error, E_USER_ERROR);
        return null;
    }
    if ($status !== 200) {
        trigger_error("{$url} status code {$status} (!= 200)", E_USER_WARNING);
    }

    $data = null;
    if ($return) {
        // The body went to the file; rewind and read it back for the caller.
        rewind($fp);
        $data = stream_get_contents($fp);
    }
    fclose($fp);
    return $data;
}
# {"query":"query Routes($order_0:ShotsSortByType!,$period_1:ShotsPeriodType!) {viewer {id,...Fb}} fragment F0 on Tag {name,id} fragment F1 on User {name,id} fragment F2 on Shot {id,score,viewerReaction} fragment F3 on Shot {score,viewerReaction,id,...F2} fragment F4 on Shot {shotType,id} fragment F5 on Shot {id,name} fragment F6 on Shot {id,...F5} fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id} fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7} fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}} fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9} fragment Fb on Viewer {_tag3oD5V5:tag(name:\"Yogscast\") {_shots3mOF4X:shots(first:15,order:$order_0,period:$period_1) {pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}","variables":{"order_0":"popular","period_1":"all"}}
# {"query":"query TagScreen_ViewerRelayQL($id_0:ID!,$order_1:ShotsSortByType!,$period_2:ShotsPeriodType!) {node(id:$id_0) {...Fb}} fragment F0 on Tag {name,id} fragment F1 on User {name,id} fragment F2 on Shot {id,score,viewerReaction} fragment F3 on Shot {score,viewerReaction,id,...F2} fragment F4 on Shot {shotType,id} fragment F5 on Shot {id,name} fragment F6 on Shot {id,...F5} fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id} fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7} fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}} fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9} fragment Fb on Viewer {_tag3oD5V5:tag(name:\"Yogscast\") {_shots1RCnBs:shots(order:$order_1,period:$period_2,after:\"-1,XkRuiP\",first:15) {pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}","variables":{"id_0":"Vjox","order_1":"popular","period_2":"all"}}
/**
 * Builds the JSON request body for one page of a channel's shot list.
 *
 * Without $after the "Routes" query for the first page is produced; with a
 * cursor in $after the "TagScreen_ViewerRelayQL" query for the page following
 * that cursor is produced. Both ask for 15 shots ordered by "popular" over
 * period "all" and share the fragment definitions F0..Fa.
 *
 * The channel name and cursor are embedded as properly escaped string
 * literals and the envelope is produced by json_encode(), so values containing
 * quotes or backslashes can no longer break the payload. For ordinary names
 * and cursors the result is character-for-character what plain string
 * concatenation of the same template yields.
 *
 * @param string      $channel_name Tag/channel name to list.
 * @param string|null $after        Cursor of the last shot already seen; falsy for the first page.
 * @return string JSON document with "query" and "variables" members.
 */
function oddshot_list_query($channel_name, $after=null) {
    // Keep '/' and non-ASCII characters verbatim so only characters that
    // would actually break a string literal get escaped.
    $json_flags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    // json_encode() of a string yields a quoted literal with '"' and '\' escaped.
    $tag_literal = json_encode((string) $channel_name, $json_flags);

    // Fragment definitions shared verbatim by both query variants.
    $fragments = 'fragment F0 on Tag {name,id} fragment F1 on User {name,id} fragment F2 on Shot {id,score,viewerReaction} fragment F3 on Shot {score,viewerReaction,id,...F2} fragment F4 on Shot {shotType,id} fragment F5 on Shot {id,name} fragment F6 on Shot {id,...F5} fragment F7 on Shot {redditCommentsCount,redditCommentsUrl,id} fragment F8 on Shot {id,name,createdDate,viewsCount,isDeleted,isSaved,canViewerDelete,canViewerSave,_thumbnail1Q6Qmg:thumbnail(size:320),nsfw,tags {id,...F0},author {id,...F1},...F3,...F4,...F6,...F7} fragment F9 on ShotsConnection {edges {node {id,...F8},cursor},pageInfo {hasNextPage,hasPreviousPage}} fragment Fa on ShotsConnection {pageInfo {hasNextPage,hasPreviousPage},edges {node {id,name,thumbnail,isSaved,isDeleted,canViewerEdit,nsfw},cursor},...F9}';

    // Selection set of the shots connection plus the closing of fragment Fb.
    $shots_selection = ' {pageInfo {hasNextPage,hasPreviousPage},edges {cursor,node {id}},...Fa},id},id}';

    if ($after) {
        $cursor_literal = json_encode((string) $after, $json_flags);
        $query = 'query TagScreen_ViewerRelayQL($id_0:ID!,$order_1:ShotsSortByType!,$period_2:ShotsPeriodType!) {node(id:$id_0) {...Fb}} '
            . $fragments
            . ' fragment Fb on Viewer {_tag3oD5V5:tag(name:' . $tag_literal . ') {_shots1RCnBs:shots(order:$order_1,period:$period_2,after:' . $cursor_literal . ',first:15)'
            . $shots_selection;
        $variables = ['id_0' => 'Vjox', 'order_1' => 'popular', 'period_2' => 'all'];
    } else {
        $query = 'query Routes($order_0:ShotsSortByType!,$period_1:ShotsPeriodType!) {viewer {id,...Fb}} '
            . $fragments
            . ' fragment Fb on Viewer {_tag3oD5V5:tag(name:' . $tag_literal . ') {_shots3mOF4X:shots(first:15,order:$order_0,period:$period_1)'
            . $shots_selection;
        $variables = ['order_0' => 'popular', 'period_1' => 'all'];
    }

    return json_encode(['query' => $query, 'variables' => $variables], $json_flags);
}
/**
 * Extracts one entry from the "preloaded-data" JSON embedded in an Oddshot page.
 *
 * The page carries a <script id="preloaded-data" type="application/json">
 * block. Its decoded content is indexed as [1]['response']['viewer'], and the
 * value of the first key in that map whose name starts with $token is
 * returned (the callers in this script pass '_shot' and '_tag').
 *
 * Any failure (script block missing, JSON not decodable or not shaped as
 * expected, no key with the requested prefix) raises an E_USER_WARNING and
 * returns null instead of silently handing back an unrelated entry.
 *
 * @param string $html  Full HTML of the page.
 * @param string $token Prefix of the key to look up inside the viewer map.
 * @return mixed|null The matching entry (an array in practice) or null on failure.
 */
function oddshot_get_html_data($html, $token) {
    // The 's' modifier lets '.' span newlines in case the JSON is pretty-printed.
    $pattern = '#<script id="preloaded-data" type="application/json">(.+?)</script>#s';
    if (preg_match($pattern, (string) $html, $matches) !== 1) {
        trigger_error('preloaded-data script block not found', E_USER_WARNING);
        return null;
    }

    $decoded = json_decode($matches[1], true);
    if (!isset($decoded[1]['response']['viewer']) || !is_array($decoded[1]['response']['viewer'])) {
        trigger_error('preloaded-data has no viewer section', E_USER_WARNING);
        return null;
    }

    $token_length = strlen($token);
    foreach ($decoded[1]['response']['viewer'] as $key => $value) {
        // Keys may come back as integers for numeric names, hence the cast.
        if (strncmp((string) $key, $token, $token_length) === 0) {
            return $value;
        }
    }

    trigger_error("preloaded-data has no key starting with {$token}", E_USER_WARNING);
    return null;
}
/**
 * Fetches one page of a channel's shot list from the GraphQL endpoint.
 *
 * The raw response is saved to $path by request_file() and the "edges" list
 * of the shots connection is returned. The response is rooted at data.viewer
 * for the first page and at data.node when a cursor is supplied, matching the
 * two queries built by oddshot_list_query().
 *
 * @param string      $channel_name Tag/channel name to list.
 * @param string      $path         File the raw JSON response is written to.
 * @param string|null $after        Cursor to continue from; falsy for the first page.
 * @return array List of edges (each with 'cursor' and 'node'); empty when the
 *               response is missing, not valid JSON or lacks the expected
 *               structure (an E_USER_WARNING is raised in that case).
 */
function request_oddshot_list($channel_name, $path, $after=null) {
    $data = request_file('https://oddshot.tv/graphql', $path, true, oddshot_list_query($channel_name, $after));
    $response = json_decode((string) $data, true);

    // Root field and shots alias differ between the first-page and follow-up queries.
    if ($after) {
        $root = 'node';
        $shots_alias = '_shots1RCnBs';
    } else {
        $root = 'viewer';
        $shots_alias = '_shots3mOF4X';
    }

    // isset() walks the whole chain safely, so a null/invalid response never
    // reaches the caller as a non-array value.
    if (isset($response['data'][$root]['_tag3oD5V5'][$shots_alias]['edges'])
        && is_array($response['data'][$root]['_tag3oD5V5'][$shots_alias]['edges'])) {
        return $response['data'][$root]['_tag3oD5V5'][$shots_alias]['edges'];
    }

    trigger_error("unexpected shot list response for {$channel_name}", E_USER_WARNING);
    return [];
}
/**
 * Pages through a channel's complete shot list.
 *
 * Pages are requested one after another, each continuing from the cursor of
 * the last shot seen so far, until a page comes back with no entries. Every
 * raw page is stored in $dir as list_<page number>.json, and one line per shot
 * plus a final total is printed.
 *
 * @param string $channel_name Tag/channel name to list.
 * @param string $dir          Existing directory that receives the raw page files.
 * @return array List of shot nodes; each node's 'id' is replaced by the cursor
 *               with its first three characters removed.
 */
function request_oddshot_list_full($channel_name, $dir) {
    $videos = [];
    $cursor = null;
    for ($page = 0; ; $page++) {
        $page_file = $dir . DIRECTORY_SEPARATOR . "list_{$page}.json";
        $edges = request_oddshot_list($channel_name, $page_file, $cursor);
        foreach ($edges as $edge) {
            // Log line still shows the id as delivered, before it is replaced below.
            echo $edge['cursor'] . ' ' . $edge['node']['id'] . ' ' . $edge['node']['name'] . "\n";
            $cursor = $edge['cursor'];
            $node = $edge['node'];
            $node['id'] = substr($edge['cursor'], 3); // the cursor is used for urls
            $videos[] = $node;
        }
        // An empty page marks the end of the list.
        if (!count($edges)) {
            break;
        }
    }
    echo 'total ' . count($videos) . "\n";
    return $videos;
}
/**
 * Archives a single shot: its page HTML, thumbnails and every rendition.
 *
 * A shot whose directory already exists under $dir is skipped. Otherwise all
 * files are first collected in a fresh directory below TMP_DIR, which is
 * renamed to $dir/<id> as the very last step.
 *
 * @param array  $data Shot node from the list; 'id', 'name' and 'thumbnail' are read.
 * @param string $dir  Directory holding one sub-directory per archived shot.
 * @return void
 */
function request_oddshot_video($data, $dir) {
    $id = $data['id'];
    $video_dir = $dir . DIRECTORY_SEPARATOR . $id;
    if (is_dir($video_dir)) {
        echo "skipping {$id} {$data['name']}\n";
        return;
    }

    $tmp_dir = ensure_dir(TMP_DIR . DIRECTORY_SEPARATOR . $id . '_temp_' . time());
    $tmp_prefix = $tmp_dir . DIRECTORY_SEPARATOR;
    echo "downloading {$id} {$data['name']}\n";

    // Shot page, which also carries the preloaded data describing the renditions.
    $page_html = request_file("https://oddshot.tv/s/{$id}", $tmp_prefix . $id . '.html', true);
    $shot = oddshot_get_html_data($page_html, '_shot');

    // Thumbnails: the one named in the list entry and the one named on the shot page.
    request_file($data['thumbnail'], $tmp_prefix . clean_url($data['thumbnail']), true);
    request_file($shot['thumbnail'], $tmp_prefix . clean_url($shot['thumbnail']), true);

    // Video files, one per rendition.
    foreach ($shot['renditions'] as $rendition) {
        // Cut the URL at '#'; fall back to the full URL when that leaves nothing.
        $rendition_url = strstr($rendition['url'], '#', true) ?: $rendition['url'];
        echo " {$rendition_url}\n";
        request_file($rendition_url, $tmp_prefix . clean_url($rendition_url));
    }

    rename($tmp_dir, $video_dir);
}
/**
 * Archives one channel: its page HTML, avatar, shot list and every shot.
 *
 * Everything is stored below DATA_DIR/<channel name>. The channel page, the
 * avatar and the list directory carry the current Unix timestamp in their
 * names; the shots go to the 'videos' sub-directory, where shots that already
 * have a directory are skipped by request_oddshot_video().
 *
 * @param string $channel_name Tag/channel name as used in the /c/<name> URL.
 * @return void
 */
function download_channel($channel_name) {
    $channel_url = "https://oddshot.tv/c/{$channel_name}";
    $channel_dir = ensure_dir(DATA_DIR . DIRECTORY_SEPARATOR . $channel_name);
    echo "downloading /c/{$channel_name}\n";
    // html
    $channel_html = request_file($channel_url, $channel_dir . DIRECTORY_SEPARATOR . $channel_name . '_' . time() . '.html', true);
    $html_data = oddshot_get_html_data($channel_html, '_tag');
    // avatar
    $avatar_path_info = pathinfo($html_data['avatar']);
    // pathinfo() reports the extension without its dot and omits the key when
    // there is none, so the separator has to be re-added explicitly.
    $avatar_extension = isset($avatar_path_info['extension']) ? '.' . $avatar_path_info['extension'] : '';
    request_file($html_data['avatar'], $channel_dir . DIRECTORY_SEPARATOR . $avatar_path_info['filename'] . '_' . time() . $avatar_extension);
    echo "discovering videos\n";
    $list_dir = ensure_dir($channel_dir . DIRECTORY_SEPARATOR . 'list_' . time());
    $list = request_oddshot_list_full($channel_name, $list_dir);
    $videos_dir = ensure_dir($channel_dir . DIRECTORY_SEPARATOR . 'videos');
    foreach ($list as $video) {
        request_oddshot_video($video, $videos_dir);
    }
}
// Entry point: archive the configured channels one at a time, in list order.
array_walk($channels_to_archive, function ($channel_name) {
    download_channel($channel_name);
});
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment