Skip to content

Instantly share code, notes, and snippets.

@2bj
Last active December 30, 2015 22:13
Show Gist options
  • Save 2bj/745836 to your computer and use it in GitHub Desktop.
Save 2bj/745836 to your computer and use it in GitHub Desktop.
Super.kg media (audio/video) grabber.
// Fix the YouTube video title after/while uploading (http://www.youtube.com/upload).
// NOTE(review): $x(xpath) is presumably the browser DevTools console helper, so
// this snippet is meant to be pasted into the console on the upload page - confirm.
$x("//input[contains(@class, 'video-settings-title')]").forEach(function (titleInput) {
    var currentTitle = titleInput.value;

    // Titles without a space are left untouched.
    if (currentTitle.indexOf(" ") === -1) {
        return;
    }

    // String.prototype.replace with a string pattern rewrites the first match only.
    titleInput.value = currentTitle.replace(' ', ' - ');
});
#!/usr/bin/env php
<?php date_default_timezone_set('Europe/Moscow'); // timezone used by date() when the log header is stamped
// Entry point: constructing the grabber runs the whole job - the constructor
// parses the CLI options, fetches the listing pages and starts the downloads.
new Supermediag();
/**
 * Super.kg media (audio/video) grabber.
 *
 * Scrapes the super.kg media listing pages, collects every media id together
 * with its author/title and hands each item to the external `superg`
 * downloader. Constructing the object runs the whole job (see __construct()).
 *
 * @author 2BJ dev2bj★gmail.com
 * @justforfun
 * @required
 * - superg @see https://gist.github.com/b97c8fb19dec39d8513f
 * - mp3info (sudo apt-get install mp3info) ( ! NOT IMPLEMENTED)
 *
 * @TODO:
 * - add interactive mode
 * - add search
 *
 * @updated Mon Aug 12 2013 23:56:24 GMT+0600 (KGT)
 */
class Supermediag {

    const TYPE_AUDIO = 'audio';
    const TYPE_VIDEO = 'video';
    const EXT_AUDIO = '.mp3';
    const EXT_VIDEO = '.mp4';
    const URL = 'http://super.kg/media/?pg={page}&only={type}';
    const FAKE_USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Ubuntu/10.10 Chromium/8.0.552.215 Chrome/8.0.552.215 Safari/534.10';

    /**
     * @var string destination directory (relative to the working directory)
     */
    private $_download_dir = 'superg_downloads';

    /**
     * @var string media type, one of the TYPE_* constants
     */
    private $_type;

    /**
     * @var string media type as given on the command line ('a' or 'v'); passed on to superg
     */
    private $_type_alias;

    /**
     * @var string file extension matching the media type, one of the EXT_* constants
     */
    private $_ext;

    /**
     * @var integer first listing page to fetch
     */
    private $_from;

    /**
     * @var integer last listing page to fetch
     */
    private $_to;

    /**
     * @var integer single media id requested with -o (not implemented yet)
     */
    private $_id;

    /**
     * @var integer collecting stops once this media id has been added
     */
    private $_until_id;

    /**
     * @var bool when TRUE the superg commands are only logged, not executed
     */
    private $_dry_run = FALSE;

    /**
     * @var string short options for getopt()
     */
    private $_opts = 'm::f::t::o::u::';

    /**
     * @var array long format options for getopt()
     */
    private $_long_opts = array('dry-run');

    /**
     * @var array URLs that could not be fetched
     */
    private $_fail_pages = array();

    /**
     * @var array collected media: id => array('author' => ..., 'song_name' => ...)
     */
    private $_songs = array();

    /**
     * @var bool set once the -u media id has been collected
     */
    private $_stop = FALSE;

    /**
     * @var int number of listing page URLs built so far
     */
    private $_page_cnt = 0;

    /**
     * Print the usage/help text through the logger.
     */
    private function _get_help()
    {
        global $argv;
        $script_name = $argv[0];
        // The heredoc is kept at column 0 so the file also parses on PHP < 7.3.
        $help =<<<HELP
Usage: $script_name -m<a|v> -f<\d+> -t<\d+> -o<\d+> -u<\d+>
\t-m, \tmedia type: v - video, a - audio
\t-f, \tpage from (integer)
\t-t, \tpage to (integer)
\t-o, \tget one media by id (integer) ( ! NOT IMPLEMENTED)
\t-u, \tget all media until this id (integer)
\t--------
\t--dry-run, \tDry run mode ;)
Example:
\t"$script_name -mv -f1 -t10"\t - get from 1 to 10 pages of video content.
\t"$script_name -ma -t3"\t - get only 3rd page of audio.
\t"$script_name -o1983"\t - get media by id 1983.
\t"$script_name -mv -u1983"\t - get all video until id 1983 from page 1.
HELP;
        $this->_log($help);
    }

    /**
     * Log an option error, print the help text and terminate with a non-zero status.
     *
     * @param string $message
     */
    private function _usage_error($message)
    {
        $this->_log($message);
        $this->_get_help();
        exit(1);
    }

    /**
     * Convert a raw getopt() value to a positive integer.
     *
     * @param mixed $value raw option value (getopt() yields FALSE for a missing
     *                     value and an array for a repeated option)
     * @return integer the parsed value, or 0 when it is not a positive integer
     */
    private function _positive_int($value)
    {
        if ( ! is_string($value) || ! preg_match('/^\d+\z/', $value))
        {
            return 0;
        }
        return (int)$value;
    }

    /**
     * Validate the command line options and store them on the object.
     *
     * Page range rules (they follow the examples in the help text):
     *  - "-f" alone or "-t" alone selects that single page;
     *  - "-u" without "-f" starts from page 1;
     *  - "-u" without "-t" leaves the upper bound open (see _collect_pages()).
     *
     * Any invalid combination prints an error plus the help and exits.
     *
     * @param array $opts result of getopt()
     */
    private function _parse_opts($opts)
    {
        if ( ! is_array($opts))
        {
            // getopt() may return FALSE on failure
            $opts = array();
        }

        // TODO: implement single-id mode; for now -o is only validated.
        if (isset($opts['o']))
        {
            $id = $this->_positive_int($opts['o']);
            if ( ! $id)
            {
                $this->_usage_error(' * Error: -o must be integer');
            }
            $this->_id = $id;
            return;
        }

        if ( ! isset($opts['m']))
        {
            $this->_usage_error(' * Error: pls, set type of media: a - audio, v - video');
        }
        if ($opts['m'] === 'a')
        {
            $this->_type = self::TYPE_AUDIO;
            $this->_ext = self::EXT_AUDIO;
        }
        else if ($opts['m'] === 'v')
        {
            $this->_type = self::TYPE_VIDEO;
            $this->_ext = self::EXT_VIDEO;
        }
        else
        {
            $this->_usage_error(' * Error: unknown media type');
        }
        $this->_type_alias = $opts['m'];

        $has_from = isset($opts['f']);
        $has_to = isset($opts['t']);
        $has_until = isset($opts['u']);

        if ( ! $has_from && ! $has_to && ! $has_until)
        {
            $this->_usage_error(' * Error: pls, set page from (-f)');
        }
        if ($has_from)
        {
            $this->_from = $this->_positive_int($opts['f']);
            if ( ! $this->_from)
            {
                $this->_usage_error(' * Error: page -f must be integer');
            }
        }
        if ($has_to)
        {
            $this->_to = $this->_positive_int($opts['t']);
            if ( ! $this->_to)
            {
                $this->_usage_error(' * Error: page -t must be integer');
            }
        }
        if ( ! $has_from)
        {
            // "-u" scans from the first page, a lone "-t" means "only that page"
            $this->_from = $has_until ? 1 : $this->_to;
        }
        if ($has_to && $this->_to < $this->_from)
        {
            $this->_usage_error(' * Error: page -t cannot be less than -f');
        }

        if ($has_until)
        {
            $until_id = $this->_positive_int($opts['u']);
            if ( ! $until_id)
            {
                $this->_usage_error(' * Error: option -u must be integer');
            }
            $this->_until_id = $until_id;
            $this->_log('Set until_id '.$this->_until_id);
        }

        if (isset($opts['dry-run']))
        {
            $this->_dry_run = TRUE;
        }
    }

    /**
     * - Let's go!
     *
     * Parses the options, collects the media from the requested listing pages,
     * downloads every collected item and prints a report.
     */
    public function __construct()
    {
        $this->_parse_opts(getopt($this->_opts, $this->_long_opts));

        if (isset($this->_id))
        {
            // Without this guard the page loop would request a URL with an
            // empty page number and media type.
            $this->_log(' * Error: option -o is not implemented yet');
            exit(1);
        }

        if ($this->_dry_run)
        {
            $this->_log('** DRY RUN MODE ON **');
        }

        // "-u" without "-t": keep scanning pages until the id shows up
        $scan_until_id = (isset($this->_until_id) && ! isset($this->_to));
        if ( ! isset($this->_to))
        {
            $this->_to = $this->_from;
        }

        $this->_log('>>>> script started');
        if ($scan_until_id)
        {
            $this->_log(' * Get pages from '.$this->_from.' until id '.$this->_until_id);
        }
        else if ($this->_from != $this->_to)
        {
            $this->_log(' * Get pages from '.$this->_from.' to '.$this->_to);
        }

        $this->_prepare_download_dir();
        $this->_collect_pages($scan_until_id);
        $this->_download_songs();

        $this->_report();
        $this->_log('<<<< script exit;');
    }

    /**
     * Create the download directory when it does not exist yet; exits on failure.
     */
    private function _prepare_download_dir()
    {
        if (is_dir($this->_download_dir) || mkdir($this->_download_dir))
        {
            return;
        }
        $this->_log(' * Error: cannot create download directory. Pls, check permissions and run again.');
        exit(1);
    }

    /**
     * Fetch and parse the listing pages, filling $_songs.
     *
     * @param bool $scan_until_id TRUE: ignore $_to and continue until the -u id
     *                            is collected or a page contributes no new media
     *                            (guards against scanning forever when the id
     *                            does not exist)
     */
    private function _collect_pages($scan_until_id)
    {
        for ($page = $this->_from; $scan_until_id || $page <= $this->_to; $page++)
        {
            if ($this->_stop)
            {
                $this->_log('Stopped by until_id '.$this->_until_id);
                break;
            }

            $known = count($this->_songs);
            $html = $this->fetch_page($this->_build_url($page));

            if ($this->_type == self::TYPE_AUDIO)
            {
                $this->_parse_audio($html);
            }
            else if ($this->_type == self::TYPE_VIDEO)
            {
                $this->_parse_video($html);
            }

            if ($scan_until_id && ! $this->_stop && count($this->_songs) === $known)
            {
                $this->_log(' * No new media on page '.$page.'; until_id '.$this->_until_id.' not found, stop scanning');
                break;
            }
        }
    }

    /**
     * Run superg for every collected item that is not on disk yet.
     * In dry-run mode the command line is logged instead of executed.
     */
    private function _download_songs()
    {
        foreach ($this->_songs as $id => $song)
        {
            $title = $song['author'].' - '.$song['song_name'];
            $target = $this->_download_dir.'/'.$title;
            $file = $target.$this->_ext;

            $this->_log('Download "'.$title.'" ...');

            if (is_file($file) && filesize($file) > 1)
            {
                $this->_log(' ** File "'.$file.'" already exists. Skip >>>');
                continue;
            }

            // $id comes from a (\d+) capture; the title is scraped text and must be quoted
            $command = 'superg -'.$this->_type_alias.' '.$id.' -o '.$this->_shell_quote($target);

            if ($this->_dry_run)
            {
                $this->_log($command);
                continue;
            }

            system($command, $status);
            if ($status !== 0)
            {
                $this->_log(' * Error: superg exited with code '.$status.' for "'.$title.'"');
            }
        }
    }

    /**
     * Quote a string as a single POSIX shell argument.
     *
     * Hand-rolled instead of escapeshellarg() because that function can drop
     * multibyte characters when no UTF-8 locale is set, which would mangle
     * Cyrillic titles.
     *
     * @param string $arg
     * @return string
     */
    private function _shell_quote($arg)
    {
        return "'".str_replace("'", "'\\''", (string)$arg)."'";
    }

    /**
     * Generate report
     */
    private function _report()
    {
        $this->_log(' ---- ');
        $this->_log(' * TOTAL SONGS: '.count($this->_songs));
        $this->_log(' * TOTAL FETCHED PAGES: '.$this->_page_cnt);
        if (count($this->_fail_pages))
        {
            $this->_log(' * FAIL PAGES: '.count($this->_fail_pages)."\n > ".implode("\n > ", $this->_fail_pages));
        }
        $this->_log(' ---- ');
    }

    /**
     * Download a page and decode its HTML entities.
     *
     * @param string $url
     * @return string HTML content, or '' when the request failed (the URL is
     *                then remembered for the report)
     */
    public function fetch_page($url)
    {
        static $context;

        $this->_log(' > fetch url: '.$url.' ...');

        if ( ! $context)
        {
            $context = stream_context_create(array(
                'http' => array(
                    'timeout' => 60,
                    'user_agent' => self::FAKE_USER_AGENT,
                ),
            ));
        }

        $page = file_get_contents($url, FALSE, $context);
        if ( ! $page)
        {
            $this->_fail_pages[] = $url;
            $this->_log(' * fail url: '.$url);
            return '';
        }

        return html_entity_decode($page, ENT_COMPAT, 'UTF-8');
    }

    /**
     * Collect the audio entries of a listing page.
     *
     * @param string $html
     */
    private function _parse_audio($html)
    {
        $this->_collect_matches('<a href="/media/audio/(\d+)/">([^<]+)<i>([^<]+)</i></a><br>', $html);
    }

    /**
     * Collect the video entries of a listing page.
     *
     * @param string $html
     */
    private function _parse_video($html)
    {
        $this->_collect_matches('<a href="/media/video/(\d+)/" title=\'([^"]+)"([^"]+)"\'[^>]?+>.*</a></div>', $html);
    }

    /**
     * Run a listing regex and add every match as a song.
     *
     * @param string $re   pattern without delimiters; capture 1 = id,
     *                     2 = author, 3 = title
     * @param string $html
     */
    private function _collect_matches($re, $html)
    {
        if ( ! preg_match_all("#$re#isUm", $html, $match))
        {
            return;
        }
        foreach ($match[1] as $k => $id)
        {
            if ($this->_stop)
            {
                $this->_log('Stopped by until_id '.$this->_until_id);
                break;
            }
            $this->_add_song($id, $match[2][$k], $match[3][$k]);
        }
    }

    /**
     * Remember one media item and raise the stop flag when it is the -u id.
     *
     * @param string $id
     * @param string $author
     * @param string $song_name
     */
    private function _add_song($id, $author, $song_name)
    {
        // Both values end up in a file name: a "/" (or NUL byte) coming from the
        // page must not be able to point outside the download directory.
        $unsafe = array('/', "\0");

        $author = str_replace($unsafe, '_', trim($author));
        $song_name = trim(trim($song_name, '"'));
        $song_name = str_replace('?', '', $song_name);
        $song_name = str_replace($unsafe, '_', $song_name);

        $this->_log('found song: ['.$id.'] '.$author.' - '.$song_name);
        $this->_songs[$id] = array('author' => $author, 'song_name' => $song_name);

        if (isset($this->_until_id) && $this->_until_id == $id)
        {
            $this->_stop = TRUE;
        }
    }

    /**
     * Build the listing URL for a page. Also counts and logs the page.
     *
     * @param integer $page
     * @return string
     */
    private function _build_url($page)
    {
        $this->_page_cnt++;
        $this->_log(' > page: '.$page);
        return str_replace(array('{page}', '{type}'), array($page, $this->_type), self::URL);
    }

    /**
     * Write ID3 tags for the *.mp3 files of the working directory via mp3info.
     *
     * Not finished and not called anywhere: the method terminates the script
     * immediately, the code below the die is work in progress.
     */
    private function _update_id3()
    {
        // TODO:
        die;
        foreach (glob('*.mp3') as $filename)
        {
            list($artist, $title) = $this->_helper_get_artist_title($filename);
            echo $filename."\n";
            system('mp3info '.$this->_shell_quote($filename).' -f -a '.$this->_shell_quote($artist).' -t '.$this->_shell_quote($title));
        }
        echo "\n";
    }

    /**
     * Split an "Artist - Title.mp3" file name into transliterated artist and title.
     *
     * @param string $filename
     * @return array array($artist, $title); the artist is '' when the name has
     *               no " - " separator
     */
    private function _helper_get_artist_title($filename)
    {
        // Kyrgyz letters first become their closest Russian letters, which the
        // transliteration table knows.
        $filename = str_replace(
            array('Ө','ө','Ү','ү','Ң','ң'),
            array('О','о','У','у','Н','н'),
            $filename
        );
        $filename = $this->_helper_translit($filename);

        // Remove the extension as a suffix. trim($filename, '.mp3') would strip
        // every leading/trailing ".", "m", "p" and "3" character instead.
        $ext_len = strlen(self::EXT_AUDIO);
        if (strtolower((string)substr($filename, -$ext_len)) === self::EXT_AUDIO)
        {
            $filename = (string)substr($filename, 0, -$ext_len);
        }

        // Split on the full " - " separator so hyphenated artists stay intact
        $parts = explode(' - ', $filename, 2);
        if (count($parts) < 2)
        {
            return array('', trim($filename));
        }
        return array(trim($parts[0]), trim($parts[1]));
    }

    /**
     * Transliterate Russian Cyrillic letters to Latin.
     *
     * @param string $str
     * @return string
     */
    private function _helper_translit($str)
    {
        $tr = array(
            "А"=>"A","Б"=>"B","В"=>"V","Г"=>"G",
            "Д"=>"D","Е"=>"E","Ж"=>"J","З"=>"Z","И"=>"I",
            "Й"=>"Y","К"=>"K","Л"=>"L","М"=>"M","Н"=>"N",
            "О"=>"O","П"=>"P","Р"=>"R","С"=>"S","Т"=>"T",
            "У"=>"U","Ф"=>"F","Х"=>"H","Ц"=>"TS","Ч"=>"CH",
            "Ш"=>"SH","Щ"=>"SCH","Ъ"=>"","Ы"=>"YI","Ь"=>"",
            "Э"=>"E","Ю"=>"YU","Я"=>"YA","а"=>"a","б"=>"b",
            "в"=>"v","г"=>"g","д"=>"d","е"=>"e","ж"=>"j",
            "з"=>"z","и"=>"i","й"=>"y","к"=>"k","л"=>"l",
            "м"=>"m","н"=>"n","о"=>"o","п"=>"p","р"=>"r",
            "с"=>"s","т"=>"t","у"=>"u","ф"=>"f","х"=>"h",
            "ц"=>"ts","ч"=>"ch","ш"=>"sh","щ"=>"sch","ъ"=>"y",
            "ы"=>"yi","ь"=>"","э"=>"e","ю"=>"yu","я"=>"ya","Ё"=>"E","ё"=>"e",
        );
        return strtr($str, $tr);
    }

    /**
     * Write a message to stdout and append it to supermediag.log.
     *
     * The handles are opened on the first call and shared afterwards. When the
     * log file cannot be opened the message still goes to stdout.
     *
     * @param string $message
     */
    private function _log($message)
    {
        static $ready = FALSE, $fp, $fp_to_file;

        if ( ! $ready)
        {
            $ready = TRUE;
            $fp = fopen('php://stdout', 'a+');
            $fp_to_file = fopen('supermediag.log', 'a+');
            if ($fp_to_file)
            {
                fwrite($fp_to_file, "\n\n\n ------- ".date('d.m.Y H:i:s').":");
            }
        }

        if ($fp)
        {
            fwrite($fp, $message."\n");
        }
        if ($fp_to_file)
        {
            fwrite($fp_to_file, $message."\n");
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment