Last active
December 30, 2015 22:13
-
-
Save 2bj/745836 to your computer and use it in GitHub Desktop.
Super.kg media (audio/video) grabber.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fix the YouTube video title after/while upload (http://www.youtube.com/upload).
// `$x` is not defined here; it is presumably the browser DevTools console
// XPath helper, so this is meant to be pasted into the console — TODO confirm.
// NOTE(review): the hosting page warns about hidden Unicode in this file, so the
// searched character may originally have been something other than a plain
// space — confirm against the raw file.
var titleInputs = $x("//input[contains(@class, 'video-settings-title')]");
titleInputs.forEach(function (input, position) {
    var current = input.value;
    // Leave the field untouched when there is nothing to replace.
    if (current.indexOf(" ") == -1) {
        return;
    }
    // String#replace with a string pattern only rewrites the first occurrence.
    input.value = current.replace(' ', ' - ');
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php
<?php

// Script entry point. The timezone is fixed so that date() calls made by the
// script do not depend on the php.ini of the machine it runs on.
date_default_timezone_set('Europe/Moscow');

// All the work (option parsing, page fetching, downloading) is done by the
// constructor; the instance itself is not needed afterwards.
new Supermediag();
/**
 * Super.kg media (audio/video) grabber.
 *
 * Command-line tool: walks the super.kg media listing pages, collects the
 * audio or video entries found there and downloads every entry through the
 * external "superg" command. All the work is started from the constructor.
 *
 * @author 2BJ dev2bj★gmail.com
 * @justforfun
 * @required
 *  - superg @see https://gist.github.com/b97c8fb19dec39d8513f
 *  - mp3info (sudo apt-get install mp3info) ( ! NOT IMPLEMENTED)
 *
 * @TODO:
 *  - add interactive mode
 *  - add search
 *
 * @updated Mon Aug 12 2013 23:56:24 GMT+0600 (KGT)
 */
class Supermediag {

    const TYPE_AUDIO = 'audio';
    const TYPE_VIDEO = 'video';
    const EXT_AUDIO = '.mp3';
    const EXT_VIDEO = '.mp4';
    const URL = 'http://super.kg/media/?pg={page}&only={type}';
    const FAKE_USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Ubuntu/10.10 Chromium/8.0.552.215 Chrome/8.0.552.215 Safari/534.10';

    /**
     * @var string destination directory
     */
    private $_download_dir = 'superg_downloads';

    /**
     * @var string media type, one of the TYPE_* constants
     */
    private $_type;

    /**
     * @var string media type as given on the command line ("a" or "v")
     */
    private $_type_alias;

    /**
     * @var string file extension of downloaded media, one of the EXT_* constants
     */
    private $_ext;

    /**
     * @var integer first page to fetch
     */
    private $_from;

    /**
     * @var integer|null last page to fetch; NULL means "keep going until until_id is met"
     */
    private $_to;

    /**
     * @var integer id of a single media to get (option -o, not implemented)
     */
    private $_id;

    /**
     * @var integer id of the media at which collecting stops (option -u)
     */
    private $_until_id;

    /**
     * @var bool when TRUE the download commands are only logged, not executed
     */
    private $_dry_run = FALSE;

    /**
     * @var string short options, in getopt() format (every value is optional)
     */
    private $_opts = 'm::f::t::o::u::';

    /**
     * @var array long format options
     */
    private $_long_opts = array('dry-run');

    /**
     * @var array urls of the pages that could not be fetched
     */
    private $_fail_pages = array();

    /**
     * @var array found media: id => array('author' => ..., 'song_name' => ...)
     */
    private $_songs = array();

    /**
     * @var bool set to TRUE once the media with id until_id has been collected
     */
    private $_stop = FALSE;

    /**
     * @var int number of page urls built so far
     */
    private $_page_cnt = 0;

    /**
     * Let's go! Parses the command line, collects the media of the requested
     * pages, downloads them and prints a report.
     */
    public function __construct()
    {
        $this->_parse_opts(getopt($this->_opts, $this->_long_opts));

        if ($this->_dry_run)
        {
            $this->_log('** DRY RUN MODE ON **');
        }

        $this->_log('>>>> script started');

        if ($this->_to === NULL)
        {
            $this->_log(' * Get pages from '.$this->_from.' until id '.$this->_until_id);
        }
        else if ($this->_from != $this->_to)
        {
            $this->_log(' * Get pages from '.$this->_from.' to '.$this->_to);
        }

        if ( ! is_dir($this->_download_dir) && ! mkdir($this->_download_dir))
        {
            $this->_log(' * Error: cannot create download directory. Pls, check permissions and run again.');
            exit(1);
        }

        $this->_collect_songs();
        $this->_download_songs();
        $this->_report();
        $this->_log('<<<< script exit;');
    }

    /**
     * Print the usage text.
     */
    private function _get_help()
    {
        global $argv;
        $script_name = $argv[0];
        // The heredoc body and its closing marker stay at column 0 on purpose:
        // PHP versions before 7.3 require the closing marker at line start.
        $help =<<<HELP
Usage: $script_name -m<a|v> -f<\d+> -t<\d+> -o<\d+> -u<\d+>
\t-m, \tmedia type: v - video, a - audio
\t-f, \tpage from (integer)
\t-t, \tpage to (integer)
\t-o, \tget one media by id (integer) ( ! NOT IMPLEMENTED)
\t-u, \tget all media until this id (integer)
\t--------
\t--dry-run, \tDry run mode ;)
Example:
\t"$script_name -mv -f1 -t10"\t - get from 1 to 10 pages of video content.
\t"$script_name -ma -t3"\t - get only 3rd page of audio.
\t"$script_name -o1983"\t - get media by id 1983.
\t"$script_name -mv -u1983"\t - get all video until id 1983 from page 1.
HELP;
        $this->_log($help);
    }

    /**
     * Report a command line error: log the message, show the usage text and
     * terminate with a non-zero exit status.
     *
     * @param string $message
     */
    private function _fail($message)
    {
        $this->_log($message);
        $this->_get_help();
        exit(1);
    }

    /**
     * Read a strictly positive integer option, or fail with $error.
     *
     * Only plain digit strings are accepted. getopt() gives FALSE for an option
     * passed without a value and an array for a repeated option; both are
     * rejected here, as are values such as "12abc" or "-5".
     *
     * @param array $opts parsed options
     * @param string $name option name, must be present in $opts
     * @param string $error message logged when the value is not valid
     * @return integer
     */
    private function _int_opt($opts, $name, $error)
    {
        $raw = $opts[$name];
        if ( ! is_string($raw) || ! preg_match('/^[0-9]+$/', $raw) || (int)$raw < 1)
        {
            $this->_fail($error);
        }
        return (int)$raw;
    }

    /**
     * Validate the command line options and store them on the instance.
     * Any invalid combination logs an error, prints the help and exits.
     *
     * Page range rules:
     *  - "-f N" alone      : only page N (also when combined with -u);
     *  - "-f N -t M"       : pages N..M;
     *  - "-t M" alone      : only page M;
     *  - "-u ID" without -f: start at page 1; without -t the scan is open-ended
     *                        and ends when ID is met or a page yields nothing.
     *
     * @param array $opts result of getopt()
     */
    private function _parse_opts($opts)
    {
        // getopt() returns FALSE when it cannot parse the command line.
        if ( ! is_array($opts))
        {
            $opts = array();
        }

        if (isset($opts['dry-run']))
        {
            $this->_dry_run = TRUE;
        }

        // TODO: implement getting a single media by id. Until then stop here
        // instead of running the page loop with no page and no media type.
        if (isset($opts['o']))
        {
            $this->_id = $this->_int_opt($opts, 'o', ' * Error: -o must be integer');
            $this->_fail(' * Error: -o is not implemented yet');
        }

        if ( ! isset($opts['m']))
        {
            $this->_fail(' * Error: pls, set type of media: a - audio, v - video');
        }

        $types = array(
            'a' => array(self::TYPE_AUDIO, self::EXT_AUDIO),
            'v' => array(self::TYPE_VIDEO, self::EXT_VIDEO),
        );
        $alias = $opts['m'];
        if ( ! is_string($alias) || ! isset($types[$alias]))
        {
            $this->_fail(' * Error: unknown media type');
        }
        $this->_type = $types[$alias][0];
        $this->_ext = $types[$alias][1];
        $this->_type_alias = $alias;

        $has_from = isset($opts['f']);
        $has_to = isset($opts['t']);
        $has_until = isset($opts['u']);

        if ($has_from)
        {
            $this->_from = $this->_int_opt($opts, 'f', ' * Error: page -f must be integer');
        }
        if ($has_to)
        {
            $this->_to = $this->_int_opt($opts, 't', ' * Error: page -t must be integer');
        }
        if ($has_until)
        {
            $this->_until_id = $this->_int_opt($opts, 'u', ' * Error: option -u must be integer');
            $this->_log('Set until_id '.$this->_until_id);
        }

        if ( ! $has_from)
        {
            if ($has_until)
            {
                // "-mv -u1983": scan from the first page.
                $this->_from = 1;
            }
            else if ($has_to)
            {
                // "-ma -t3": only the 3rd page.
                $this->_from = $this->_to;
            }
            else
            {
                $this->_fail(' * Error: pls, set page from (-f)');
            }
        }

        if ($has_to)
        {
            if ($this->_to < $this->_from)
            {
                $this->_fail(' * Error: page -t cannot be less than -f');
            }
        }
        else if ($has_from || ! $has_until)
        {
            // No upper bound given: a single page.
            $this->_to = $this->_from;
        }
        // Otherwise (-u without -f and -t) _to stays NULL: open-ended scan.
    }

    /**
     * Fetch the requested pages one by one and collect their media entries.
     * Ends early once until_id has been collected; an open-ended scan also
     * ends at the first page that yields no entries.
     */
    private function _collect_songs()
    {
        for ($page = $this->_from; $this->_to === NULL || $page <= $this->_to; $page++)
        {
            $html = $this->fetch_page($this->_build_url($page));

            $found = 0;
            if ($this->_type == self::TYPE_AUDIO)
            {
                $found = $this->_parse_audio($html);
            }
            else if ($this->_type == self::TYPE_VIDEO)
            {
                $found = $this->_parse_video($html);
            }

            if ($this->_stop)
            {
                $this->_log('Stopped by until_id '.$this->_until_id);
                break;
            }

            // Without an upper page bound an empty (or failed) page is the only
            // sign that the listing is over; stop instead of fetching forever.
            if ($this->_to === NULL && $found === 0)
            {
                $this->_log(' * No media found on page '.$page.', until_id '.$this->_until_id.' was not reached');
                break;
            }
        }
    }

    /**
     * Download every collected media with the external "superg" command.
     * Files that already exist are skipped; in dry run mode the command is
     * logged instead of being executed.
     */
    private function _download_songs()
    {
        foreach ($this->_songs as $id => $song)
        {
            $title = $song['author'].' - '.$song['song_name'];
            $this->_log('Download "'.$title.'" ...');

            // superg receives the target path without extension.
            $target = $this->_download_dir.'/'.$this->_safe_file_name($title);
            $file = $target.$this->_ext;
            if (is_file($file) && filesize($file) > 1)
            {
                $this->_log(' ** File "'.$file.'" already exists. Skip >>>');
                continue;
            }

            // The title comes from a fetched web page, so it is quoted before it
            // reaches the shell.
            $command = 'superg -'.$this->_type_alias.' '.(int)$id.' -o '.$this->_shell_quote($target);
            if ($this->_dry_run)
            {
                $this->_log($command);
                continue;
            }

            $status = 0;
            system($command, $status);
            if ($status !== 0)
            {
                $this->_log(' * Error: superg exited with status '.$status.' for id '.$id);
            }
        }
    }

    /**
     * Make a media title usable as a single file name: a directory separator
     * would otherwise point into a sub-directory that does not exist.
     *
     * @param string $name
     * @return string
     */
    private function _safe_file_name($name)
    {
        return str_replace(array('/', "\0"), array('_', ''), $name);
    }

    /**
     * Quote a string as one POSIX shell argument.
     *
     * Done by hand rather than with escapeshellarg() because that function can
     * drop multibyte characters depending on the active LC_CTYPE locale, and
     * the titles handled here are mostly Cyrillic.
     *
     * @param string $arg
     * @return string
     */
    private function _shell_quote($arg)
    {
        $arg = str_replace("\0", '', $arg);
        return "'".str_replace("'", "'\\''", $arg)."'";
    }

    /**
     * Generate report
     */
    private function _report()
    {
        $this->_log(' ---- ');
        $this->_log(' * TOTAL SONGS: '.count($this->_songs));
        $this->_log(' * TOTAL FETCHED PAGES: '.$this->_page_cnt);
        if (count($this->_fail_pages))
        {
            $this->_log(' * FAIL PAGES: '.count($this->_fail_pages)."\n > ".implode("\n > ", $this->_fail_pages));
        }
        $this->_log(' ---- ');
    }

    /**
     * Download a page. A failed download is logged and remembered for the
     * report.
     *
     * @param string $url
     * @return string HTML content with HTML entities decoded, '' on failure
     */
    public function fetch_page($url)
    {
        $this->_log(' > fetch url: '.$url.' ...');

        // The stream context is built once and reused for every request.
        static $context;
        if ( ! $context)
        {
            $context = stream_context_create(array(
                'http' => array(
                    'timeout' => 60,
                    'user_agent' => self::FAKE_USER_AGENT,
                )
            ));
        }

        $page = file_get_contents($url, FALSE, $context);
        if ( ! $page)
        {
            $this->_fail_pages[] = $url;
            $this->_log(' * fail url: '.$url);
            return '';
        }

        // The video pattern matches literal double quotes inside the title
        // attribute, so entities are decoded before parsing.
        return html_entity_decode($page, ENT_COMPAT, 'UTF-8');
    }

    /**
     * Collect the audio entries of a listing page.
     *
     * @param string $html
     * @return integer number of entries collected from this page
     */
    private function _parse_audio($html)
    {
        return $this->_add_matches('#<a href="/media/audio/(\d+)/">([^<]+)<i>([^<]+)</i></a><br>#isUm', $html);
    }

    /**
     * Collect the video entries of a listing page.
     *
     * @param string $html
     * @return integer number of entries collected from this page
     */
    private function _parse_video($html)
    {
        return $this->_add_matches('#<a href="/media/video/(\d+)/" title=\'([^"]+)"([^"]+)"\'[^>]?+>.*</a></div>#isUm', $html);
    }

    /**
     * Run a listing pattern over a page and register every match.
     * Group 1 of the pattern is the media id, group 2 the author and group 3
     * the title. Stops as soon as until_id has been collected.
     *
     * @param string $pattern complete PCRE pattern, delimiters included
     * @param string $html
     * @return integer number of entries collected
     */
    private function _add_matches($pattern, $html)
    {
        // preg_match_all() gives 0 when nothing matched and FALSE on error.
        if ( ! preg_match_all($pattern, $html, $match))
        {
            return 0;
        }

        $added = 0;
        foreach ($match[1] as $k => $id)
        {
            if ($this->_stop)
            {
                break;
            }
            $this->_add_song($id, $match[2][$k], $match[3][$k]);
            $added++;
        }
        return $added;
    }

    /**
     * Register a found media and raise the stop flag when it is the until_id
     * one (that media itself is still collected).
     *
     * @param string $id
     * @param string $author
     * @param string $song_name
     */
    private function _add_song($id, $author, $song_name)
    {
        $author = trim($author);
        $song_name = trim(trim($song_name, '"'));
        $song_name = str_replace('?', '', $song_name);
        $this->_log('found song: ['.$id.'] '.$author.' - '.$song_name);
        $this->_songs[$id] = array('author' => $author, 'song_name' => $song_name);
        if (isset($this->_until_id) && $this->_until_id == $id)
        {
            $this->_stop = TRUE;
        }
    }

    /**
     * Build the listing url of a page and count the page as fetched.
     *
     * @param integer $page
     * @return string
     */
    private function _build_url($page)
    {
        $this->_page_cnt++;
        $this->_log(' > page: '.$page);
        return str_replace(array('{page}', '{type}'), array($page, $this->_type), self::URL);
    }

    /**
     * Write artist/title ID3 tags of the mp3 files of the current directory
     * with mp3info. Not implemented yet and not called from anywhere: the
     * method returns before doing any work.
     */
    private function _update_id3()
    {
        // TODO: finish and enable. The draft below is unreachable until then.
        $this->_log(' * ID3 update is not implemented yet');
        return;

        $files = glob('*.mp3');
        foreach ($files ? $files : array() as $filename)
        {
            $artist_title = $this->_helper_get_artist_title($filename);
            echo $filename."\n";
            system('mp3info '.$this->_shell_quote($filename)
                .' -f -a '.$this->_shell_quote($artist_title[0])
                .' -t '.$this->_shell_quote($artist_title[1]));
        }
        echo "\n";
    }

    /**
     * Split an "Artist - Title.mp3" file name into transliterated artist and
     * title.
     *
     * @param string $filename
     * @return array array(artist, title); artist is '' when the name has no " - " separator
     */
    private function _helper_get_artist_title($filename)
    {
        // Kyrgyz letters have no entry in the transliteration table: map them
        // to the closest Russian letters first.
        $filename = str_replace(
            array('Ө','ө','Ү','ү','Ң','ң'),
            array('О','о','У','у','Н','н'),
            $filename
        );
        $filename = $this->_helper_translit($filename);

        // Drop the extension only. trim() with '.mp3' would strip any of the
        // characters ".", "m", "p", "3" from both ends of the name.
        $ext_len = strlen(self::EXT_AUDIO);
        if (substr($filename, -$ext_len) === self::EXT_AUDIO)
        {
            $filename = substr($filename, 0, -$ext_len);
        }

        $parts = explode(' - ', $filename, 2);
        if (count($parts) < 2)
        {
            return array('', trim($filename));
        }
        return array(trim($parts[0]), trim($parts[1]));
    }

    /**
     * Transliterate Russian letters to latin ones.
     *
     * @param string $str
     * @return string
     */
    private function _helper_translit($str)
    {
        $tr = array(
            "А"=>"A","Б"=>"B","В"=>"V","Г"=>"G",
            "Д"=>"D","Е"=>"E","Ж"=>"J","З"=>"Z","И"=>"I",
            "Й"=>"Y","К"=>"K","Л"=>"L","М"=>"M","Н"=>"N",
            "О"=>"O","П"=>"P","Р"=>"R","С"=>"S","Т"=>"T",
            "У"=>"U","Ф"=>"F","Х"=>"H","Ц"=>"TS","Ч"=>"CH",
            "Ш"=>"SH","Щ"=>"SCH","Ъ"=>"","Ы"=>"YI","Ь"=>"",
            "Э"=>"E","Ю"=>"YU","Я"=>"YA","а"=>"a","б"=>"b",
            "в"=>"v","г"=>"g","д"=>"d","е"=>"e","ж"=>"j",
            "з"=>"z","и"=>"i","й"=>"y","к"=>"k","л"=>"l",
            "м"=>"m","н"=>"n","о"=>"o","п"=>"p","р"=>"r",
            "с"=>"s","т"=>"t","у"=>"u","ф"=>"f","х"=>"h",
            "ц"=>"ts","ч"=>"ch","ш"=>"sh","щ"=>"sch","ъ"=>"y",
            "ы"=>"yi","ь"=>"","э"=>"e","ю"=>"yu","я"=>"ya","Ё"=>"E","ё"=>"e",
        );
        return strtr($str, $tr);
    }

    /**
     * Write a message to stdout and append it to supermediag.log.
     * The first call opens both streams and writes a dated header to the log
     * file; when the log file cannot be opened only stdout is used.
     *
     * @param string $message
     */
    private function _log($message)
    {
        static $ready = FALSE, $fp = NULL, $fp_to_file = NULL;
        if ( ! $ready)
        {
            $ready = TRUE;
            $fp = fopen('php://stdout', 'w');
            $fp_to_file = fopen('supermediag.log', 'a');
            if ($fp_to_file)
            {
                fwrite($fp_to_file, "\n\n\n ------- ".date('d.m.Y H:i:s').":");
            }
        }
        if ($fp)
        {
            fwrite($fp, $message."\n");
        }
        if ($fp_to_file)
        {
            fwrite($fp_to_file, $message."\n");
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment