Created
September 14, 2016 12:51
-
-
Save Ishibasystems/3c96ed7a3fd471d48e9b00ad3106cc67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# -*- coding: utf-8 -*- | |
import datetime | |
import youtube_dl | |
import subprocess | |
from sys import exit, argv | |
from os.path import exists | |
from modules import text | |
from traceback import format_exc | |
from urllib.request import urlopen | |
# Whether ffmpeg is installed (needed for quality selection on YouTube).
ffmpeg = True

# Log of videos that have already been downloaded.
list_videos = '/mnt/download/youtube-dl.log'
# Default save directory.
download_def = '/mnt/download/video/'
# Save directory for Twitter (vine) videos.
download_tw = '/mnt/download/video/vine/'

# Enter the niconico account credentials here.
user = ''
pswd = ''

# Holiday calendar as (month, day) pairs.
holiday = {
    (9, 19),
    (9, 22),
    (10, 10),
    (11, 3),
    (11, 23),
    (12, 23),
}
def command(x):
    """Run an external command and return its captured output.

    Args:
        x: Argument list handed to ``subprocess.Popen`` (no shell involved).

    Returns:
        The command's stdout, a newline, then its stderr, both decoded as
        UTF-8 with undecodable bytes dropped. An empty string is returned
        when the command cannot be started; the traceback is printed.
    """
    try:
        p = subprocess.Popen(x, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
    except Exception:
        # Only ordinary failures are swallowed; KeyboardInterrupt/SystemExit
        # propagate. str() keeps the report itself from raising when the
        # argument list contains a non-string (a likely cause of the failure).
        print(format_exc() + 'Command Error: ' + ' '.join(map(str, x)) + '\n')
        return ''
    return out.decode('utf-8', 'ignore') + '\n' + err.decode('utf-8', 'ignore')
class catchmsg(object):
    """Logger-style object that accumulates messages in memory.

    It exposes ``debug``/``warning``/``error`` so it can be used wherever a
    logger with those methods is expected; every message ends up in one
    newline-separated buffer that ``getmsg`` hands back and clears.
    """

    def __init__(self):
        self.logmsg = ''

    def input(self, msg):
        """Append *msg* to the buffer, trimmed and without red/reset ANSI codes."""
        cleaned = msg.strip()
        for escape in ('\033[0;31m', '\033[0m'):
            cleaned = cleaned.replace(escape, '')
        self.logmsg = self.logmsg + cleaned + '\n'

    def debug(self, msg):
        self.input(msg)

    def warning(self, msg):
        self.input(msg)

    def error(self, msg):
        self.input(msg)

    def getmsg(self):
        """Return everything collected so far and reset the buffer."""
        collected = self.logmsg
        self.__init__()
        return collected
class sharpmsg(object):
    """Holds a pending '# ' header line and emits it before the next entry.

    ``set`` stores a header; the next ``add`` writes that header together
    with the entry through ``text.add`` and clears it, so a header with no
    entries after it is never written.
    """

    def __init__(self):
        self.logmsg = ''
        self.countr = 0
        self.tim = datetime.datetime.today()

    def set(self, msg):
        """Reset the state and remember *msg* as the pending header."""
        self.__init__()
        self.logmsg = msg

    def add(self, list, msg):
        """Append *msg* to the file *list*, preceded by any pending header."""
        pending = self.logmsg
        if pending:
            msg = pending + '\n' + msg
            # The header has been consumed: start over with a clean state.
            self.__init__()
        self.countr = self.countr + 1
        text.add(list, msg)
def dummy(ex):
    """Store a timestamped '# ' header built from the current URL line.

    NOTE(review): this reads the names ``sharp`` and ``url`` from module
    scope and ignores *ex*. No module-level definition of either name is
    visible here, so calling it presumably raises NameError -- confirm
    whether this function is still needed.
    """
    remember = sharp.set
    stamp = str(datetime.datetime.today())
    label = url[2:].split('\t')[-1]
    remember('# ' + stamp + '\t' + label)
def deny(title):
    """Tell whether a video title contains a word that blocks the download.

    Args:
        title: Video title to inspect.

    Returns:
        True when the title contains any banned keyword, or contains both
        '第' and '話' (an episode-number pattern); False otherwise.
    """
    # Videos whose title contains any of these are not downloaded.
    banned_words = (
        '4コマ',
        # NOTE(review): the original listed '4コマ' twice; the second entry
        # was presumably the full-width spelling, so it is spelled out here.
        '４コマ',
        '四コマ',
        'BGM',
        'MMDドラマ',
        'エロゲ',
        'ギャルゲ',
        'コマ劇場',
        'ニコ生',
        'プレイ動画',
        '画像集',
        '紙芝居',
        '逆再生',
        '作業用',
        '実況',
        '手書',
        '手描',
    )
    if any(word in title for word in banned_words):
        return True
    return '第' in title and '話' in title
def _normalize_url(url):
    """Canonicalise a video URL and classify its site.

    Returns a ``(url, nicovid_flag, youtube_flag)`` tuple.
    """
    if url[-1] == '/':
        url = url[:-1]
    if 'youtu.be' in url or 'm.youtube.com' in url or 'nico.ms' in url:
        # Follow the redirect of short/mobile links. Best effort: when the
        # request fails the URL is used as written.
        try:
            with urlopen(url) as response:
                url = response.geturl()
        except Exception:
            pass
    nicovid_flag = 'nico.ms' in url or 'www.nicovideo.jp' in url
    youtube_flag = 'youtu.be' in url or 'm.youtube.com' in url or 'www.youtube.com' in url
    if youtube_flag:
        # Keep only the playlist id / video id query parameter.
        if 'playlist?' in url and 'list=' in url:
            url = url.split('playlist?')[0] + 'playlist?list=' + url.split('list=')[1].split('&')[0]
        if '?' in url and 'v=' in url:
            url = url.split('?')[0] + '?v=' + url.split('v=')[1].split('&')[0]
    if nicovid_flag:
        url = url.split('?')[0].split('/videoExplorer')[0]
    return url, nicovid_flag, youtube_flag


def _economy_skip_note(now):
    """Return the skip note for niconico economy-mode hours, or None."""
    # Regular economy hours (18:00 - 26:00).
    if now.hour < 2 or 18 <= now.hour:
        return ' | SKIP: economy mode hour (基本時間外)'
    # Extended economy hours (12:00 - 26:00) on special days.
    if 12 <= now.hour:
        if 4 < now.weekday():
            return ' | SKIP: economy mode hour (土曜・日曜)'
        # Public holidays and other planned days.
        if (now.month, now.day) in holiday:
            return ' | SKIP: economy mode hour (計画内)'
        # Summer vacation: from the day after Marine Day (third Monday of
        # July) through the end of August. The third Monday is day 15 plus
        # the distance from July 1st to the first Monday.
        to_first_monday = (7 - datetime.date(now.year, 7, 1).weekday()) % 7
        if now.month == 8 or (now.month == 7 and now.day > 15 + to_first_monday):
            return ' | SKIP: economy mode hour (夏休み)'
    return None


def _select_youtube_format(info):
    """Return a 'video+audio' format selector beating the default, or None.

    Looks for a format at least as tall as the one youtube-dl picked for
    'best'. Video-only candidates are ranked by file size. None is returned
    when the default format is already the best candidate.
    """
    best_id, best_height, best_size = info['format_id'], info['height'], 0
    for fmt in info['formats']:
        height = fmt.get('height')
        if height is None or not best_height <= height:
            continue
        # Same height or taller.
        if fmt['acodec'] == 'none':
            # Video-only stream: take it when its file is larger.
            size = fmt.get('filesize')
            if size is not None and best_size < size:
                best_id, best_height, best_size = fmt['format_id'], height, size
        else:
            # A stream that carries audio is acceptable too; its size is not
            # comparable with video-only streams, so it is recorded as 0.
            best_id, best_height, best_size = fmt['format_id'], height, 0
    if best_id == info['format_id']:
        return None
    # Something better than 'best' was found: pair it with the largest
    # stream that carries audio. Formats without a known size are skipped
    # instead of breaking the comparison.
    audio_id, audio_size = info['format_id'], 0
    for fmt in info['formats']:
        size = fmt.get('filesize')
        if fmt['acodec'] != 'none' and size is not None and audio_size < size:
            audio_id, audio_size = fmt['format_id'], size
    return best_id + '+' + audio_id


def main(list_urls):
    """Download every URL listed in the text file *list_urls*.

    The list file is rewritten while it is processed: it is reset to a
    header line, and every URL that is skipped or fails is written back
    with a short reason (preceded by the most recent '# ' heading line).
    Details go to ``list_urls + '.log'``; the last path component of each
    successfully downloaded URL is appended to ``list_videos``.

    Args:
        list_urls: Path of the text file holding one URL (or '# ' heading)
            per line.

    Exits the process with status 0 when the list is empty.
    """
    download = download_def
    urls_list = list(text.read(list_urls))
    sets_list = set()
    if not urls_list:
        exit(0)
    list_log = list_urls + '.log'
    # Reset the work list. The header embeds Japanese text and an LF so that
    # other editors keep reading and writing the file as UTF-8N with LF.
    text.write(list_urls, '# UTF-8N LF 厳守\n')
    text.write(list_log, '')
    sharp = sharpmsg()
    for url in urls_list:
        if url.startswith('# '):
            sharp.set('# ' + str(datetime.datetime.today()) + '\t' + url[2:].split('\t')[-1])
            continue
        if not url.startswith('http'):
            continue
        url = url.split(' ')[0]
        try:
            url, nicovid_flag, youtube_flag = _normalize_url(url)
            video_id = url.split('/')[-1]
            if video_id in text.read(list_videos):
                text.add(list_log, 'SKIP: duplicate video in log: ' + url)
                continue
            if video_id in sets_list:
                text.add(list_log, 'SKIP: duplicate video in list: ' + url)
                continue
            sets_list.add(video_id)
            ydl_opts = {'quiet': True, 'ignoreerrors': True, 'logger': catchmsg(), 'format': 'best', 'outtmpl': download + '%(title)s.%(ext)s'}
            # Filesystem free space check
            stdout_data = command(['df', download])
            try:
                low_space = float(stdout_data.split('\n')[1].split()[3]) < 2**20
            except (IndexError, ValueError):
                text.add(list_log, format_exc() + 'disk check is failed\n' + stdout_data + '\n')
                sharp.add(list_urls, url + ' ! SKIP: disk check is failed')
                continue
            if low_space:
                sharp.add(list_urls, url + ' ! SKIP: Filesystem free space is <1GB')
                continue
            # video info check (youtube or nicovideo)
            if nicovid_flag or youtube_flag:
                if nicovid_flag:
                    # Avoid niconico's low-quality (economy) hours.
                    if '/sm' in url:
                        note = _economy_skip_note(datetime.datetime.today())
                        if note is not None:
                            sharp.add(list_urls, url + note)
                            continue
                    # "ERROR: Unable to extract thumbPlayKey": the video
                    # info cannot be fetched without logging in.
                    ydl_opts['username'] = user
                    ydl_opts['password'] = pswd
                try:
                    info = youtube_dl.YoutubeDL(ydl_opts).extract_info(url, download=False)
                except youtube_dl.utils.DownloadError as e:
                    print('YoutubeDL().extract_info ' + url + '\n' + str(e.args))
                    sharp.add(list_urls, url + ' ! ' + e.args[0])
                    continue
                stdout_data = ydl_opts['logger'].getmsg()
                # YouTube playlist: only expand it into the list. Done before
                # the error check because a playlist may contain deleted
                # videos.
                if info is not None and info['extractor'] == 'youtube:playlist':
                    for x in info['entries']:
                        if x is not None:
                            sharp.add(list_urls, x['webpage_url'] + ' | Extract from ' + url)
                    continue
                # extract_info error
                if 'ERROR:' in stdout_data:
                    text.add(list_log, stdout_data + 'YoutubeDL().extract_info ' + url + '\n')
                    sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])
                    continue
                # Videos that slipped past the economy-hour check (e.g.
                # channel videos on a free niconico account).
                if nicovid_flag and info['format_id'] == 'economy':
                    sharp.add(list_urls, url + ' ! SKIP: economy mode video')
                    continue
                if info['title'] == '膳':
                    # The title alone is ambiguous: put the id in the name.
                    ydl_opts['outtmpl'] = download + '膳%(id)s.%(ext)s'
                elif deny(info['title']):
                    # [title] banned word
                    sharp.add(list_urls, url + ' ! SKIP: [title] 禁止ワードを確認してください')
                    continue
                # youtube select more High-Resolution (e.g. FullHD - 8K) best video if enable ffmpeg
                if ffmpeg and '/channel/' not in url and youtube_flag:
                    try:
                        selector = _select_youtube_format(info)
                    except Exception:
                        text.add(list_log, format_exc() + 'FFmpeg codec-selector Parser Error: ' + url + '\n')
                        sharp.add(list_urls, url + ' ! FFmpeg codec-selector Parser Error')
                        continue
                    if selector is not None:
                        ydl_opts['format'] = selector
            if 'vine.co' in url:
                ydl_opts['outtmpl'] = download_tw + url.split('/')[-1] + '.%(ext)s'
            try:
                youtube_dl.YoutubeDL(ydl_opts).download([url])
            except youtube_dl.utils.DownloadError as e:
                print('YoutubeDL().download ' + url + '\n' + str(e.args))
                sharp.add(list_urls, url + ' ! ' + e.args[0])
                continue
            stdout_data = ydl_opts['logger'].getmsg()
            # download error
            if 'ERROR:' in stdout_data:
                text.add(list_log, stdout_data + 'YoutubeDL().download ' + url + '\n')
                sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])
            elif 'has already been downloaded' in stdout_data:
                text.add(list_log, 'SKIP: duplicate video in file: ' + url)
            else:
                text.add(list_videos, url.split('/')[-1])
        except Exception:
            # Failures that are not reported as youtube_dl.utils.DownloadError
            # end up here as well. KeyboardInterrupt and SystemExit are left
            # alone so the run can still be aborted.
            text.add(list_log, format_exc() + 'Critical Error: ' + url + '\n')
            sharp.add(list_urls, url + ' ! Critical Error')
# Script entry point: the first command-line argument is the URL list file.
if __name__ == '__main__':
    if len(argv) >= 2:
        main(argv[1])
    else:
        # Nothing to process without a list file.
        exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment