Created
November 14, 2018 16:20
-
-
Save cashiwamochi/c6892346adccd07bfcea0da89d4736d8 to your computer and use it in GitHub Desktop.
RealEstate10kのデータセットを生成するスクリプト.pytubeのバグでいくつかはDLできず,失敗した情報はテキストに吐き出される.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import os
import subprocess
import sys

from pytube import YouTube
if __name__=="__main__": | |
if len(sys.argv) != 2: | |
print("usage: this.py [test or train]") | |
quit() | |
if sys.argv[1] == "test": | |
mode = "test" | |
elif sys.argv[1] == "train": | |
mode = "train" | |
else: | |
print("invalid mode") | |
quit() | |
data_root = "./RealEstate10K/" + mode | |
seqname_list = sorted(glob.glob(data_root + "/*.txt")) | |
print("{} sequences are saved".format(len(seqname_list))) | |
for txt_file in seqname_list: | |
print("{} is the current target.".format(txt_file)) | |
dir_name = txt_file.split('/')[-1] | |
dir_name = dir_name.split('.')[0] | |
output_root = './videos/' + mode + '/' + dir_name | |
if not os.path.exists(output_root): | |
os.makedirs(output_root) | |
else: | |
continue | |
seq_file = open(txt_file, "r") | |
lines = seq_file.readlines() | |
timestamp_list = [] | |
str_timestamp_list = [] | |
for idx, line in enumerate(lines): | |
if idx == 0: | |
youtube_url = line.strip() | |
else: | |
timestamp = int(line.split(' ')[0]) | |
str_timestamp_list.append(str(timestamp)) | |
timestamp = int(timestamp/1000) | |
str_hour = str(int(timestamp/3600000)).zfill(2) | |
str_min = str(int(int(timestamp%3600000)/60000)).zfill(2) | |
str_sec = str(int(int(int(timestamp%3600000)%60000)/1000)).zfill(2) | |
str_mill = str(int(int(int(timestamp%3600000)%60000)%1000)).zfill(3) | |
str_timestamp = str_hour+":"+str_min+":"+str_sec+"."+str_mill | |
timestamp_list.append(str_timestamp) | |
seq_file.close() | |
try : | |
yt = YouTube(youtube_url) | |
stream = yt.streams.first() | |
stream.download('./','current') | |
except : | |
failure_log = open('falied_videos.txt', 'a') | |
failure_log.writelines(txt_file+'\n') | |
failure_log.close() | |
continue | |
videoname_candinate_list = glob.glob('./*') | |
for videoname_candinate in videoname_candinate_list: | |
print(videoname_candinate.split('.')) | |
if videoname_candinate.split('.')[-2] == "/current": | |
videoname = videoname_candinate | |
# ffmpeg -i tmp.mp4 -ss 00:01:28.800 -vframes 1 -f image2 out.jpg | |
for idx, timestamp in enumerate(timestamp_list): | |
command = 'ffmpeg'+' -ss '+timestamp+' -i '+videoname+' -vframes 1 -f image2 '+output_root+'/'+str_timestamp_list[idx]+'.png' | |
os.system(command) | |
command = "rm " + videoname | |
os.system(command) | |
print("done!") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment