cashiwamochi · November 14, 2018 16:20
diff --git a/generate_dataset_RE.py b/generate_dataset_RE.py
 import os
 import sys
 import glob
 import subprocess

 from pytube import YouTube


 def _format_timestamp(timestamp_us):
     """Convert a raw sequence-file timestamp to ffmpeg's ``HH:MM:SS.mmm`` form.

     The raw value is divided by 1000 to obtain milliseconds (i.e. it is
     treated as microseconds); the sub-millisecond remainder is discarded.
     """
     total_ms = timestamp_us // 1000
     hours, rest = divmod(total_ms, 3600000)
     minutes, rest = divmod(rest, 60000)
     seconds, millis = divmod(rest, 1000)
     return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, minutes, seconds, millis)


 def _parse_sequence_file(txt_file):
     """Read one sequence description file.

     The first non-blank line is taken as the video URL; the first
     whitespace-separated field of every following non-blank line is parsed
     as an integer timestamp.

     Returns:
         ``(youtube_url, frames)`` where ``frames`` is a list of
         ``(raw_timestamp_str, ffmpeg_timestamp)`` pairs. ``youtube_url`` is
         ``None`` when the file has no content.

     Raises:
         ValueError: if a timestamp field is not an integer.
     """
     with open(txt_file, "r") as seq_file:
         lines = [line.strip() for line in seq_file]
     # Blank (e.g. trailing) lines carry no data and would break int().
     lines = [line for line in lines if line]
     if not lines:
         return None, []
     frames = []
     for line in lines[1:]:
         raw_timestamp = int(line.split()[0])
         frames.append((str(raw_timestamp), _format_timestamp(raw_timestamp)))
     return lines[0], frames


 def _find_downloaded_video(stem="current"):
     """Return the file in the working directory named *stem*, or ``None``.

     The file may have any single extension or none at all.
     """
     for candidate in sorted(glob.glob("./*")):
         name = os.path.basename(candidate)
         if os.path.isfile(candidate) and os.path.splitext(name)[0] == stem:
             return candidate
     return None


 def _log_failure(txt_file):
     """Append *txt_file* to the list of sequences that could not be processed."""
     # The log file name is kept exactly as before so existing logs keep growing.
     with open('falied_videos.txt', 'a') as failure_log:
         failure_log.write(txt_file + '\n')


 def main(argv):
     """Download each sequence's video and extract one PNG per timestamp.

     Args:
         argv: command-line vector; ``argv[1]`` must be ``"test"`` or ``"train"``.

     Returns:
         0 on completion, 1 when the arguments are invalid.

     Raises:
         OSError: if the ``ffmpeg`` executable cannot be started.
     """
     if len(argv) != 2:
         print("usage: this.py [test or train]")
         return 1

     mode = argv[1]
     if mode not in ("test", "train"):
         print("invalid mode")
         return 1

     data_root = "./RealEstate10K/" + mode

     seqname_list = sorted(glob.glob(data_root + "/*.txt"))
     print("{} sequences are saved".format(len(seqname_list)))

     for txt_file in seqname_list:
         print("{} is the current target.".format(txt_file))

         dir_name = os.path.basename(txt_file).split('.')[0]
         output_root = './videos/' + mode + '/' + dir_name

         # An existing output directory marks the sequence as already handled.
         # NOTE(review): the directory is created before the download, so a
         # sequence whose download fails is also skipped on later runs --
         # confirm this is intended.
         if os.path.exists(output_root):
             continue
         os.makedirs(output_root)

         try:
             youtube_url, frames = _parse_sequence_file(txt_file)
         except (EnvironmentError, ValueError):
             _log_failure(txt_file)
             continue
         if not youtube_url:
             _log_failure(txt_file)
             continue

         # Exception (not a bare except) so Ctrl-C still stops the batch run.
         try:
             yt = YouTube(youtube_url)
             stream = yt.streams.first()
             stream.download('./', 'current')
         except Exception:
             _log_failure(txt_file)
             continue

         videoname = _find_downloaded_video()
         if videoname is None:
             _log_failure(txt_file)
             continue

         # ffmpeg -ss 00:01:28.800 -i tmp.mp4 -vframes 1 -f image2 out.png
         try:
             for raw_timestamp, seek_to in frames:
                 frame_path = output_root + '/' + raw_timestamp + '.png'
                 # Argument list (no shell) keeps odd characters in paths safe.
                 subprocess.call(['ffmpeg', '-ss', seek_to, '-i', videoname,
                                  '-vframes', '1', '-f', 'image2', frame_path])
         finally:
             # Always drop the temporary video so the next sequence starts clean.
             os.remove(videoname)
         print("done!")

     return 0


 if __name__ == "__main__":
     sys.exit(main(sys.argv))
	import os
	import sys
	import glob
	import subprocess

	from pytube import YouTube


	def _format_timestamp(timestamp_us):
	    """Convert a raw sequence-file timestamp to ffmpeg's ``HH:MM:SS.mmm`` form.

	    The raw value is divided by 1000 to obtain milliseconds (i.e. it is
	    treated as microseconds); the sub-millisecond remainder is discarded.
	    """
	    total_ms = timestamp_us // 1000
	    hours, rest = divmod(total_ms, 3600000)
	    minutes, rest = divmod(rest, 60000)
	    seconds, millis = divmod(rest, 1000)
	    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, minutes, seconds, millis)


	def _parse_sequence_file(txt_file):
	    """Read one sequence description file.

	    The first non-blank line is taken as the video URL; the first
	    whitespace-separated field of every following non-blank line is parsed
	    as an integer timestamp.

	    Returns:
	        ``(youtube_url, frames)`` where ``frames`` is a list of
	        ``(raw_timestamp_str, ffmpeg_timestamp)`` pairs. ``youtube_url`` is
	        ``None`` when the file has no content.

	    Raises:
	        ValueError: if a timestamp field is not an integer.
	    """
	    with open(txt_file, "r") as seq_file:
	        lines = [line.strip() for line in seq_file]
	    # Blank (e.g. trailing) lines carry no data and would break int().
	    lines = [line for line in lines if line]
	    if not lines:
	        return None, []
	    frames = []
	    for line in lines[1:]:
	        raw_timestamp = int(line.split()[0])
	        frames.append((str(raw_timestamp), _format_timestamp(raw_timestamp)))
	    return lines[0], frames


	def _find_downloaded_video(stem="current"):
	    """Return the file in the working directory named *stem*, or ``None``.

	    The file may have any single extension or none at all.
	    """
	    for candidate in sorted(glob.glob("./*")):
	        name = os.path.basename(candidate)
	        if os.path.isfile(candidate) and os.path.splitext(name)[0] == stem:
	            return candidate
	    return None


	def _log_failure(txt_file):
	    """Append *txt_file* to the list of sequences that could not be processed."""
	    # The log file name is kept exactly as before so existing logs keep growing.
	    with open('falied_videos.txt', 'a') as failure_log:
	        failure_log.write(txt_file + '\n')


	def main(argv):
	    """Download each sequence's video and extract one PNG per timestamp.

	    Args:
	        argv: command-line vector; ``argv[1]`` must be ``"test"`` or ``"train"``.

	    Returns:
	        0 on completion, 1 when the arguments are invalid.

	    Raises:
	        OSError: if the ``ffmpeg`` executable cannot be started.
	    """
	    if len(argv) != 2:
	        print("usage: this.py [test or train]")
	        return 1

	    mode = argv[1]
	    if mode not in ("test", "train"):
	        print("invalid mode")
	        return 1

	    data_root = "./RealEstate10K/" + mode

	    seqname_list = sorted(glob.glob(data_root + "/*.txt"))
	    print("{} sequences are saved".format(len(seqname_list)))

	    for txt_file in seqname_list:
	        print("{} is the current target.".format(txt_file))

	        dir_name = os.path.basename(txt_file).split('.')[0]
	        output_root = './videos/' + mode + '/' + dir_name

	        # An existing output directory marks the sequence as already handled.
	        # NOTE(review): the directory is created before the download, so a
	        # sequence whose download fails is also skipped on later runs --
	        # confirm this is intended.
	        if os.path.exists(output_root):
	            continue
	        os.makedirs(output_root)

	        try:
	            youtube_url, frames = _parse_sequence_file(txt_file)
	        except (EnvironmentError, ValueError):
	            _log_failure(txt_file)
	            continue
	        if not youtube_url:
	            _log_failure(txt_file)
	            continue

	        # Exception (not a bare except) so Ctrl-C still stops the batch run.
	        try:
	            yt = YouTube(youtube_url)
	            stream = yt.streams.first()
	            stream.download('./', 'current')
	        except Exception:
	            _log_failure(txt_file)
	            continue

	        videoname = _find_downloaded_video()
	        if videoname is None:
	            _log_failure(txt_file)
	            continue

	        # ffmpeg -ss 00:01:28.800 -i tmp.mp4 -vframes 1 -f image2 out.png
	        try:
	            for raw_timestamp, seek_to in frames:
	                frame_path = output_root + '/' + raw_timestamp + '.png'
	                # Argument list (no shell) keeps odd characters in paths safe.
	                subprocess.call(['ffmpeg', '-ss', seek_to, '-i', videoname,
	                                 '-vframes', '1', '-f', 'image2', frame_path])
	        finally:
	            # Always drop the temporary video so the next sequence starts clean.
	            os.remove(videoname)
	        print("done!")

	    return 0


	if __name__ == "__main__":
	    sys.exit(main(sys.argv))