Created
April 7, 2025 09:35
-
-
Save SqrtRyan/a76ad935db9e374171f519c70bb4b76d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# XCloud Common Import Paths
import rp
import sys

# Extend the module search path before importing syncutil below.
# NOTE(review): syncutil presumably lives in one of these directories -- confirm.
sys.path.extend(
    rp.get_absolute_paths(
        "~/CleanCode/Management",
        "~/CleanCode/Github/DiffusionAsShader",
        # "~/CleanCode/Github/CogvideX-Interpolation-Mar23:MotionPrompting",
        # "~/CleanCode/Github/CogvideX-Interpolation-Feb13:Inpainting",
    )
)
import syncutil
############################## | |
@memoized
@file_cache_wrap(
    "~/CleanCode/Datasets/Vids/Raw_Feb28/.cache/vids_gsls.txt",
    save_file_lines,
    load_file_lines,
)
def youtube_gsls():
    """Return ``syncutil.gsutil_ls`` of the Raw_Feb28 ``vids`` folder.

    The function is wrapped by ``file_cache_wrap`` (given a cache file path
    plus line-based save/load helpers) and by ``memoized``.
    NOTE(review): presumably the listing is cached on disk and in memory so
    the remote listing runs only once -- confirm against those decorators.

    To refresh, run youtube_gsls.clear_cache()
    """
    vids_folder = "~/CleanCode/Datasets/Vids/Raw_Feb28/vids"
    return syncutil.gsutil_ls(vids_folder)
@memoized
def youtube_gs_pairs():
    """Group the URLs from ``youtube_gsls()`` into sorted two-element lists.

    URLs are clustered by the string left after removing ".mp4" and
    "_text.txt"; only clusters with exactly two members are kept, and each
    kept cluster is sorted.

    Returns:
        A list of two-element lists of URLs, shaped like the example below.
    """

    def sample_key(url):
        # A video and its text file share the same key once both markers are removed.
        return url.replace(".mp4", "").replace("_text.txt", "")

    clusters = cluster_by_key(youtube_gsls(), key=sample_key)

    pairs = []
    for cluster in clusters:
        # Skip anything that is not exactly a two-file group.
        if len(cluster) == 2:
            pairs.append(sorted(cluster))

    # EXAMPLE: [
    #     ...,
    #     [
    #         'gs://xcloud-shared/burgert/CleanCode/Datasets/Vids/Raw_Feb28/vids/srl24IxoHSE_294941855_300943717.mp4',
    #         'gs://xcloud-shared/burgert/CleanCode/Datasets/Vids/Raw_Feb28/vids/srl24IxoHSE_294941855_300943717_text.txt'
    #     ],
    #     ...,
    # ]
    return pairs
class GsSample:
    """A sample identified by an xcloud URL and a local CleanCode path.

    Attributes:
        url: Result of ``syncutil.get_xcloud_url(loc)``.
        path: Result of ``syncutil.get_local_cleancode_path(loc)``.
    """

    def __init__(self, loc=None):
        """Resolve ``loc`` into ``self.url`` and ``self.path`` via syncutil."""
        self.url = syncutil.get_xcloud_url(loc)
        self.path = syncutil.get_local_cleancode_path(loc)

    def download(self):
        """Call ``syncutil.download`` on ``self.url`` with ``force=True`` and return its result."""
        return syncutil.download(self.url, force=True)

    def upload(self):
        """Call ``syncutil.upload`` on ``self.path`` with ``force=True`` and return its result."""
        return syncutil.upload(self.path, force=True)

    def delete_local(self):
        """Remove ``self.path`` from the local disk with ``rm -rf``.

        The path is shell-quoted before being passed to the shell. Returns None.
        """
        # Imported here so the method does not rely on module-level imports
        # of os / shlex being present.
        import os
        import shlex

        os.system("rm -rf " + shlex.quote(self.path))

    def __repr__(self):
        return f"GsSample(path={self.path}, url={self.url})"
class RawYoutubeGsSample(GsSample):
    """A GsSample made of one video URL and one prompt URL.

    The sample's location is ``ROOT/<sample_name>``; the two files are
    stored there as ``video.mp4`` and ``prompt.txt``.
    """

    # Base directory under which every sample gets its own sub-location.
    ROOT = get_absolute_path("~/CleanCode/Datasets/Vids/Raw_Feb28/Processed_April7")

    def __init__(self, sample_name, video_url, prompt_url):
        """Record the source URLs and derive the sample's location and file paths.

        Args:
            sample_name: Name joined onto ``ROOT`` to form the sample location.
            video_url: URL passed to ``download_url`` for the video.
            prompt_url: URL passed to ``download_url`` for the prompt.
        """
        self.sample_name = sample_name
        self.video_url = video_url
        self.prompt_url = prompt_url

        sample_loc = path_join(self.ROOT, sample_name)
        super().__init__(sample_loc)  # sets self.url and self.path

        self.video_path = path_join(sample_loc, "video.mp4")
        self.prompt_path = path_join(sample_loc, "prompt.txt")

    def download(self):
        """Fetch the video and prompt into ``self.path`` and return ``self.path``.

        NOTE(review): par_map presumably runs the two downloads in parallel --
        confirm against its definition.
        """
        make_directory(self.path)
        sources = [self.video_url, self.prompt_url]
        destinations = [self.video_path, self.prompt_path]
        par_map(download_url, sources, destinations)
        return self.path
class GsDataset:
    """Base dataset that exposes ``self.samples`` through ``len()`` and indexing.

    This class never assigns ``self.samples`` itself; whoever uses these
    methods must set that attribute (or override the methods).
    """

    def __len__(self):
        """Return the number of entries in ``self.samples``."""
        samples = self.samples
        return len(samples)

    def __getitem__(self, i):
        """Return ``self.samples[i]``."""
        samples = self.samples
        return samples[i]
class RawYoutubeDataset(GsDataset):
    """Dataset whose items are built on demand from ``youtube_gs_pairs()``.

    Attributes:
        video_urls: First element of every pair.
        prompt_urls: Second element of every pair.
    """

    def __init__(self):
        """Split the pairs from ``youtube_gs_pairs()`` into two parallel sequences."""
        pairs = youtube_gs_pairs()
        if pairs:
            self.video_urls, self.prompt_urls = list_transpose(pairs)
        else:
            # With no pairs there are no two columns to unpack, so start
            # with an empty dataset instead of failing.
            self.video_urls, self.prompt_urls = [], []

    def __len__(self):
        """Return the number of video URLs."""
        return len(self.video_urls)

    def __getitem__(self, i):
        """Build the ``RawYoutubeGsSample`` for index ``i``.

        The sample name is the video URL's file name without its extension.
        """
        video_url = self.video_urls[i]
        return RawYoutubeGsSample(
            get_file_name(video_url, include_file_extension=False),
            video_url,
            self.prompt_urls[i],
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment