Skip to content

Instantly share code, notes, and snippets.

@SqrtRyan
Created April 7, 2025 09:35
Show Gist options
  • Save SqrtRyan/a76ad935db9e374171f519c70bb4b76d to your computer and use it in GitHub Desktop.
Save SqrtRyan/a76ad935db9e374171f519c70bb4b76d to your computer and use it in GitHub Desktop.
# XCloud Common Import Paths
import rp
import sys
sys.path += rp.get_absolute_paths(
"~/CleanCode/Management",
"~/CleanCode/Github/DiffusionAsShader",
# "~/CleanCode/Github/CogvideX-Interpolation-Mar23:MotionPrompting",
# "~/CleanCode/Github/CogvideX-Interpolation-Feb13:Inpainting",
)
import syncutil
##############################
@memoized
@file_cache_wrap(
    "~/CleanCode/Datasets/Vids/Raw_Feb28/.cache/vids_gsls.txt",
    save_file_lines,
    load_file_lines,
)
def youtube_gsls():
    """Return the ``gsutil ls`` listing of the raw Feb28 video folder.

    The result is wrapped by ``file_cache_wrap`` (backed by the
    ``vids_gsls.txt`` file above) and by ``memoized``.
    Presumably the listing is a list of ``gs://`` URL strings, one per
    line of the cache file -- confirm against ``syncutil.gsutil_ls``.

    To refresh, run youtube_gsls.clear_cache() (original author's note).
    """
    listing = syncutil.gsutil_ls("~/CleanCode/Datasets/Vids/Raw_Feb28/vids")
    return listing
@memoized
def youtube_gs_pairs():
    """Group the listed URLs into sorted two-element [video, prompt] pairs.

    URLs are clustered by their text with ".mp4" and "_text.txt" removed,
    so a video and its prompt file share one key. Only clusters holding
    exactly two URLs are kept; each kept cluster is sorted, which puts the
    ".mp4" URL ahead of the "_text.txt" URL ('.' orders before '_').

    EXAMPLE: [
        ...,
        [
            'gs://xcloud-shared/burgert/CleanCode/Datasets/Vids/Raw_Feb28/vids/srl24IxoHSE_294941855_300943717.mp4',
            'gs://xcloud-shared/burgert/CleanCode/Datasets/Vids/Raw_Feb28/vids/srl24IxoHSE_294941855_300943717_text.txt'
        ],
        ...,
    ]
    """

    def sample_key(url):
        # Strip both known suffixes so the video and its prompt collide.
        return url.replace(".mp4", "").replace("_text.txt", "")

    clusters = cluster_by_key(youtube_gsls(), key=sample_key)

    complete_pairs = []
    for cluster in clusters:
        # Drop orphans (video without prompt or vice versa) and oddities.
        if len(cluster) == 2:
            complete_pairs.append(sorted(cluster))
    return complete_pairs
class GsSample:
    """A sample that lives both at a cloud URL and at a local path.

    Both locations are derived from a single ``loc`` via ``syncutil``.

    Attributes:
        url: Result of ``syncutil.get_xcloud_url(loc)``.
        path: Result of ``syncutil.get_local_cleancode_path(loc)``.
    """

    def __init__(self, loc=None):
        """Resolve ``loc`` into its cloud URL and local path.

        NOTE(review): ``loc=None`` is forwarded as-is to ``syncutil``;
        confirm that ``syncutil`` accepts ``None``.
        """
        self.url = syncutil.get_xcloud_url(loc)
        self.path = syncutil.get_local_cleancode_path(loc)

    def download(self):
        """Force-download ``self.url`` via ``syncutil.download``; return its result."""
        return syncutil.download(self.url, force=True)

    def upload(self):
        """Force-upload ``self.path`` via ``syncutil.upload``; return its result."""
        return syncutil.upload(self.path, force=True)

    def delete_local(self):
        """Recursively remove the local copy at ``self.path`` (``rm -rf``).

        A missing path is not an error, matching ``rm -f`` semantics.
        """
        # Imported here so the method works even if the enclosing module
        # never imported these names itself.
        import os
        import shlex

        # shlex.quote keeps paths with spaces/metacharacters shell-safe.
        os.system("rm -rf " + shlex.quote(self.path))

    def __repr__(self):
        return f"GsSample(path={self.path}, url={self.url})"
class RawYoutubeGsSample(GsSample):
    """One raw YouTube sample: a video URL plus its prompt-text URL.

    Each sample gets its own folder ``ROOT/<sample_name>`` that is handed
    to ``GsSample`` as its location; the video and prompt are stored in it
    as ``video.mp4`` and ``prompt.txt``.
    """

    # Root folder under which every processed sample folder is placed.
    ROOT = get_absolute_path("~/CleanCode/Datasets/Vids/Raw_Feb28/Processed_April7")

    def __init__(self, sample_name, video_url, prompt_url):
        """Record the source URLs and derive the sample's local file paths."""
        sample_dir = path_join(self.ROOT, sample_name)

        self.sample_name = sample_name
        self.video_url = video_url
        self.prompt_url = prompt_url

        # Base class sets self.url / self.path from the sample folder.
        super().__init__(sample_dir)

        # NOTE(review): these are joined onto sample_dir, not self.path;
        # confirm the two always refer to the same folder.
        self.video_path = path_join(sample_dir, "video.mp4")
        self.prompt_path = path_join(sample_dir, "prompt.txt")

    def download(self):
        """Fetch the video and prompt into the sample folder; return ``self.path``."""
        make_directory(self.path)

        sources = [self.video_url, self.prompt_url]
        destinations = [self.video_path, self.prompt_path]
        # par_map pairs each source URL with its destination path.
        par_map(download_url, sources, destinations)

        return self.path
class GsDataset:
    """Minimal sequence-style base class backed by ``self.samples``.

    This class never assigns ``samples`` itself; subclasses must either
    set it or override ``__len__`` / ``__getitem__``.
    """

    def __len__(self):
        """Return how many samples the dataset holds."""
        samples = self.samples
        return len(samples)

    def __getitem__(self, i):
        """Return ``self.samples[i]``; indexing rules are those of ``samples``."""
        samples = self.samples
        return samples[i]
class RawYoutubeDataset(GsDataset):
    """Dataset of raw YouTube samples built from ``youtube_gs_pairs()``.

    Holds two parallel lists, ``video_urls`` and ``prompt_urls``, and
    builds a ``RawYoutubeGsSample`` on demand for each index.
    """

    def __init__(self):
        """Split the [video, prompt] pairs into two parallel URL lists."""
        pairs = youtube_gs_pairs()
        # Build the columns directly rather than unpacking a transpose:
        # with zero pairs a transpose has no columns to unpack, which
        # would raise instead of producing an empty dataset.
        self.video_urls = [video_url for video_url, _ in pairs]
        self.prompt_urls = [prompt_url for _, prompt_url in pairs]

    def __len__(self):
        """Return the number of (video, prompt) pairs."""
        return len(self.video_urls)

    def __getitem__(self, i):
        """Build the sample for pair ``i``.

        The sample name is the video URL's file name without extension.
        An out-of-range ``i`` raises ``IndexError`` from the list lookup.
        """
        video_url = self.video_urls[i]
        return RawYoutubeGsSample(
            get_file_name(video_url, include_file_extension=False),
            video_url,
            self.prompt_urls[i],
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment