Created
July 2, 2025 03:22
-
-
Save SqrtRyan/c05658bad1b4d034d9c975b8fdc20b52 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rp import * | |
# Root directory of the Envato dataset: holds the caption CSV, the
# "Videos_Millions" video folder and the dot-prefixed cache files used below.
envato_dir = "/home/jupyter/CleanCode/Datasets/Envato"
@memoized
@file_cache_wrap(path_join(envato_dir, ".envato_with_hashfiles"))
def get_filtered_csv():
    """Return the caption table restricted to rows whose video is on disk.

    Steps:
      1. Load ``captioned_envato_3869336.csv`` from ``envato_dir``.
      2. List the entries of the ``Videos_Millions`` directory.
      3. Derive a ``hash_paths`` column from ``video_url`` by running
         ``get_cache_file_paths`` and then ``get_file_names`` over it
         (presumably URL -> cache path -> bare file name; confirm against
         the rp helpers). Both intermediate results go through
         ``file_cache_call`` with their own cache files.
      4. Keep only rows whose ``hash_paths`` value appears in the listing.

    The result is wrapped by ``file_cache_wrap`` and ``memoized``.
    """
    table = load_csv(
        path_join(envato_dir, "captioned_envato_3869336.csv"),
        show_progress=True,
        use_cache=True,
    )

    # NOTE(review): show_progress is handed to path_join here, not to
    # os.listdir -- kept as in the original; confirm this is intended.
    videos_dir = path_join(envato_dir, "Videos_Millions", show_progress=True)
    files_on_disk = os.listdir(videos_dir)

    # First stage: one cache path per video URL.
    cache_paths = file_cache_call(
        path_join(envato_dir, ".csv_cache_filenames.lines"),
        get_cache_file_paths,
        table.video_url,
        show_progress=True,
    )
    # Second stage: reduce those paths to the names compared against the
    # directory listing.
    table["hash_paths"] = file_cache_call(
        path_join(envato_dir, ".hash_paths.lines"),
        get_file_names,
        cache_paths,
        show_progress=True,
    )

    return table[table.hash_paths.isin(files_on_disk)]
###### | |
import decord, torch | |
def load_first_middle_last_frames(path: str) -> np.ndarray:
    """Decode just three frames of the video at *path*.

    The frames are requested at index 0, at ``len(video) // 2`` and at
    ``len(video) - 1``, in that order, and returned as the NumPy array
    produced by decord's ``get_batch(...).asnumpy()``.
    """
    import decord

    reader = decord.VideoReader(path)
    num_frames = len(reader)
    wanted_indices = [0, num_frames // 2, num_frames - 1]
    return reader.get_batch(wanted_indices).asnumpy()
# DEVICE = 0 | |
def process_row_helper(row):
    """Compute the score for one CSV row and return the row as a dict.

    Loads the first, middle and last frames of the row's video (located via
    ``row.hash_paths`` under ``Videos_Millions``), removes the watermark,
    and runs ``run_cotracker`` with a grid size of 25 on the module-level
    ``DEVICE``. The score is the number of entries of the visibility output
    that are set for the first and last frame but not for the middle one.

    Returns ``dict(row)`` with an added ``"score"`` key holding that count
    as an ``int``.
    """
    # NOTE: a per-call round-robin over GPUs (bumping a global DEVICE modulo
    # get_num_gpus()) used to be sketched here and is disabled; the single
    # module-level DEVICE is used for every call.
    clip_path = path_join(envato_dir, "Videos_Millions", row.hash_paths)
    frames = load_first_middle_last_frames(clip_path)

    from rp.git.remove_watermark.remove_watermark_envato import remove_watermark

    frames = remove_watermark(frames)

    # Only the visibility output is used; the tracks themselves are discarded.
    _, visibility = run_cotracker(frames, grid_size=25, device=DEVICE)
    first_vis, middle_vis, last_vis = visibility
    lost_only_in_middle = first_vis & last_vis & ~middle_vis
    score = int(lost_only_in_middle.sum())

    scored_row = dict(row)
    scored_row["score"] = score
    return scored_row
def process_index(index):
    """Score the row at position *index* of the global ``csv`` table.

    The scored row is obtained through ``file_cache_call`` using a per-row
    cache path under ``.scored_rows_cache`` and stored in the global
    ``results`` dict under the row's ``hash_paths`` value. Any exception
    raised while scoring or storing is printed in bold red and swallowed,
    so one bad row does not stop the overall run.
    """
    row = csv.iloc[index]
    scored_cache_dir = path_join(envato_dir, ".scored_rows_cache")
    cache_path = get_cache_file_path(row, cache_dir=scored_cache_dir)

    try:
        results[row["hash_paths"]] = file_cache_call(
            cache_path, process_row_helper, row
        )
    except Exception as error:
        # Deliberate best-effort: report the failure and move on.
        fansi_print(error, "red bold")
# --- Driver: score every row of the filtered table ---------------------------

# Scored rows keyed by their "hash_paths" value; filled in by process_index.
results = {}

# Module-level state read by process_index / process_row_helper.
csv = get_filtered_csv()
DEVICE = rp.select_torch_device(reserve=True)

# Visit the rows in shuffled order.
indices = shuffled(range(len(csv)))

load_files(
    process_index,
    indices,
    show_progress=True,
    # A multi-threaded setting (get_num_gpus() * 8) was tried and disabled.
    # NOTE(review): 0 presumably means "run in the calling thread" -- confirm
    # against rp.load_files.
    num_threads=0,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment