Created
December 2, 2024 05:19
-
-
Save lennier1/9a2221ec2ad0c70fe2ccb0aa95de5f31 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import re | |
# Recognises ask.fm video-answer asset names at the start of a listing line:
# group 1 is the numeric ID, group 2 the optional "_big" marker, group 3 the
# file extension.
_ASSET_NAME_RE = re.compile(r'p_video_answer_(\d+)(_big)?\.(jpg|mp4)')


def _asset_urls(filename):
    """Return the (small thumbnail, big thumbnail, video) URLs for *filename*.

    Returns None when *filename* does not start with a
    ``p_video_answer_<id>[_big].(jpg|mp4)`` name.
    """
    match = _ASSET_NAME_RE.match(filename)
    if match is None:
        return None

    id_num = match.group(1)
    # The directory part is the ID left-padded with zeros to nine digits and
    # cut into three 3-digit components, e.g. 12345 -> 000/012/345.
    # NOTE(review): an ID longer than nine digits only contributes its first
    # nine digits to the path -- confirm this matches the server's layout.
    id_padded = id_num.zfill(9)
    path = '/'.join(id_padded[i:i + 3] for i in range(0, 9, 3))

    base_url = f"asset:cvad.ask.fm/{path}/p_video_answer_{id_num}"
    return f"{base_url}.jpg", f"{base_url}_big.jpg", f"{base_url}.mp4"


def main():
    """Generate ask.fm asset URL lists from files of video-answer filenames.

    Every command-line argument is read as a text file with one filename per
    line; blank lines are skipped. Whichever variant of an asset is listed
    (small thumbnail, big thumbnail or video), all three URLs for that ID are
    emitted. The de-duplicated, sorted URLs are written to
    ``small_thumbnails.txt``, ``big_thumbnails.txt`` and ``videos.txt``
    (paths relative to the current working directory).

    Lines that do not match the expected pattern and input files that cannot
    be opened or read are reported on stderr and skipped.

    Raises:
        SystemExit: with status 1 when no input filename is given.
    """
    if len(sys.argv) < 2:
        print("Usage: generate_askfm_assets.py filename1 [filename2 ...]")
        sys.exit(1)

    small_thumbnails = set()
    big_thumbnails = set()
    videos = set()

    for input_filename in sys.argv[1:]:
        try:
            with open(input_filename, 'r') as f:
                for line in f:
                    filename = line.strip()
                    if not filename:
                        continue
                    urls = _asset_urls(filename)
                    if urls is None:
                        print(f"Invalid filename format in {input_filename}: {filename}", file=sys.stderr)
                        continue
                    small_url, big_url, video_url = urls
                    small_thumbnails.add(small_url)
                    big_thumbnails.add(big_url)
                    videos.add(video_url)
        except FileNotFoundError:
            print(f"File not found: {input_filename}", file=sys.stderr)
        except OSError as err:
            # A directory or an unreadable file must not abort the whole run
            # and throw away the URLs collected from the other inputs.
            print(f"Could not read {input_filename}: {err}", file=sys.stderr)

    # Write to output files
    outputs = (
        ('small_thumbnails.txt', small_thumbnails),
        ('big_thumbnails.txt', big_thumbnails),
        ('videos.txt', videos),
    )
    for output_name, url_set in outputs:
        with open(output_name, 'w') as f:
            f.writelines(f"{url}\n" for url in sorted(url_set))
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment