edgartanaka · March 3, 2025 17:55 · edgartanaka · Jul 6, 2020
diff --git a/download-series-tmdb.py b/download-series-tmdb.py
 import pandas as pd
 import urllib.request
 from tqdm import tqdm
 from multiprocessing.pool import ThreadPool
 import os.path

 #
 # This gist downloads all series from TMDB. You can easily modify this to download all movies.
 # It uses threads to parallelize downloads and speed up this process.
 # Depends on python 3. Tested on anaconda.
 # Steps:
 # 1. download this file from http://files.tmdb.org/p/exports/tv_series_ids_07_05_2020.json.gz and uncompress in the local directory
 # 2. create an api key in TMDB site
 # 3. set your api key in the script
 # 4. install python libs: pandas, tqdm
 # 5. run script
 #

 # TODO: add your api key here
 api_key = ''

 # Load the TMDB id export; the file holds one JSON object per line.
 # Ref: https://developers.themoviedb.org/3/getting-started/daily-file-exports
 df = pd.read_json('tv_series_ids_07_05_2020.json', lines=True)
 ids = list(df['id'])

 # One (local file name, request URL) pair per series id.
 urls = [
     (
         f"series_{series_id}.json",
         f"https://api.themoviedb.org/3/tv/{series_id}?api_key={api_key}",
     )
     for series_id in ids
 ]

 def log_failed(uri):
    """Record a URL whose download failed in ``series_failed.txt``.

    The file is opened in append mode and each URL gets its own line, so
    every failure of a run is kept rather than only the most recent one.

    Args:
        uri: the request URL that could not be fetched.
    """
    with open('series_failed.txt', 'a') as writer:
        writer.write(f"{uri}\n")

 def is_file_exists(path):
    """Tell whether ``path`` names an existing regular file.

    Returns the result of ``os.path.isfile``: ``True`` for a regular file,
    ``False`` for a directory or a path that does not exist.
    """
    found = os.path.isfile(path)
    return found

 def fetch_url(entry):
    """Download one series document unless it is already on disk.

    Args:
        entry: ``(path, uri)`` tuple -- the local target file and the URL
            to fetch.

    Returns:
        ``path`` when the file already existed or was downloaded, ``None``
        when the download failed (the URL is then passed to ``log_failed``).
    """
    # Unpack outside the try block so ``uri`` is always bound in the handler.
    path, uri = entry
    if is_file_exists(path):
        return path

    # Download to a side file first: a transfer that dies half-way must not
    # leave a truncated ``path`` behind, because the existence check above
    # would make every later run skip it.
    partial = path + '.part'
    try:
        urllib.request.urlretrieve(uri, partial)
        os.replace(partial, path)
    except Exception:
        try:
            os.remove(partial)
        except OSError:
            pass  # nothing was written, or it is already gone
        log_failed(uri)
        return None
    return path

 # Fan the downloads out over 8 worker threads; the ``with`` block shuts the
 # pool down when it is left.
 with ThreadPool(8) as pool:
    results = pool.imap_unordered(fetch_url, urls)

    # Consume every result inside the ``with`` block: leaving it terminates
    # the pool, so the loop has to finish first. ``total`` lets tqdm draw a
    # real progress bar instead of a bare counter.
    for path in tqdm(results, total=len(urls)):
        pass
	import pandas as pd
	import urllib.request
	from tqdm import tqdm
	from multiprocessing.pool import ThreadPool
	import os.path

	#
	# This gist downloads all series from TMDB. You can easily modify this to download all movies.
	# It uses threads to parallelize downloads and speed up this process.
	# Depends on python 3. Tested on anaconda.
	# Steps:
	# 1. download this file from http://files.tmdb.org/p/exports/tv_series_ids_07_05_2020.json.gz and uncompress in the local directory
	# 2. create an api key in TMDB site
	# 3. set your api key in the script
	# 4. install python libs: pandas, tqdm
	# 5. run script
	#

	# TODO: add your api key here
	api_key = ''

	# Load the TMDB id export; the file holds one JSON object per line.
	# Ref: https://developers.themoviedb.org/3/getting-started/daily-file-exports
	df = pd.read_json('tv_series_ids_07_05_2020.json', lines=True)
	ids = list(df['id'])

	# One (local file name, request URL) pair per series id.
	urls = [
	    (
	        f"series_{series_id}.json",
	        f"https://api.themoviedb.org/3/tv/{series_id}?api_key={api_key}",
	    )
	    for series_id in ids
	]

	def log_failed(uri):
	    """Record a URL whose download failed in ``series_failed.txt``.

	    The file is opened in append mode and each URL gets its own line, so
	    every failure of a run is kept rather than only the most recent one.

	    Args:
	        uri: the request URL that could not be fetched.
	    """
	    with open('series_failed.txt', 'a') as writer:
	        writer.write(f"{uri}\n")

	def is_file_exists(path):
	    """Tell whether ``path`` names an existing regular file.

	    Returns the result of ``os.path.isfile``: ``True`` for a regular file,
	    ``False`` for a directory or a path that does not exist.
	    """
	    return os.path.isfile(path)

	def fetch_url(entry):
	    """Download one series document unless it is already on disk.

	    Args:
	        entry: ``(path, uri)`` tuple -- the local target file and the URL
	            to fetch.

	    Returns:
	        ``path`` when the file already existed or was downloaded, ``None``
	        when the download failed (the URL is then passed to ``log_failed``).
	    """
	    # Unpack outside the try block so ``uri`` is always bound in the handler.
	    path, uri = entry
	    if is_file_exists(path):
	        return path

	    # Download to a side file first: a transfer that dies half-way must not
	    # leave a truncated ``path`` behind, because the existence check above
	    # would make every later run skip it.
	    partial = path + '.part'
	    try:
	        urllib.request.urlretrieve(uri, partial)
	        os.replace(partial, path)
	    except Exception:
	        try:
	            os.remove(partial)
	        except OSError:
	            pass  # nothing was written, or it is already gone
	        log_failed(uri)
	        return None
	    return path

	# Fan the downloads out over 8 worker threads; the ``with`` block shuts the
	# pool down when it is left.
	with ThreadPool(8) as pool:
	    results = pool.imap_unordered(fetch_url, urls)

	    # Consume every result inside the ``with`` block: leaving it terminates
	    # the pool, so the loop has to finish first. ``total`` lets tqdm draw a
	    # real progress bar instead of a bare counter.
	    for path in tqdm(results, total=len(urls)):
	        pass