Created
February 12, 2025 19:41
-
-
Save ashenkin/00d30b2d906d9c38b7d2849ba8c63490 to your computer and use it in GitHub Desktop.
NAIP imagery download
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
# Box developer token (expires after 60 minutes). It is read from the
# BOX_ACCESS_TOKEN environment variable when that is set and non-empty, so the
# secret does not have to be pasted into the script; otherwise the placeholder
# below is used and must be replaced by hand.
ACCESS_TOKEN = os.environ.get("BOX_ACCESS_TOKEN") or "your_access_token"
# The base shared link for the public NAIP folder
BASE_SHARED_LINK = "https://nrcs.app.box.com/v/naip"
# (connect, read) timeouts in seconds, so a stalled connection cannot hang the
# whole recursive download indefinitely.
_REQUEST_TIMEOUT = (10, 120)
# Page size requested from the Box "list items in folder" endpoint.
_PAGE_LIMIT = 1000


def _list_folder_items(folder_id, headers):
    """Return all entries of a Box folder, following offset-based pagination.

    :param folder_id: The Box folder ID (as a string) to list.
    :param headers: Request headers carrying the bearer token and shared link.
    :return: A list of item dicts, or None if any page request failed
        (the error is printed before returning).
    """
    items = []
    offset = 0
    while True:
        # Ask for name and size so the caller can compare against local files.
        list_url = (
            f"https://api.box.com/2.0/folders/{folder_id}/items"
            f"?fields=name,size&limit={_PAGE_LIMIT}&offset={offset}"
        )
        try:
            response = requests.get(list_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Error listing folder {folder_id}: {exc}")
            return None
        if response.status_code != 200:
            print(f"Error listing folder {folder_id}: HTTP {response.status_code}")
            print("Response:", response.text)
            return None

        data = response.json()
        entries = data.get("entries", [])
        items.extend(entries)
        offset += len(entries)

        if not entries:
            break
        total_count = data.get("total_count")
        if total_count is None:
            # No total reported: a short page means there is nothing left.
            if len(entries) < _PAGE_LIMIT:
                break
        elif offset >= total_count:
            break
    return items


def _download_file(download_url, headers, file_path, name, file_id):
    """Stream one Box file to ``file_path``, replacing any existing file.

    The data is written to a temporary ``.part`` file next to the target and
    only moved into place once the transfer has finished, so an interrupted
    download never leaves a truncated file under the final name.
    """
    tmp_path = file_path + ".part"
    try:
        with requests.get(download_url, headers=headers, stream=True,
                          timeout=_REQUEST_TIMEOUT) as r:
            if r.status_code != 200:
                print(f"Error downloading file '{name}' (ID: {file_id}): HTTP {r.status_code}")
                print("Response:", r.text)
                return
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(tmp_path, file_path)
        print(f"Downloaded: {file_path}")
    except requests.RequestException as exc:
        # Report and carry on with the remaining items instead of aborting.
        print(f"Error downloading file '{name}' (ID: {file_id}): {exc}")
    finally:
        # After a successful os.replace the temp file no longer exists.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def download_folder_by_id(folder_id, local_dir):
    """
    Recursively downloads all files and subfolders from a Box folder specified by its ID.
    Before downloading, checks if a local file exists and compares file sizes.

    The folder listing is fetched page by page, so folders with more entries
    than a single API page are processed completely. Listing or download
    errors are printed and the affected folder/file is skipped.

    :param folder_id: The Box folder ID (as a string) to process.
    :param local_dir: The local directory in which to save files.
    """
    # Construct the shared link for this folder
    shared_link = f"{BASE_SHARED_LINK}?folder_id={folder_id}"
    # The same headers authorise both the listing and the file downloads.
    headers = {
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        "BoxApi": f"shared_link={shared_link}"
    }

    print(f"\nListing folder {folder_id} using shared link: {shared_link}")
    items = _list_folder_items(folder_id, headers)
    if items is None:
        return
    print(f"Found {len(items)} items in folder {folder_id}.")

    # Create local directory if it doesn't exist
    if not os.path.exists(local_dir):
        os.makedirs(local_dir, exist_ok=True)
        print(f"Created local directory: {local_dir}")

    # Process each item in the folder
    for item in items:
        item_type = item["type"]
        if item_type == "file":
            print(f"\nProcessing file: {item['name']} (ID: {item['id']})")
            download_url = f"https://api.box.com/2.0/files/{item['id']}/content"
            file_path = os.path.join(local_dir, item["name"])
            remote_size = item.get("size")

            if os.path.exists(file_path):
                if remote_size is not None:
                    local_size = os.path.getsize(file_path)
                    # If local file is at least 95% the size of remote file, assume it's complete.
                    # NOTE(review): tolerance kept from the original script; confirm
                    # whether an exact size match would be more appropriate.
                    if local_size >= remote_size * 0.95:
                        print(
                            f"File '{item['name']}' already exists and is complete (local: {local_size} bytes, remote: {remote_size} bytes). Skipping download.")
                        continue
                    print(
                        f"File '{item['name']}' exists but is incomplete (local: {local_size} bytes, remote: {remote_size} bytes). Re-downloading.")
                else:
                    # If remote size is not available, but file exists, re-download to be safe.
                    print(f"File '{item['name']}' exists locally but remote size is unknown. Re-downloading.")

            # Download the file (this will overwrite if it already exists)
            _download_file(download_url, headers, file_path, item["name"], item["id"])
        elif item_type == "folder":
            print(f"\nEntering subfolder: {item['name']} (ID: {item['id']})")
            new_local_dir = os.path.join(local_dir, item["name"])
            download_folder_by_id(item["id"], new_local_dir)
        else:
            print(f"Unknown item type '{item['type']}' for item {item['name']}")
# For example, start the recursive download from the AZ subfolder (folder ID: 248353538400). Fill with your own data.
# Guarded so that importing this module does not start a download as a side effect.
if __name__ == "__main__":
    download_folder_by_id("248353538400", "./NAIP_Download/2023/AZ")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment