Skip to content

Instantly share code, notes, and snippets.

@ashenkin
Created February 12, 2025 19:41
Show Gist options
  • Save ashenkin/00d30b2d906d9c38b7d2849ba8c63490 to your computer and use it in GitHub Desktop.
Save ashenkin/00d30b2d906d9c38b7d2849ba8c63490 to your computer and use it in GitHub Desktop.
NAIP imagery download
import os
import requests
# Box developer token (expires after 60 minutes). Read from the
# BOX_ACCESS_TOKEN environment variable when set so the secret does not have
# to be pasted into the source; otherwise falls back to the placeholder below.
ACCESS_TOKEN = os.environ.get("BOX_ACCESS_TOKEN", "your_access_token")
# The base shared link for the public NAIP folder
BASE_SHARED_LINK = "https://nrcs.app.box.com/v/naip"
# (connect timeout, read timeout) in seconds for every Box request, so a
# stalled connection cannot hang the whole recursive download forever.
_REQUEST_TIMEOUT = (10, 300)
# Number of entries requested per folder-listing page.
_LIST_PAGE_SIZE = 1000
# A local file at least this fraction of the remote size is treated as complete.
_COMPLETE_SIZE_RATIO = 0.95


def _list_folder_items(folder_id, headers):
    """
    Fetch every entry of a Box folder, following offset-based pagination.
    :param folder_id: The Box folder ID (as a string) to list.
    :param headers: Request headers (authorization and shared-link context).
    :return: A list of entry dicts, or None if any page request failed
             (the HTTP error is printed before returning).
    """
    items = []
    offset = 0
    while True:
        # Append fields to get file size information (for files)
        list_url = (
            f"https://api.box.com/2.0/folders/{folder_id}/items"
            f"?fields=name,size&limit={_LIST_PAGE_SIZE}&offset={offset}"
        )
        response = requests.get(list_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(f"Error listing folder {folder_id}: HTTP {response.status_code}")
            print("Response:", response.text)
            return None
        data = response.json()
        entries = data.get("entries", [])
        items.extend(entries)
        offset += len(entries)
        total_count = data.get("total_count")
        # Stop on an empty page, or once the reported total has been collected.
        if not entries or (total_count is not None and offset >= total_count):
            return items


def _download_file(item, headers, file_path):
    """
    Stream one Box file to ``file_path``.
    The content is written to a temporary ``.part`` file and renamed only
    after the whole body has been received, so an interrupted transfer never
    leaves a truncated file under the final name.
    :param item: The Box entry dict of the file (uses its 'id' and 'name').
    :param headers: Request headers (authorization and shared-link context).
    :param file_path: Final local path of the downloaded file.
    """
    download_url = f"https://api.box.com/2.0/files/{item['id']}/content"
    partial_path = file_path + ".part"
    # The context manager releases the streamed connection on every exit path.
    with requests.get(download_url, headers=headers, stream=True,
                      timeout=_REQUEST_TIMEOUT) as r:
        if r.status_code != 200:
            print(f"Error downloading file '{item['name']}' (ID: {item['id']}): HTTP {r.status_code}")
            print("Response:", r.text)
            return
        with open(partial_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    # Overwrites any existing (incomplete) file in a single step.
    os.replace(partial_path, file_path)
    print(f"Downloaded: {file_path}")


def download_folder_by_id(folder_id, local_dir):
    """
    Recursively downloads all files and subfolders from a Box folder specified by its ID.
    Before downloading, checks if a local file exists and compares file sizes;
    a local file that is at least 95% of the remote size is skipped.
    If the folder listing fails, the error is printed and the folder is skipped.
    :param folder_id: The Box folder ID (as a string) to process.
    :param local_dir: The local directory in which to save files.
    """
    # Construct the shared link for this folder
    shared_link = f"{BASE_SHARED_LINK}?folder_id={folder_id}"
    # The same headers authorize the listing and every file download.
    headers = {
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        "BoxApi": f"shared_link={shared_link}"
    }
    print(f"\nListing folder {folder_id} using shared link: {shared_link}")
    items = _list_folder_items(folder_id, headers)
    if items is None:
        return
    print(f"Found {len(items)} items in folder {folder_id}.")
    # Create local directory if it doesn't exist
    if not os.path.exists(local_dir):
        os.makedirs(local_dir, exist_ok=True)
        print(f"Created local directory: {local_dir}")
    # Process each item in the folder
    for item in items:
        if item["type"] == "file":
            print(f"\nProcessing file: {item['name']} (ID: {item['id']})")
            file_path = os.path.join(local_dir, item["name"])
            remote_size = item.get("size")
            if remote_size is not None and os.path.exists(file_path):
                local_size = os.path.getsize(file_path)
                # If local file is at least 95% the size of remote file, assume it's complete.
                if local_size >= remote_size * _COMPLETE_SIZE_RATIO:
                    print(
                        f"File '{item['name']}' already exists and is complete (local: {local_size} bytes, remote: {remote_size} bytes). Skipping download.")
                    continue
                print(
                    f"File '{item['name']}' exists but is incomplete (local: {local_size} bytes, remote: {remote_size} bytes). Re-downloading.")
            elif os.path.exists(file_path):
                # If remote size is not available, but file exists, re-download to be safe.
                print(f"File '{item['name']}' exists locally but remote size is unknown. Re-downloading.")
            # Download the file (this will overwrite if it already exists)
            _download_file(item, headers, file_path)
        elif item["type"] == "folder":
            print(f"\nEntering subfolder: {item['name']} (ID: {item['id']})")
            new_local_dir = os.path.join(local_dir, item["name"])
            download_folder_by_id(item["id"], new_local_dir)
        else:
            print(f"Unknown item type '{item['type']}' for item {item['name']}")
# Example entry point: start the recursive download from the AZ subfolder
# (folder ID: 248353538400). Replace the folder ID and destination with your own.
# Guarded so that importing this module does not start a download.
if __name__ == "__main__":
    download_folder_by_id("248353538400", "./NAIP_Download/2023/AZ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment