Instead of loading the whole upload into memory with a single await file.read(), you can stream it to a temporary file on disk in chunks and then process that file by path:
import tempfile
from fastapi import UploadFile, HTTPException, status
async def process_file(file: UploadFile, mode: str):
    """Stream an uploaded PDF to a temporary file and extract its metadata.

    The upload is copied to disk in 1 MB chunks so the whole PDF is never
    held in memory at once. The temporary file is removed before this
    function returns, whether extraction succeeds or fails.

    Args:
        file: The uploaded file; its content type must be "application/pdf".
        mode: Passed through unchanged to ``extract_metadata``.

    Returns:
        Whatever ``extract_metadata`` returns for the temporary file.

    Raises:
        HTTPException: With status 400 if the upload is not declared as a PDF.
    """
    # Local imports keep this block self-contained; the snippet's import
    # header only brings in ``tempfile`` and the FastAPI names.
    import contextlib
    import os

    if file.content_type != "application/pdf":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File is not a PDF. File type: {file.content_type}",
        )

    tmp_path = None
    try:
        # delete=False so the file survives the ``with`` block and can be
        # reopened by path; cleanup is done explicitly in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name
            while chunk := await file.read(1024 * 1024):  # read in 1MB chunks
                tmp.write(chunk)

        # The file is closed (and flushed) here, so it can be read by path.
        # NOTE(review): assumes extract_metadata finishes reading the file
        # before it returns; confirm it does not keep a lazy reference to
        # the path, because the file is deleted right after.
        return extract_metadata(tmp_path, file.filename, mode)
    finally:
        # Without this the temp file would stay on disk after every request.
        if tmp_path is not None:
            # Tolerate the file already being gone so cleanup never masks
            # an exception raised above.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(tmp_path)
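Alternatively, FastAPI’s UploadFile is already backed by a SpooledTemporaryFile, so you can skip the copy and pass file.file directly (without calling .read()), provided extract_metadata accepts a binary file-like object rather than a file path: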
async def process_file(file: UploadFile, mode: str):
    """Check that the upload is a PDF and extract its metadata.

    Args:
        file: The uploaded file; its content type must be "application/pdf".
        mode: Passed through unchanged to ``extract_metadata``.

    Returns:
        Whatever ``extract_metadata`` returns for the upload.

    Raises:
        HTTPException: With status 400 if the upload is not declared as a PDF.
    """
    if file.content_type == "application/pdf":
        # Hand over the underlying file object instead of reading its
        # bytes here.
        return extract_metadata(file.file, file.filename, mode)

    # Anything other than a PDF content type is rejected.
    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail=f"File is not a PDF. File type: {file.content_type}",
    )
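This avoids reading the entire PDF into memory in your own code: a SpooledTemporaryFile keeps small uploads in memory and spills to disk once they exceed its size threshold. Whether the whole file ends up in memory still depends on how extract_metadata reads it.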