Skip to content

Instantly share code, notes, and snippets.

@tbbooher
Created May 17, 2025 16:18
Show Gist options
  • Save tbbooher/886eb3f0b4ba262cdb646702ac9c7e16 to your computer and use it in GitHub Desktop.
Save tbbooher/886eb3f0b4ba262cdb646702ac9c7e16 to your computer and use it in GitHub Desktop.
import os
from datetime import datetime, timedelta
from time import sleep

import psycopg2
import requests
# API documentation:
# https://open.gsa.gov/api/get-opportunities-public-api/#user-account-api-key-creation

# SECURITY NOTE(review): this API key was previously hard-coded in source
# (and published in a public gist) — it should be rotated. The environment
# variable takes precedence; the old literal remains only as a
# backward-compatible default.
API_KEY = os.environ.get("SAM_API_KEY", "VdIf7RKVJATd3dCMnYZ64lZOyrM5O6lKSGOnc3bH")
BASE_URL = "https://api.sam.gov/prod/opportunities/v2/search"
PAGE_SIZE = 1000  # records requested per API call
DELAY = 3  # seconds between requests to avoid rate limits

# PostgreSQL connection settings
db_config = {
    "host": "localhost",
    "port": 5432,
    "dbname": "sam",
    "user": "tim",
    # Prefer the environment over editing source; empty string = no password.
    "password": os.environ.get("SAM_DB_PASSWORD", ""),
}
# SQL to create the table.
# notice_id carries a UNIQUE constraint so re-running the downloader is
# idempotent (see the ON CONFLICT clause in insert_sql below).
create_table_sql = """
CREATE TABLE IF NOT EXISTS dod_contract_opportunities (
id SERIAL PRIMARY KEY,
notice_id TEXT UNIQUE,
title TEXT,
naics TEXT,
agency TEXT,
base_award_amount NUMERIC,
total_award_amount NUMERIC,
current_response_date TIMESTAMP,
last_modified_date TIMESTAMP,
last_published_date TIMESTAMP,
contract_award_number TEXT,
contract_award_date DATE,
active_inactive TEXT
);
"""
# SQL to insert one opportunity row (parameterized — values are always
# bound, never string-formatted). Duplicate notice_ids are silently
# skipped via ON CONFLICT DO NOTHING.
insert_sql = """
INSERT INTO dod_contract_opportunities (
notice_id, title, naics, agency, base_award_amount, total_award_amount,
current_response_date, last_modified_date, last_published_date,
contract_award_number, contract_award_date, active_inactive
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (notice_id) DO NOTHING;
"""
# Function to fetch a page of results
def fetch_page(start, posted_from, posted_to):
    """Fetch one page of award notices from the SAM.gov opportunities API.

    Parameters:
        start: zero-based record offset for pagination.
        posted_from, posted_to: datetime bounds for the postedFrom/postedTo
            window (the API expects MM/DD/YYYY strings).

    Returns the decoded JSON response body as a dict.
    Raises requests.HTTPError on a non-2xx response.
    """
    params = {
        "api_key": API_KEY,
        "limit": PAGE_SIZE,
        "start": start,
        "noticeType": "award",
        "organizationId": "300000",  # Department of Defense
        "postedFrom": posted_from.strftime("%m/%d/%Y"),
        "postedTo": posted_to.strftime("%m/%d/%Y"),
    }
    # Bug fix: requests has no default timeout, so a stalled connection
    # previously hung the whole batch job indefinitely.
    response = requests.get(BASE_URL, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
# Function to parse date fields safely
def parse_datetime(date_str):
    """Parse an ISO-8601 timestamp (optionally 'Z'-suffixed) into a datetime.

    Returns None for empty/missing input or an unparseable string.
    """
    if date_str:
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except ValueError:
            pass
    return None
def parse_date(date_str):
    """Parse a YYYY-MM-DD string into a date.

    Returns None for empty/missing input or a string in any other format.
    """
    if date_str:
        try:
            return datetime.strptime(date_str, "%Y-%m-%d").date()
        except ValueError:
            pass
    return None
# Function to insert data into Postgres
def insert_into_db(conn, data):
    """Insert every opportunity from one API response page, then commit.

    Duplicate notice_ids are skipped by insert_sql's ON CONFLICT clause,
    so replaying a page is harmless.
    """
    records = data.get('opportunitiesData', [])
    with conn.cursor() as cur:
        for record in records:
            row = (
                record.get('noticeId'),
                record.get('title'),
                record.get('naicsCode'),
                record.get('agencyName'),
                record.get('baseAwardAmount'),
                record.get('totalAwardAmount'),
                parse_datetime(record.get('responseDate')),
                parse_datetime(record.get('lastModifiedDate')),
                parse_datetime(record.get('publishDate')),
                record.get('awardNumber'),
                parse_date(record.get('awardDate')),
                record.get('active'),
            )
            cur.execute(insert_sql, row)
    conn.commit()
# Main loop to batch download and insert
def main():
    """Create the target table, then download and store DoD award notices.

    Walks month-by-month from Jan 2024 to today, paginating within each
    month, and inserts every page into Postgres.
    """
    conn = psycopg2.connect(**db_config)
    try:
        # psycopg2's `with conn:` commits the transaction on exit; it does
        # NOT close the connection, which is why the explicit close below
        # is needed.
        with conn:
            with conn.cursor() as cur:
                cur.execute(create_table_sql)
        # Loop month-by-month from Jan 2024 to today.
        start_date = datetime(2024, 1, 1)
        end_date = datetime.today()
        while start_date < end_date:
            # Adding 31 days always lands in the following month (no month
            # is longer); replace(day=1) gives that month's first day, and
            # one day earlier is the last day of the current month.
            next_month = start_date + timedelta(days=31)
            posted_from = start_date
            posted_to = min(end_date, next_month.replace(day=1) - timedelta(days=1))
            print(f"\nFetching data from {posted_from.date()} to {posted_to.date()}")
            start = 0
            while True:
                print(f" - Fetching records starting at {start}")
                data = fetch_page(start, posted_from, posted_to)
                insert_into_db(conn, data)
                # A short page means this was the last page for the window.
                if len(data.get('opportunitiesData', [])) < PAGE_SIZE:
                    break
                start += PAGE_SIZE
                sleep(DELAY)
            start_date = next_month.replace(day=1)
    finally:
        # Bug fix: the connection was previously never closed, leaking the
        # server-side session even on a clean run.
        conn.close()

if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment