Created
May 17, 2025 16:18
-
-
Save tbbooher/886eb3f0b4ba262cdb646702ac9c7e16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import psycopg2 | |
| from time import sleep | |
| from datetime import datetime, timedelta | |
| # documentation | |
| # https://open.gsa.gov/api/get-opportunities-public-api/#user-account-api-key-creation | |
| # Constants | |
| API_KEY = "VdIf7RKVJATd3dCMnYZ64lZOyrM5O6lKSGOnc3bH" | |
| BASE_URL = "https://api.sam.gov/prod/opportunities/v2/search" | |
| PAGE_SIZE = 1000 | |
| DELAY = 3 # seconds between requests to avoid rate limits | |
| # PostgreSQL connection settings | |
| db_config = { | |
| "host": "localhost", | |
| "port": 5432, | |
| "dbname": "sam", | |
| "user": "tim", | |
| "password": "" # Add your password if needed | |
| } | |
| # SQL to create the table | |
| create_table_sql = """ | |
| CREATE TABLE IF NOT EXISTS dod_contract_opportunities ( | |
| id SERIAL PRIMARY KEY, | |
| notice_id TEXT UNIQUE, | |
| title TEXT, | |
| naics TEXT, | |
| agency TEXT, | |
| base_award_amount NUMERIC, | |
| total_award_amount NUMERIC, | |
| current_response_date TIMESTAMP, | |
| last_modified_date TIMESTAMP, | |
| last_published_date TIMESTAMP, | |
| contract_award_number TEXT, | |
| contract_award_date DATE, | |
| active_inactive TEXT | |
| ); | |
| """ | |
| # SQL to insert data | |
| insert_sql = """ | |
| INSERT INTO dod_contract_opportunities ( | |
| notice_id, title, naics, agency, base_award_amount, total_award_amount, | |
| current_response_date, last_modified_date, last_published_date, | |
| contract_award_number, contract_award_date, active_inactive | |
| ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) | |
| ON CONFLICT (notice_id) DO NOTHING; | |
| """ | |
| # Function to fetch a page of results | |
| def fetch_page(start, posted_from, posted_to): | |
| params = { | |
| "api_key": API_KEY, | |
| "limit": PAGE_SIZE, | |
| "start": start, | |
| "noticeType": "award", | |
| "organizationId": "300000", # Department of Defense | |
| "postedFrom": posted_from.strftime("%m/%d/%Y"), | |
| "postedTo": posted_to.strftime("%m/%d/%Y") | |
| } | |
| response = requests.get(BASE_URL, params=params) | |
| response.raise_for_status() | |
| return response.json() | |
| # Function to parse date fields safely | |
| def parse_datetime(date_str): | |
| if not date_str: | |
| return None | |
| try: | |
| return datetime.fromisoformat(date_str.replace("Z", "+00:00")) | |
| except ValueError: | |
| return None | |
| def parse_date(date_str): | |
| if not date_str: | |
| return None | |
| try: | |
| return datetime.strptime(date_str, "%Y-%m-%d").date() | |
| except ValueError: | |
| return None | |
| # Function to insert data into Postgres | |
| def insert_into_db(conn, data): | |
| with conn.cursor() as cur: | |
| for opp in data.get('opportunitiesData', []): | |
| cur.execute(insert_sql, ( | |
| opp.get('noticeId'), | |
| opp.get('title'), | |
| opp.get('naicsCode'), | |
| opp.get('agencyName'), | |
| opp.get('baseAwardAmount'), | |
| opp.get('totalAwardAmount'), | |
| parse_datetime(opp.get('responseDate')), | |
| parse_datetime(opp.get('lastModifiedDate')), | |
| parse_datetime(opp.get('publishDate')), | |
| opp.get('awardNumber'), | |
| parse_date(opp.get('awardDate')), | |
| opp.get('active') | |
| )) | |
| conn.commit() | |
| # Main loop to batch download and insert | |
| def main(): | |
| conn = psycopg2.connect(**db_config) | |
| with conn: | |
| with conn.cursor() as cur: | |
| cur.execute(create_table_sql) | |
| # Loop month-by-month from Jan 2024 to today | |
| start_date = datetime(2024, 1, 1) | |
| end_date = datetime.today() | |
| while start_date < end_date: | |
| next_month = start_date + timedelta(days=31) | |
| posted_from = start_date | |
| posted_to = min(end_date, next_month.replace(day=1) - timedelta(days=1)) | |
| print(f"\nFetching data from {posted_from.date()} to {posted_to.date()}") | |
| start = 0 | |
| while True: | |
| print(f" - Fetching records starting at {start}") | |
| data = fetch_page(start, posted_from, posted_to) | |
| insert_into_db(conn, data) | |
| if len(data.get('opportunitiesData', [])) < PAGE_SIZE: | |
| break | |
| start += PAGE_SIZE | |
| sleep(DELAY) | |
| start_date = next_month.replace(day=1) | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment