Skip to content

Instantly share code, notes, and snippets.

@mpizenberg
Created October 12, 2024 23:42
Show Gist options
  • Save mpizenberg/cccc24ffe6c0784b7dca2e8352e33a70 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests",
# ]
# ///
import requests
import json
import os
import time

# Lidonation Catalyst-Explorer endpoint listing proposals.
base_url = 'https://www.lidonation.com/api/catalyst-explorer/proposals'
headers = {
    'accept': 'application/json',
    # NOTE: replace with your own token before running.
    'X-CSRF-TOKEN': 'useyourowntoken',
}
params = {
    'challenge_id': 146,  # Fund 13, Open Dev category
    'per_page': 50,
}

# Create a directory to store the JSON files.
directory = "catalyst_proposals_f13_open_dev"
os.makedirs(directory, exist_ok=True)

all_proposals = []
for page in range(6, 11):  # fetches pages 6 to 10 (range end is exclusive)
    print(f"Try downloading for page {page} ...")
    params['page'] = page
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        # The API wraps the proposal list in a 'data' field.
        proposals = data.get('data', [])
        all_proposals.extend(proposals)
        print(f"Fetched page {page}, got {len(proposals)} proposals")
        # Write the raw JSON response to a per-page file.
        filename = os.path.join(directory, f"page_{page}.json")
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"Saved raw data for page {page} to {filename}")
        # Wait 5s between requests to not overload the server.
        time.sleep(5.0)
    else:
        print(f"Failed to fetch page {page}. Status code: {response.status_code}")
        break

# Optionally also dump everything into a single file:
# all_proposals_filename = os.path.join(directory, "all_proposals.json")
# with open(all_proposals_filename, 'w', encoding='utf-8') as f:
#     json.dump(all_proposals, f, ensure_ascii=False, indent=2)
print(f"Total proposals fetched: {len(all_proposals)}")
print(f"Raw JSON responses saved in directory: {directory}")
# You can now work with the 'all_proposals' list, which contains all the fetched proposals.
# The raw JSON responses are saved in individual files for each page.
# /// script
# requires-python = ">=3.10"
# dependencies = [
# ]
# ///
import json
import csv
def json_to_csv(input_json_file, output_csv_file):
    """Convert a JSON array of objects into a CSV file.

    Column order follows the first object's keys; keys that first appear
    in later objects are appended after those, so rows with extra keys no
    longer make ``DictWriter`` raise ``ValueError``. Keys missing from a
    row produce empty cells. Prints a notice and writes nothing when the
    input array is empty.
    """
    # Read the JSON file.
    with open(input_json_file, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    # Check if data is not empty.
    if not data:
        print("The input JSON file is empty.")
        return
    # Union of keys across all rows, preserving first-seen order.
    fieldnames = list(data[0].keys())
    seen = set(fieldnames)
    for row in data[1:]:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)
    # Write to CSV file.
    with open(output_csv_file, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print(f"CSV file '{output_csv_file}' has been created successfully.")
# Conversion inputs: aggregated JSON dump in, spreadsheet-friendly CSV out.
input_json_file = 'aggregated_catalyst_data.json'
output_csv_file = 'catalyst_proposals.csv'

# Perform the conversion.
json_to_csv(input_json_file=input_json_file, output_csv_file=output_csv_file)
# /// script
# requires-python = ">=3.10"
# dependencies = [
# ]
# ///
import json
import os
from typing import List, Dict
def read_json_file(file_path: str) -> Dict:
    """Parse *file_path* as UTF-8 JSON and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        contents = json.load(handle)
    return contents
def extract_fields(proposal: Dict, fields: List[str]) -> Dict:
    """Return a new dict holding only *fields*; absent keys map to None."""
    subset = {}
    for key in fields:
        subset[key] = proposal.get(key)
    return subset
def process_directory(directory: str, output_file: str, fields: List[str]):
    """Aggregate every ``*.json`` page file in *directory* into *output_file*.

    Each file is expected to contain a ``'data'`` array of proposal objects;
    only the keys named in *fields* are kept (absent keys become None). The
    combined list is written as pretty-printed UTF-8 JSON.
    """
    aggregated = []
    # Process files in deterministic (sorted) order.
    for entry in sorted(os.listdir(directory)):
        if not entry.endswith('.json'):
            continue
        path = os.path.join(directory, entry)
        with open(path, 'r', encoding='utf-8') as fh:
            payload = json.load(fh)
        # Keep only the requested fields of each proposal in the page.
        for proposal in payload.get('data', []):
            aggregated.append({key: proposal.get(key) for key in fields})
    # Write the aggregated and extracted data to a new JSON file.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(aggregated, outfile, ensure_ascii=False, indent=2)
    print(f"Processed {len(aggregated)} proposals.")
    print(f"Aggregated data saved to {output_file}")
# Directory holding the per-page JSON dumps produced by the fetch script.
input_directory = "catalyst_proposals_f13_open_dev"
# Destination file for the aggregated output.
output_file = "aggregated_catalyst_data.json"
# Proposal attributes worth keeping.
fields_of_interest = [
    'id',
    'user_id',
    'title',
    'ideascale_user',
    'ideascale_id',
    'ideascale_link',
    'link',
    'amount_requested',
    'amount_received',
]

# Run the aggregation.
process_directory(input_directory, output_file, fields_of_interest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment