Skip to content

Instantly share code, notes, and snippets.

@mpizenberg
Created October 12, 2024 23:42
Show Gist options
  • Save mpizenberg/cccc24ffe6c0784b7dca2e8352e33a70 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests",
# ]
# ///
import requests
import json
import os
import time

# Lidonation Catalyst-Explorer endpoint listing proposals.
base_url = 'https://www.lidonation.com/api/catalyst-explorer/proposals'
headers = {
    'accept': 'application/json',
    # NOTE: replace with your own token before running.
    'X-CSRF-TOKEN': 'useyourowntoken',
}
params = {
    'challenge_id': 146,  # Fund 13, Open Dev category
    'per_page': 50,
}

# Create a directory to store the JSON files.
directory = "catalyst_proposals_f13_open_dev"
os.makedirs(directory, exist_ok=True)

all_proposals = []
for page in range(6, 11):  # fetches pages 6 to 10 (range end is exclusive)
    print(f"Try downloading for page {page} ...")
    params['page'] = page
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        # The API wraps the proposal list in a 'data' field.
        proposals = data.get('data', [])
        all_proposals.extend(proposals)
        print(f"Fetched page {page}, got {len(proposals)} proposals")
        # Write the raw JSON response to a per-page file.
        filename = os.path.join(directory, f"page_{page}.json")
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"Saved raw data for page {page} to {filename}")
        # Wait 5s between requests to not overload the server.
        time.sleep(5.0)
    else:
        print(f"Failed to fetch page {page}. Status code: {response.status_code}")
        break

# Optionally also dump everything into a single file:
# all_proposals_filename = os.path.join(directory, "all_proposals.json")
# with open(all_proposals_filename, 'w', encoding='utf-8') as f:
#     json.dump(all_proposals, f, ensure_ascii=False, indent=2)
print(f"Total proposals fetched: {len(all_proposals)}")
print(f"Raw JSON responses saved in directory: {directory}")
# You can now work with the 'all_proposals' list, which contains all the fetched proposals.
# The raw JSON responses are saved in individual files for each page.
# /// script
# requires-python = ">=3.10"
# dependencies = [
# ]
# ///
import json
import csv
def json_to_csv(input_json_file, output_csv_file):
    """Convert a JSON array of objects into a CSV file.

    Column order follows the first object's keys; keys that first appear
    in later objects are appended after those, so rows with extra keys no
    longer make ``DictWriter`` raise ``ValueError``. Keys missing from a
    row produce empty cells. Prints a notice and writes nothing when the
    input array is empty.
    """
    # Read the JSON file.
    with open(input_json_file, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    # Check if data is not empty.
    if not data:
        print("The input JSON file is empty.")
        return
    # Union of keys across all rows, preserving first-seen order.
    fieldnames = list(data[0].keys())
    seen = set(fieldnames)
    for row in data[1:]:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)
    # Write to CSV file.
    with open(output_csv_file, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
    print(f"CSV file '{output_csv_file}' has been created successfully.")
# Conversion inputs: aggregated JSON dump in, spreadsheet-friendly CSV out.
input_json_file = 'aggregated_catalyst_data.json'
output_csv_file = 'catalyst_proposals.csv'

# Perform the conversion.
json_to_csv(input_json_file=input_json_file, output_csv_file=output_csv_file)
# /// script
# requires-python = ">=3.10"
# dependencies = [
# ]
# ///
import json
import os
from typing import List, Dict
def read_json_file(file_path: str) -> Dict:
    """Parse *file_path* as UTF-8 JSON and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        contents = json.load(handle)
    return contents
def extract_fields(proposal: Dict, fields: List[str]) -> Dict:
    """Return a new dict holding only *fields*; absent keys map to None."""
    subset = {}
    for key in fields:
        subset[key] = proposal.get(key)
    return subset
def process_directory(directory: str, output_file: str, fields: List[str]):
    """Aggregate every ``*.json`` page file in *directory* into *output_file*.

    Each file is expected to contain a ``'data'`` array of proposal objects;
    only the keys named in *fields* are kept (absent keys become None). The
    combined list is written as pretty-printed UTF-8 JSON.
    """
    aggregated = []
    # Process files in deterministic (sorted) order.
    for entry in sorted(os.listdir(directory)):
        if not entry.endswith('.json'):
            continue
        path = os.path.join(directory, entry)
        with open(path, 'r', encoding='utf-8') as fh:
            payload = json.load(fh)
        # Keep only the requested fields of each proposal in the page.
        for proposal in payload.get('data', []):
            aggregated.append({key: proposal.get(key) for key in fields})
    # Write the aggregated and extracted data to a new JSON file.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(aggregated, outfile, ensure_ascii=False, indent=2)
    print(f"Processed {len(aggregated)} proposals.")
    print(f"Aggregated data saved to {output_file}")
# Directory holding the per-page JSON dumps produced by the fetch script.
input_directory = "catalyst_proposals_f13_open_dev"
# Destination file for the aggregated output.
output_file = "aggregated_catalyst_data.json"
# Proposal attributes worth keeping.
fields_of_interest = [
    'id',
    'user_id',
    'title',
    'ideascale_user',
    'ideascale_id',
    'ideascale_link',
    'link',
    'amount_requested',
    'amount_received',
]

# Run the aggregation.
process_directory(input_directory, output_file, fields_of_interest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment