Created October 12, 2024 23:42
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "requests",
# ]
# ///
import requests
import json
import os
import time

base_url = 'https://www.lidonation.com/api/catalyst-explorer/proposals'
headers = {
    'accept': 'application/json',
    'X-CSRF-TOKEN': 'useyourowntoken'
}
params = {
    'challenge_id': 146,  # Fund 13, Open Dev category
    'per_page': 50
}

# Create a directory to store the JSON files
directory = "catalyst_proposals_f13_open_dev"
os.makedirs(directory, exist_ok=True)

all_proposals = []
for page in range(1, 11):  # Fetch pages 1 to 10
    print(f"Try downloading page {page} ...")
    params['page'] = page
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        proposals = data.get('data', [])
        all_proposals.extend(proposals)
        print(f"Fetched page {page}, got {len(proposals)} proposals")
        # Write the raw JSON response to a file
        filename = os.path.join(directory, f"page_{page}.json")
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"Saved raw data for page {page} to {filename}")
        # Wait 5 s between requests to avoid overloading the server
        time.sleep(5.0)
    else:
        print(f"Failed to fetch page {page}. Status code: {response.status_code}")
        break

# all_proposals_filename = os.path.join(directory, "all_proposals.json")
# with open(all_proposals_filename, 'w', encoding='utf-8') as f:
#     json.dump(all_proposals, f, ensure_ascii=False, indent=2)

print(f"Total proposals fetched: {len(all_proposals)}")
print(f"Raw JSON responses saved in directory: {directory}")
# You can now work with the 'all_proposals' list, which contains all the fetched proposals
# The raw JSON responses are saved in individual files for each page
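The # /// script header is PEP 723 inline script metadata, so each of these files can be run directly with a tool that understands it, for example "uv run fetch_proposals.py" (the file name is assumed here; use whatever name you saved the script under). Before running, replace the 'useyourowntoken' placeholder in the X-CSRF-TOKEN header with your own token for the lidonation.com Catalyst Explorer API.

The next script converts the aggregated proposal data into a CSV file.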
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
import json
import csv

def json_to_csv(input_json_file, output_csv_file):
    # Read the JSON file
    with open(input_json_file, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Check if data is not empty
    if not data:
        print("The input JSON file is empty.")
        return

    # Get the field names from the first item in the JSON data
    fieldnames = list(data[0].keys())

    # Write to CSV file
    with open(output_csv_file, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        # Write the header
        writer.writeheader()
        # Write the data
        for row in data:
            writer.writerow(row)

    print(f"CSV file '{output_csv_file}' has been created successfully.")

# Specify the input and output file names
input_json_file = 'aggregated_catalyst_data.json'
output_csv_file = 'catalyst_proposals.csv'

# Run the conversion
json_to_csv(input_json_file, output_csv_file)
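Note that csv.DictWriter takes its column names from the keys of the first record, so this converter assumes every entry in aggregated_catalyst_data.json exposes the same fields. That holds here because the aggregation script below writes the same fixed list of fields for every proposal.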
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
import json
import os
from typing import List, Dict

def read_json_file(file_path: str) -> Dict:
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def extract_fields(proposal: Dict, fields: List[str]) -> Dict:
    return {field: proposal.get(field) for field in fields}

def process_directory(directory: str, output_file: str, fields: List[str]):
    all_data = []

    # Iterate through all JSON files in the directory
    for filename in sorted(os.listdir(directory)):
        if filename.endswith('.json'):
            file_path = os.path.join(directory, filename)
            json_data = read_json_file(file_path)

            # Extract the 'data' field from each file
            proposals = json_data.get('data', [])

            # Extract specified fields from each proposal
            extracted_proposals = [extract_fields(proposal, fields) for proposal in proposals]
            all_data.extend(extracted_proposals)

    # Write the aggregated and extracted data to a new JSON file
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(all_data, outfile, ensure_ascii=False, indent=2)

    print(f"Processed {len(all_data)} proposals.")
    print(f"Aggregated data saved to {output_file}")

# Specify the directory containing the JSON files
input_directory = "catalyst_proposals_f13_open_dev"

# Specify the output file name
output_file = "aggregated_catalyst_data.json"

# Specify the fields you're interested in
fields_of_interest = [
    'id', 'user_id', 'title', 'ideascale_user', 'ideascale_id',
    'ideascale_link', 'link', 'amount_requested', 'amount_received'
]

# Run the process
process_directory(input_directory, output_file, fields_of_interest)
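Taken together, the three scripts form a small pipeline: fetch the raw proposal pages into catalyst_proposals_f13_open_dev/, aggregate the fields of interest into aggregated_catalyst_data.json, and convert that file into catalyst_proposals.csv. A minimal sanity check on the aggregated output could look like the sketch below; it is not part of the original scripts and assumes the fetch and aggregation steps have already been run.

import json

with open("aggregated_catalyst_data.json", encoding="utf-8") as f:
    proposals = json.load(f)

print(f"{len(proposals)} proposals aggregated")
# Eyeball a few entries to confirm the extracted fields look right
for p in proposals[:3]:
    print(p["id"], p["title"], p["amount_requested"])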