Created
July 24, 2024 00:57
-
-
Save lispc/a43c0fe372d49147a32675889ab57746 to your computer and use it in GitHub Desktop.
row_usage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import json | |
# The input file 500row.csv is produced with:
#   psql -h XX -U XX XX -c "\copy (select index, convert_from(proof, 'UTF8')::json->>'row_usages' as row_usages from chunk where index < 3529100 and deleted_at IS NULL order by index desc limit 500) To STDOUT With CSV" > 500row.csv
def convert_row_usages(input_path='500row.csv', output_path='output.csv'):
    """Flatten per-chunk ``row_usages`` JSON into one CSV column per name.

    The input is a header-less CSV whose rows are ``chunk_index,data``,
    where ``data`` is a JSON array of objects that each carry a ``name``
    and a ``row_number``. The output CSV has a header row, a
    ``chunk_index`` column, one column per distinct ``name`` (in order of
    first appearance) and a trailing ``max`` column holding the largest
    ``row_number`` of that row.

    Args:
        input_path: Path of the CSV to read.
        output_path: Path of the CSV to write (overwritten if present).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a ``data`` cell is not valid JSON.
    """
    # Parse the whole input first: the header needs every name that occurs
    # in any row, not just the first one, and this avoids peeking at the
    # first row and then rewinding the file underneath the csv reader.
    with open(input_path, 'r', newline='') as file:
        reader = csv.DictReader(file, fieldnames=['chunk_index', 'data'])
        chunks = [(row['chunk_index'], json.loads(row['data'])) for row in reader]

    # A dict is used as an ordered set so columns keep first-seen order.
    names = {}
    for _, usages in chunks:
        for usage in usages:
            names.setdefault(usage['name'], None)
    fieldnames = ['chunk_index', *names, 'max']

    with open(output_path, 'w', newline='') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        for chunk_index, usages in chunks:
            new_row = {'chunk_index': chunk_index}
            for usage in usages:
                new_row[usage['name']] = usage['row_number']
            # An empty usage list has no maximum; leave the cell blank
            # instead of letting max() raise ValueError.
            new_row['max'] = max(
                (usage['row_number'] for usage in usages), default=''
            )
            # Names missing from this row are filled with DictWriter's
            # default restval (an empty string).
            writer.writerow(new_row)


if __name__ == '__main__':
    convert_row_usages()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment