Created
December 14, 2024 15:38
-
-
Save mikkohei13/c04c54a8084b57b880e8df262e228e1f to your computer and use it in GitHub Desktop.
Script to parse ChatGPT conversations.json file to analyze how many messages were sent by ChatGPT and users, and how many conversations were created per year and month.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to parse ChatGPT conversations.json file to analyze how many messages were sent by ChatGPT and users, and how many conversations were created per year and month.
# conversations.json is a JSON file containing a list of conversations. You can get this by starting a data export in ChatGPT settings.
import json
from collections import defaultdict
from datetime import datetime

# Default location of the exported file, relative to the working directory.
file_path = "conversations.json"

# Author roles whose messages are attributed to ChatGPT rather than the user.
CHATGPT_ROLES = ("tool", "assistant")


def load_conversations(path=file_path):
    """Load and return the parsed content of a conversations.json export.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document (the script expects a list of
        conversation dicts).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding (which is not UTF-8 on every system).
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def _period_keys(timestamp):
    """Return ``(year, "YYYY-MM")`` for a Unix timestamp, in local time."""
    moment = datetime.fromtimestamp(timestamp)
    return moment.year, moment.strftime("%Y-%m")


def analyze_conversations(conversations):
    """Count conversations and messages, in total and per year / month.

    Args:
        conversations: Iterable of conversation dicts. Each may carry a
            ``create_time`` Unix timestamp and a ``mapping`` dict whose
            values may hold a ``message`` dict with ``author.role`` and
            ``create_time``.

    Returns:
        A dict with these keys:
        ``chatgpt_message_count`` and ``user_message_count`` (ints), and
        ``conversations_per_year``, ``conversations_per_month``,
        ``chatgpt_messages_per_year``, ``user_messages_per_year``,
        ``chatgpt_messages_per_month``, ``user_messages_per_month``
        (dicts sorted by key; years are ints, months are "YYYY-MM" strings).
    """
    chatgpt_message_count = 0
    user_message_count = 0
    conversations_per_year = defaultdict(int)
    conversations_per_month = defaultdict(int)
    chatgpt_messages_per_year = defaultdict(int)
    user_messages_per_year = defaultdict(int)
    chatgpt_messages_per_month = defaultdict(int)
    user_messages_per_month = defaultdict(int)

    for conversation in conversations:
        # A missing, null or zero timestamp means the conversation cannot be
        # placed in a period, so it is left out of the per-period counts.
        conversation_timestamp = conversation.get("create_time")
        if conversation_timestamp:
            year, month = _period_keys(conversation_timestamp)
            conversations_per_year[year] += 1
            conversations_per_month[month] += 1

        # "mapping" may be absent or an explicit JSON null.
        messages_mapping = conversation.get("mapping") or {}
        for node in messages_mapping.values():
            message = node.get("message")
            if not message:  # Node without a message (key missing or null)
                continue

            # Messages without a timestamp are not counted at all, because
            # every message counter is tied to a year and a month.
            message_timestamp = message.get("create_time")
            if not message_timestamp:
                continue
            message_year, message_month = _period_keys(message_timestamp)

            # "author" may be an explicit null; treat it like a missing one.
            author = (message.get("author") or {}).get("role", "unknown")
            if author in CHATGPT_ROLES:
                chatgpt_message_count += 1
                chatgpt_messages_per_year[message_year] += 1
                chatgpt_messages_per_month[message_month] += 1
            elif author == "user":
                user_message_count += 1
                user_messages_per_year[message_year] += 1
                user_messages_per_month[message_month] += 1

    return {
        "chatgpt_message_count": chatgpt_message_count,
        "user_message_count": user_message_count,
        "conversations_per_year": dict(sorted(conversations_per_year.items())),
        "conversations_per_month": dict(sorted(conversations_per_month.items())),
        "chatgpt_messages_per_year": dict(sorted(chatgpt_messages_per_year.items())),
        "user_messages_per_year": dict(sorted(user_messages_per_year.items())),
        "chatgpt_messages_per_month": dict(sorted(chatgpt_messages_per_month.items())),
        "user_messages_per_month": dict(sorted(user_messages_per_month.items())),
    }


def _print_counts(title, counts):
    """Print a heading followed by one ``period: count`` line per key, sorted."""
    print(title)
    for period, count in sorted(counts.items()):
        print(f"{period}: {count}")


def print_report(stats):
    """Print the statistics returned by ``analyze_conversations``."""
    print(f"Total ChatGPT messages: {stats['chatgpt_message_count']}")
    print(f"Total user messages: {stats['user_message_count']}")
    _print_counts("Conversations per year:", stats["conversations_per_year"])
    _print_counts("Conversations per month:", stats["conversations_per_month"])
    _print_counts("ChatGPT messages per year:", stats["chatgpt_messages_per_year"])
    _print_counts("User messages per year:", stats["user_messages_per_year"])
    _print_counts("ChatGPT messages per month:", stats["chatgpt_messages_per_month"])
    _print_counts("User messages per month:", stats["user_messages_per_month"])


def main():
    """Load the export at ``file_path``, analyze it and print the report."""
    print_report(analyze_conversations(load_conversations(file_path)))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment