-
-
Save naptar/7c67bd755632f7a0e5a0990424875d5b to your computer and use it in GitHub Desktop.
# Run this in the same directory as the Hangouts.json file generated by Google Takeout / Data Export tool.
# python3 hangouts.py
import json | |
import datetime | |
import os | |
import shutil | |
import re | |
# Directory that receives one text file per exported conversation.
chat_dir = "hangouts"

# Never silently clobber an earlier export: ask before deleting it.
if os.path.exists(chat_dir):
    print("Hangouts directory already exists. Should I remove it before proceeding? y/n")
    # Surrounding whitespace in the answer is ignored; only "y"/"Y" confirms.
    if input().strip().lower() == "y":
        print("Removing the hangouts directory.")
        shutil.rmtree(chat_dir)
    else:
        print("Exiting.")
        # quit() is injected by the site module for interactive use and is
        # missing under `python -S`; SystemExit is always available.
        raise SystemExit(0)
class User:
    """A chat participant: an identifier paired with a display name."""

    def __init__(self, id, name):
        """Store the participant's identifier and initial display name."""
        self.id, self.name = id, name

    def getId(self):
        """Return the identifier given at construction."""
        return self.id

    def getName(self):
        """Return the current display name."""
        return self.name

    def setName(self, name):
        """Replace the display name with *name*."""
        self.name = name
class Message:
    """A single chat message with a display-ready timestamp."""

    def __init__(self, id, sender_id, sender_name, timestamp, text, utc_offset_hours=2):
        """Build a message.

        id              -- event identifier of the message
        sender_id       -- identifier of the sending participant
        sender_name     -- display name of the sender
        timestamp       -- microseconds since 1970-01-01 (int or numeric string)
        text            -- message body
        utc_offset_hours -- hours added to the UTC time for display; defaults
                            to 2 (GMT+2), the offset that used to be hard-coded
        """
        self.id = id
        self.sender_id = sender_id
        self.sender_name = sender_name
        # Unix-epoch microseconds -> naive UTC datetime -> shifted to the
        # requested fixed offset.
        utc_time = datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=int(timestamp))
        self.timestamp = utc_time + datetime.timedelta(hours=utc_offset_hours)
        self.text = text

    def display(self):
        """Return the message as "[YYYY-MM-DD HH:MM:SS] sender: text"."""
        return "[{}] {}: {}".format(
            self.timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            self.sender_name,
            self.text,
        )
class ConversationSet:
    """All conversations of an export, keyed by conversation id."""

    def __init__(self):
        """Start with no conversations."""
        self.conversations = dict()

    def getParticipantNameById(self, id):
        """Return a display name for participant *id*, or "Unknown".

        Every conversation is searched and the first non-empty name wins.
        Empty names are skipped because addConversationParticipants stores
        "" as a placeholder when an entry carries no fallback name; returning
        that placeholder would hide a real name held by another conversation.
        """
        for conversation in self.conversations.values():
            participant = conversation.getParticipantById(id)
            if participant is not None and participant.name:
                return participant.name
        return "Unknown"

    def addConversationParticipants(self, id, json_participant_data):
        """Register conversation *id* (replacing any previous one) and its participants.

        Each entry of *json_participant_data* must provide ["id"]["gaia_id"];
        "fallback_name" is optional and defaults to an empty placeholder.
        """
        conversation = Conversation(id)
        self.conversations[id] = conversation
        for participant in json_participant_data:
            conversation.addParticipant(
                participant["id"]["gaia_id"],
                participant.get("fallback_name", ""),
            )

    def addConversationEvents(self, id, json_participant_data, json_event_data):
        """Resolve participant names for conversation *id* and record its chat messages.

        Raises KeyError if *id* was not registered beforehand with
        addConversationParticipants. Only events whose "event_type" is
        "REGULAR_CHAT_MESSAGE" are stored; their text is the concatenation of
        the message segments (empty when the message has no segments).
        """
        conversation = self.conversations[id]

        for participant in json_participant_data:
            p_id = participant["id"]["gaia_id"]
            if "fallback_name" in participant:
                name = participant["fallback_name"]
            else:
                # Borrow the name from any conversation that knows this person.
                name = self.getParticipantNameById(p_id)
            conversation.setParticipantName(p_id, name)

        for event in json_event_data:
            if event["event_type"] != "REGULAR_CHAT_MESSAGE":
                continue

            segments = event["chat_message"]["message_content"].get("segment", [])
            # A segment without a "text" field contributes nothing instead of
            # aborting the whole export.
            text = "".join(segment.get("text", "") for segment in segments)

            sender_id = event["sender_id"]["gaia_id"]
            sender = conversation.getParticipantById(sender_id)
            if sender is not None:
                sender_name = sender.name
            else:
                # The sender is not listed among this conversation's
                # participants; fall back to a cross-conversation lookup
                # rather than failing on None.name.
                sender_name = self.getParticipantNameById(sender_id)

            conversation.addMessage(
                event["event_id"],
                sender_id,
                sender_name,
                event["timestamp"],
                text,
            )

    def getConversations(self):
        """Return the conversations as a new list."""
        return list(self.conversations.values())

    def getConversationById(self, id):
        """Return the conversation registered under *id*, or None."""
        return self.conversations.get(id)
class Conversation:
    """One chat thread: its id, its participants by id, and its messages in order."""

    def __init__(self, id):
        """Create an empty conversation identified by *id*."""
        self.id = id
        self.participants = dict()
        self.messages = []

    def getId(self):
        """Return the conversation id."""
        return self.id

    def addParticipant(self, id, name):
        """Register a participant; an already-known id is left untouched."""
        if id in self.participants:
            return
        self.participants[id] = User(id, name)

    def setParticipantName(self, id, name):
        """Rename participant *id*; unknown ids are ignored."""
        if id not in self.participants:
            return
        self.participants[id].setName(name)

    def getParticipantById(self, id):
        """Return the participant stored under *id*, or None."""
        return self.participants.get(id)

    def getParticipants(self):
        """Return the participants as a new list."""
        return list(self.participants.values())

    def participantCount(self):
        """Return how many participants are registered."""
        return len(self.participants)

    def addMessage(self, id, sender_id, sender_name, timestamp, text):
        """Append a new Message built from the given fields."""
        message = Message(id, sender_id, sender_name, timestamp, text)
        self.messages.append(message)

    def getMessages(self):
        """Return the list of stored messages."""
        return self.messages
def get_valid_filename(s):
    """Turn *s* into a conservative file name.

    The value is converted to str, stripped of surrounding whitespace, spaces
    become underscores, and every character that is not a word character,
    a dash or a dot is dropped.
    """
    # https://github.com/django/django/blob/master/django/utils/text.py#L218
    disallowed = re.compile(r'(?u)[^-\w.]')
    underscored = str(s).strip().replace(' ', '_')
    return disallowed.sub('', underscored)
print("Processing Hangouts.json ..")

# Read the export explicitly as UTF-8. With the platform default codec the
# load fails with UnicodeDecodeError on non-ASCII names or messages whenever
# that default is ASCII.
with open('Hangouts.json', 'r', encoding='utf-8') as f:
    hangouts_dict = json.load(f)

conversations = ConversationSet()
hangouts = [h for h in hangouts_dict["conversations"] if "conversation" in h]

# Pass 1: register every conversation and its participants first, so that
# pass 2 can look names up across all conversations.
for hangout in hangouts:
    conversations.addConversationParticipants(
        hangout["conversation"]["conversation_id"]["id"],
        hangout["conversation"]["conversation"]["participant_data"]
    )

# Pass 2: resolve participant names and collect the chat messages.
for hangout in hangouts:
    conversations.addConversationEvents(
        hangout["conversation"]["conversation_id"]["id"],
        hangout["conversation"]["conversation"]["participant_data"],
        hangout["events"]
    )

os.makedirs(chat_dir)

for c in conversations.getConversations():
    # File name: "-" without participants, otherwise the names joined by
    # " and " (a single name is used as-is).
    names = [p.name for p in c.getParticipants()]
    f_name = " and ".join(names) if names else "-"

    # Sanitising can leave nothing (e.g. only empty names); avoid writing a
    # hidden file called ".txt".
    base = get_valid_filename(f_name) or "-"

    # Several conversations can share the same participants: keep counting
    # (_2, _3, ...) until the name is free so no earlier file is overwritten.
    path = os.path.join(chat_dir, base + ".txt")
    suffix = 2
    while os.path.isfile(path):
        path = os.path.join(chat_dir, "{}_{}.txt".format(base, suffix))
        suffix += 1

    # UTF-8 output so non-ASCII text can be written on any platform; the
    # context manager closes the file even if a write fails.
    with open(path, "w", encoding="utf-8") as c_file:
        for m in c.getMessages():
            c_file.write(m.display() + "\n")

print("Done. Check the hangouts directory for chat output files.")
can you help
Traceback (most recent call last):
File "/Users/Downloads/Takeout-2/Hangouts/hangouts.py", line 148, in
hangouts_dict = json.load(f)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/json/__init__.py", line 265, in load
return loads(fp.read(),
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 7578: ordinal not in range(128)
having this error @fallenby
Hi there @kroy1200. I think I got this script from another place and put it here for safekeeping, though I don't think I wrote it myself. It could be that the format of the JSON file has changed, and this script no longer works, since this was uploaded in 2018.
I don't know what I am doing wrong. A few months back it worked and now it doesn't. Anyway, thanks for the help. Much appreciated.
Hmm, I'm not sure. Sorry that I can't be of much help - I am out of touch with regard to hangouts/takeout and that sort of thing. From your error it seems that there is an invalid data field in the JSON file. You could open it manually and then navigate to position 7578 to see what it is.
It is showing an error
Traceback (most recent call last):
File "/Users/Desktop/hangouts/hangouts.py", line 147, in
with open('Hangouts.json', 'r') as f:
IOError: [Errno 2] No such file or directory: 'Hangouts.json'