Created
June 20, 2016 05:26
-
-
Save brenttheisen/d9d77f636b0b5d8d1d3e7d85f4c9fbcc to your computer and use it in GitHub Desktop.
Script for printing a CSV of a conversation in a Hangout JSON file from Google Takeout.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# Script for printing a CSV of a conversation in a Hangout JSON file from Google Takeout.
#
# Usage: ruby google_takeout_hangout_convo_to_csv.rb <json file> <conversation id>
#
# Every chat message of the selected conversation that carries no attachment
# is written to stdout as one CSV row: timestamp, sender name, message text.
#
# Brent Theisen
#
require 'json'
require 'csv'

# Event timestamps are divided by this to obtain the seconds handed to Time.at.
MICROSECONDS_PER_SECOND = 1_000_000

# Builds a lookup from participant id to display name.
#
# @param conversation_state [Hash] the inner 'conversation_state' element of one conversation
# @return [Hash] maps { 'gaia_id' => ..., 'chat_id' => ... } to the participant's 'fallback_name'
def participant_names(conversation_state)
  participants = conversation_state['conversation']['participant_data']
  participants.map do |participant|
    id = participant['id']
    # The key is rebuilt with exactly these two fields so that it compares
    # equal to the 'sender_id' hash used for the lookup on each event.
    [
      { 'gaia_id' => id['gaia_id'], 'chat_id' => id['chat_id'] },
      participant['fallback_name']
    ]
  end.to_h
end

# Joins the segments of a message into a single string.
#
# @param message_content [Hash] the 'message_content' element of a chat message
# @return [String] the message text; empty when the message has no segments
# @raise [RuntimeError] if a segment has a type other than TEXT, LINK or LINE_BREAK
def message_text(message_content)
  # Array() turns a missing 'segment' key into an empty list instead of
  # failing with NoMethodError on nil.
  Array(message_content['segment']).map do |segment|
    case segment['type']
    when 'TEXT', 'LINK' then segment['text']
    when 'LINE_BREAK' then "\n"
    else
      raise "Unknown segment type #{segment['type']}!"
    end
  end.join
end

# Prints one CSV row per attachment-free chat message, ordered by timestamp.
#
# @param conversation_state [Hash] the inner 'conversation_state' element of one conversation
# @return [void]
def print_conversation(conversation_state)
  names = participant_names(conversation_state)

  # Sort on the integer value: comparing the raw values would order digit
  # strings of different length lexicographically rather than chronologically.
  events = Array(conversation_state['event']).sort_by { |event| event['timestamp'].to_i }

  events.each do |event|
    chat_message = event['chat_message']
    next if chat_message.nil?

    content = chat_message['message_content']
    # Messages that carry an attachment are skipped entirely.
    next unless content['attachment'].nil?

    timestamp = Time.at(event['timestamp'].to_i / MICROSECONDS_PER_SECOND)
    # Array#to_csv already terminates the row with a newline, so puts adds none.
    puts [timestamp, names[event['sender_id']], message_text(content)].to_csv
  end
end

if ARGV.size != 2
  abort 'usage: ruby google_takeout_hangout_convo_to_csv.rb <json file> <conversation id>'
end

FILE = ARGV[0]
CONVERSATION_ID = ARGV[1]

root_element = JSON.parse(File.read(FILE))

matching_conversations = root_element['conversation_state'].select do |conversation_element|
  conversation_element['conversation_state']['conversation_id']['id'] == CONVERSATION_ID
end

# Report an id that matches nothing instead of exiting silently with no output.
warn "No conversation with id #{CONVERSATION_ID} found in #{FILE}" if matching_conversations.empty?

matching_conversations.each do |conversation_element|
  print_conversation(conversation_element['conversation_state'])
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment