Forked from kahuang/export-transform-to-slack-csv.py
Last active
December 10, 2021 04:19
-
-
Save colmanhumphrey/d24a295e2fbc37c56b3a3cdf2ae0bd5f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import zipfile | |
import getopt | |
import sys | |
import re | |
import json | |
import codecs | |
# Parse the command line. The only recognised option is `-f <exportFile.zip>`,
# which names the export archive to transform.
try:
    opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
    # Unknown option, or `-f` given without a value: report it the same way as
    # a missing `-f` instead of dying with a traceback.
    print(str(err))
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)

zipFileName = None
for o, a in opts:
    if o == "-f":
        zipFileName = a  # If `-f` is repeated, the last occurrence wins.

if zipFileName is None:
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)
# Open the export archive; the rest of the script reads its members via `zf`.
zf = zipfile.ZipFile(zipFileName)

# users.json holds a top-level "users" list. Read and parse it first, then
# build the lookup once the archive member has been closed.
with zf.open("users.json") as usersFile:
    jsonUsers = json.loads(usersFile.read().decode("utf-8"))["users"]

# User ID -> name to show in the CSV: the display name when set, otherwise the
# plain name, otherwise a fixed placeholder.
userIDToUsername = {
    jsonUser["user"]: jsonUser.get("displayName") or jsonUser.get("name") or "NoNameUser"
    for jsonUser in jsonUsers
}
# channels.json holds a top-level "channels" list. Parse it, then index the
# entries by channel ID.
with zf.open("channels.json") as channelsFile:
    jsonChannels = json.loads(channelsFile.read().decode("utf-8"))["channels"]

channelNameByID = {}       # channel ID -> channel name
channelIDToParentID = {}   # channel ID -> "parent" value, or "" when absent/empty
for jsonChannel in jsonChannels:
    channelID = jsonChannel["channel"]
    channelNameByID[channelID] = jsonChannel["name"]
    channelIDToParentID[channelID] = jsonChannel.get("parent") or ""
# Walk every thread-message page in the archive and turn each message into one
# quoted CSV row: "timestamp","channel","username","text". The rows are
# collected in `lines`.
files = zf.namelist()
lines = []
threadChannelNameToThreadID = {}  # Not populated anywhere in this section.

# Message pages are named "<folder-and-thread-id>-thread-messages-<n>.json".
# The dot is escaped and the pattern is anchored at the end so that only real
# ".json" members match.
threadMessagesPattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json$")

for file in files:
    matched = threadMessagesPattern.match(file)
    if not matched:
        continue
    folderAndThreadID = matched.group(1)

    # Thread metadata sits next to the message pages. ZipFile.open raises
    # KeyError when the member is absent; in that case fall back to a
    # placeholder title rather than aborting the whole export.
    try:
        threadFile = zf.open(folderAndThreadID + "-thread.json")
    except KeyError:
        threadTitle = "UnknownThread"
    else:
        with threadFile:
            thread = json.loads(threadFile.read().decode("utf-8"))
        threadTitle = thread.get("title") or "UnnamedThread"

    # The cleaned title is the same for every message in the thread, so it is
    # computed once per thread.
    workingThreadTitle = threadTitle.replace("\"", "")  # Quotes would break the CSV row
    workingThreadTitle = workingThreadTitle.replace("\n", " ")  # Flatten to a single line
    workingThreadTitle = workingThreadTitle[:120]  # Avoid excessively long titles

    with zf.open(file) as f:
        jsonMessages = json.loads(f.read().decode("utf-8"))["messages"]

    # Messages must be handled in the reverse of their stored order to come out
    # in ascending chronological order for the CSV. Instead of reversing the
    # list and popping from the front, treat it as a stack: pop() takes the
    # last stored message first, which yields the same processing order.
    first = True
    while jsonMessages:
        jsonMessage = jsonMessages.pop()
        unixSeconds = round(jsonMessage["created"] / 1000)  # "created" is divided by 1000 to get seconds

        username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"
        username = username.replace("\"", "")  # Keep the quoted CSV field intact, like channel/title

        channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"
        channelName = channelName.replace(" ", "-").lower()  # Make it look Slack-y
        channelName = channelName.replace("\"", "")  # Remove any quotes so the csv is well-formatted
        channelName = channelName[:80]  # Max channel name in Slack is 80 chars
        if channelName == "unknownchannel":
            continue

        if first:
            # Announce the thread one second before its first message so the
            # announcement sorts ahead of it.
            lines.append("\"" + str(unixSeconds - 1) + "\",\"" + channelName + "\",\"new-quill-thread\",\"_New thread_\n*" + workingThreadTitle + "*\"")
            first = False

        if jsonMessage.get("deleted") or jsonMessage.get("moved"):
            continue

        textBody = jsonMessage.get("text")
        linkBody = jsonMessage.get("link")
        fileBody = jsonMessage.get("file")
        quoteBody = jsonMessage.get("quote")
        integrationMessageBody = jsonMessage.get("integration_message")

        messageStr = "UNSUPPORTED"  # Used when none of the known body kinds is present
        if textBody:
            messageStr = textBody["body"]
        elif linkBody:
            messageStr = linkBody["url"]
        elif fileBody:
            messageStr = fileBody["previewName"] + ": " + fileBody["link"]
        elif quoteBody:
            quotedMessages = quoteBody["quoted"]
            messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
            for quotedMessage in quotedMessages:
                # Override the quoted message's timestamp so it sorts correctly
                quotedMessage["created"] = jsonMessage["created"]
            # Quoted messages are already in ascending order; push them
            # reversed so the first quoted message is the next one popped.
            jsonMessages.extend(reversed(quotedMessages))
        elif integrationMessageBody:
            messageStr = integrationMessageBody["encoded"]

        # Backslash-escape double quotes; a quote that was already preceded by
        # a backslash gets an additional backslash.
        messageStr = messageStr.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")
        lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + username + "\",\"" + messageStr + "\"")
def lineSortKey(line):
    """Return a sort key that orders CSV rows by their leading timestamp.

    Rows start with a double-quoted run of digits (the Unix timestamp). The
    digits are zero-padded to a fixed width so that comparing keys as strings
    matches comparing the timestamps as numbers, whatever their digit count.
    Rows with equal timestamps get equal keys, so a stable sort keeps their
    original relative order.

    A row that does not start with a quoted run of digits falls back to its
    first 12 characters as the key.
    """
    if line.startswith('"'):
        closingQuote = line.find('"', 1)
        stamp = line[1:closingQuote]
        if closingQuote != -1 and stamp.isdigit():
            return stamp.zfill(20)
    return line[:12]
if __name__ == '__main__':
    # Order the rows by timestamp. If sorting fails, report why and carry on
    # with the rows in the order they were collected.
    try:
        lines = sorted(lines, key=lineSortKey)
    except Exception as e:
        print("Sorting exception: " + str(e))

    # Derive the output name by swapping only a trailing ".zip" extension for
    # ".csv". A blanket replace of "zip" would also rewrite directory or file
    # names containing "zip", and would leave the name unchanged (overwriting
    # the input archive) when no lowercase "zip" is present.
    base, dot, extension = zipFileName.rpartition(".")
    if dot and extension.lower() == "zip":
        outFileName = base + ".csv"
    else:
        outFileName = zipFileName + ".csv"

    # The context manager closes the file even if a write fails.
    with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
        for message in lines:
            outFile.write(message + "\n")

    print("Done transforming, results saved in " + outFileName)
    print("How to import this file into Slack:")
    print("- Go to Settings & Administration -> Workspace Settings")
    print("- At the top right of the screen there's an \"Import/Export Data\" button")
    print("- Choose the \"CSV/Text File\" option")
    print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
    print("\n")
    print("This import may take several days if you have a large Quill team with many threads/messages.")
    print("\n")
    print("This version puts all threads within their channel, with a new user `new-quill-thread` posting the thread name prior to the thread starting.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment