Last active
December 10, 2021 04:45
-
-
Save kahuang/e9771ed920e7afe78dc803265a0767c5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import zipfile | |
import getopt | |
import sys | |
import re | |
import json | |
import codecs | |
# Command-line handling: the script takes a single option, `-f <exportFile.zip>`,
# naming the export archive to convert.
try:
    opts, unparsedArgs = getopt.getopt(sys.argv[1:], "f:")
except getopt.GetoptError as err:
    # Unknown flag, or `-f` given without a value: report it with the usage
    # hint instead of ending in a traceback.
    print(str(err))
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)

zipFileName = None
for o, a in opts:
    if o == "-f":
        # If -f is repeated, the last occurrence wins.
        zipFileName = a

# Covers both "no -f at all" (None) and an empty value (`-f ""`).
if not zipFileName:
    print("The `-f exportFile.zip` argument is required")
    sys.exit(2)
# Open the export archive; `zf` stays open because the code further down keeps
# reading members from it.
zf = zipfile.ZipFile(zipFileName)

# users.json -> {user id: name to show}. Prefer the display name, then the
# plain name, then a fixed placeholder.
jsonUsers = json.loads(zf.read("users.json").decode("utf-8"))["users"]
userIDToUsername = {
    entry["user"]: entry.get("displayName") or entry.get("name") or "NoNameUser"
    for entry in jsonUsers
}

# channels.json -> {channel id: channel name} and {channel id: parent id},
# where a missing or empty parent is stored as "".
jsonChannels = json.loads(zf.read("channels.json").decode("utf-8"))["channels"]
channelNameByID = {entry["channel"]: entry["name"] for entry in jsonChannels}
channelIDToParentID = {entry["channel"]: entry.get("parent") or "" for entry in jsonChannels}
# Convert every exported thread into CSV rows, collected in `lines`.
#
# Archive members named `<prefix>-thread-messages-<digits>.json` hold a thread's
# messages; the sibling `<prefix>-thread.json` holds that thread's metadata
# (only its "title" is used). Each message becomes one row of the form
#   "<unix seconds>","<channel name>","<username>","<message text>"
# where the channel name is derived from the channel name plus the thread title.
files = zf.namelist()
knownFiles = set(files)
lines = []
threadChannelNameToThreadID = {}

maxChannelNameLength = 80  # Max channel name in Slack is 80 chars
# Raw string, escaped dot and end anchor so that only real `.json` message
# files match (the unescaped `.` previously matched any character).
threadMessagesPattern = re.compile(r"(.*)-thread-messages-[0-9]*\.json$")

for fileName in files:
    matched = threadMessagesPattern.match(fileName)
    if not matched:
        continue
    folderAndThreadID = matched.group(1)

    # Fall back to "UnknownThread" when the metadata file is absent from the
    # archive (opening a missing member would raise KeyError), and to
    # "UnnamedThread" when it exists but has no usable title.
    threadTitle = "UnknownThread"
    threadFileName = folderAndThreadID + "-thread.json"
    if threadFileName in knownFiles:
        with zf.open(threadFileName) as f:
            thread = json.loads(f.read().decode("utf-8"))
        threadTitle = thread.get("title") or "UnnamedThread"

    with zf.open(fileName) as f:
        pendingMessages = json.loads(f.read().decode("utf-8"))["messages"]

    # `pendingMessages` is used as a stack: popping from the end walks the file
    # in reverse order, which is the ascending-chron order the CSV expects,
    # without the O(n) cost of list.pop(0).
    while pendingMessages:
        jsonMessage = pendingMessages.pop()
        unixSeconds = jsonMessage["created"] / 1000
        # NOTE(review): unlike the channel name, the username is written to the
        # CSV without any quote handling - confirm names cannot contain `"`.
        username = userIDToUsername.get(jsonMessage["user"]) or "UnknownUser"

        channelName = channelNameByID.get(jsonMessage["channel"]) or "UnknownChannel"
        channelName = channelName + "-" + threadTitle
        channelName = channelName.replace(" ", "-").lower()  # Make it look Slack-y
        channelName = channelName.replace("\"", "")  # Remove any quotes so the csv is well-formatted
        channelName = channelName[:maxChannelNameLength]

        # Two different threads may collapse to the same channel name; append
        # -1, -2, ... (still within the length limit) until the name is either
        # unused or already owned by this message's thread.
        workingChannelName = channelName
        iteration = 1
        while (workingChannelName in threadChannelNameToThreadID) and (threadChannelNameToThreadID[workingChannelName] != jsonMessage["thread"]):
            iterationStr = "-" + str(iteration)
            workingChannelName = channelName[:maxChannelNameLength - len(iterationStr)] + iterationStr
            iteration = iteration + 1
        channelName = workingChannelName
        threadChannelNameToThreadID[channelName] = jsonMessage["thread"]

        # Pick the text for the row from whichever body the message carries.
        messageStr = "UNSUPPORTED"
        textBody = jsonMessage.get("text")
        linkBody = jsonMessage.get("link")
        fileBody = jsonMessage.get("file")
        quoteBody = jsonMessage.get("quote")
        integrationMessageBody = jsonMessage.get("integration_message")
        if jsonMessage.get("deleted"):
            messageStr = "This message was deleted."
        elif jsonMessage.get("moved"):
            # Moved messages produce no row at all.
            continue
        elif textBody:
            messageStr = textBody["body"]
        elif linkBody:
            messageStr = linkBody["url"]
        elif fileBody:
            messageStr = fileBody["previewName"] + ": " + fileBody["link"]
        elif quoteBody:
            quotedMessages = quoteBody["quoted"]
            messageStr = "Quoted " + str(len(quotedMessages)) + " messages:"
            for quotedMessage in quotedMessages:
                quotedMessage["created"] = jsonMessage["created"]  # Override the quoted message's timestamp so it sorts correctly
            # Quoted messages are already in ascending chron order; push them
            # reversed so the first quoted message is the next one popped.
            pendingMessages.extend(reversed(quotedMessages))
        elif integrationMessageBody:
            messageStr = integrationMessageBody["encoded"]

        # Escape for the CSV: every `"` becomes `\"`; a quote that was already
        # preceded by a backslash (now `\\"`) then becomes `\\\"`.
        messageStr = messageStr.replace('"', '\\"').replace("\\\\\"", "\\\\\\\"")
        lines.append("\"" + str(unixSeconds) + "\",\"" + channelName + "\",\"" + username + "\",\"" + messageStr + "\"")
def lineSortKey(line):
    """Return the sort key for one CSV row: its first 12 characters.

    Rows start with the quoted timestamp (``"<unix seconds>",...``), so the
    key is the opening quote plus the leading part of the timestamp. Rows
    whose keys are equal keep their original relative order because
    ``sorted`` is stable.

    The key must be returned: without the ``return`` every key is ``None``
    and sorting two or more rows raises ``TypeError``.
    """
    return line[:12]  # Just the timestamp portion of the line, otherwise keep the original order.
def csvFileNameFor(zipPath):
    """Return the CSV output path for the export archive *zipPath*.

    Only a trailing ``.zip`` extension (any letter case) is replaced by
    ``.csv``; other occurrences of "zip" in the path are left alone. A path
    without that extension gets ``.csv`` appended, so the result never equals
    the input path and the archive cannot be overwritten by the output.
    """
    suffix = ".zip"
    if zipPath[-len(suffix):].lower() == suffix:
        return zipPath[:-len(suffix)] + ".csv"
    return zipPath + ".csv"


if __name__ == '__main__':
    # Order rows by timestamp. This stays best-effort: if sorting fails the
    # rows are still written in their original order, but the cause is reported.
    try:
        lines = sorted(lines, key=lineSortKey)
    except Exception as e:
        print("Sorting exception: " + str(e))

    outFileName = csvFileNameFor(zipFileName)
    # `with` closes the file even if a write fails part-way through.
    with codecs.open(outFileName, 'w', encoding='utf-8') as outFile:
        for message in lines:
            outFile.write(message + "\n")

    print("Done transforming, results saved in " + outFileName)
    print("How to import this file into Slack:")
    print("- Go to Settings & Administration -> Workspace Settings")
    print("- At the top right of the screen there's an \"Import/Export Data\" button")
    print("- Choose the \"CSV/Text File\" option")
    print("- Chose this generated file as the \"Your CSV file\" and keep the default delimiter, click \"Start Import\"")
    print("\n")
    print("This import may take several days if you have a large Quill team with many threads/messages.")
    print("\n")
    print("Due to limitations of the Slack import tool, every Quill thread is turned into a separate Slack channel. You may want to prune your Quill workspace before exporting (or manually remove lines from the CSV) to limit how many channels get created. The user that does the import will be automatically subscribed to all of the imported channels, which makes the Slack app unusable if you have a very large number of channels. We recommend creating a dummy account to do the import.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I think the function should be:
if I'm not mistaken