# MIT License # Copyright (c) 2016 Chandler Abraham # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. from slacker import Slacker import json import argparse import os # This script finds all channels, private channels and direct messages # that your user participates in, downloads the complete history for # those converations and writes each conversation out to seperate json files. # # This user centric history gathering is nice because the official slack data exporter # only exports public channels. # # PS, this only works if your slack team has a paid account which allows for unlimited history. # # PPS, this use of the API is blessed by Slack. # https://get.slack.help/hc/en-us/articles/204897248 # " If you want to export the contents of your own private groups and direct messages # please see our API documentation." # # get your slack user token at the bottom of this page # https://api.slack.com/web # # dependencies: # pip install slacker # https://github.com/os/slacker # # usage examples # python slack_history.py --token='123token' # python slack_history.py --token='123token' --dryRun=True # python slack_history.py --token='123token' --skipDirectMessages # python slack_history.py --token='123token' --skipDirectMessages --skipPrivateChannels # fetches the complete message history for a channel/group/im # # pageableObject could be: # slack.channel # slack.groups # slack.im # # channelId is the id of the channel/group/im you want to download history for. def getHistory(pageableObject, channelId, pageSize = 100): messages = [] lastTimestamp = None while(True): response = pageableObject.history( channel = channelId, latest = lastTimestamp, oldest = 0, count = pageSize ).body messages.extend(response['messages']) if (response['has_more'] == True): lastTimestamp = messages[-1]['ts'] # -1 means last element in a list else: break return messages def mkdir(directory): if not os.path.exists(directory): os.makedirs(directory) # fetch and write history for all public channels def getChannels(slack, dryRun): channels = slack.channels.list().body['channels'] print("\nfound channels: ") for channel in channels: print(channel['name']) if not dryRun: parentDir = "channels" mkdir(parentDir) for channel in channels: print("getting history for channel {0}".format(channel['name'])) fileName = "{parent}/{file}.json".format(parent = parentDir, file = channel['name']) messages = getHistory(slack.channels, channel['id']) channelInfo = slack.channels.info(channel['id']).body['channel'] with open(fileName, 'w') as outFile: print("writing {0} records to {1}".format(len(messages), fileName)) json.dump({'channel_info': channelInfo, 'messages': messages }, outFile, indent=4) # fetch and write history for all direct message conversations # also known as IMs in the slack API. def getDirectMessages(slack, ownerId, userIdNameMap, dryRun): dms = slack.im.list().body['ims'] print("\nfound direct messages (1:1) with the following users:") for dm in dms: print(userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)")) if not dryRun: parentDir = "direct_messages" mkdir(parentDir) for dm in dms: name = userIdNameMap.get(dm['user'], dm['user'] + " (name unknown)") print("getting history for direct messages with {0}".format(name)) fileName = "{parent}/{file}.json".format(parent = parentDir, file = name) messages = getHistory(slack.im, dm['id']) channelInfo = {'members': [dm['user'], ownerId]} with open(fileName, 'w') as outFile: print("writing {0} records to {1}".format(len(messages), fileName)) json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4) # fetch and write history for all private channels # also known as groups in the slack API. def getPrivateChannels(slack, dryRun): groups = slack.groups.list().body['groups'] print("\nfound private channels:") for group in groups: print("{0}: ({1} members)".format(group['name'], len(group['members']))) if not dryRun: parentDir = "private_channels" mkdir(parentDir) for group in groups: messages = [] print("getting history for private channel {0} with id {1}".format(group['name'], group['id'])) fileName = "{parent}/{file}.json".format(parent = parentDir, file = group['name']) messages = getHistory(slack.groups, group['id']) channelInfo = slack.groups.info(group['id']).body['group'] with open(fileName, 'w') as outFile: print("writing {0} records to {1}".format(len(messages), fileName)) json.dump({'channel_info': channelInfo, 'messages': messages}, outFile, indent=4) # fetch all users for the channel and return a map userId -> userName def getUserMap(slack): #get all users in the slack organization users = slack.users.list().body['members'] userIdNameMap = {} for user in users: userIdNameMap[user['id']] = user['name'] print("found {0} users ".format(len(users))) return userIdNameMap # get basic info about the slack channel to ensure the authentication token works def doTestAuth(slack): testAuth = slack.auth.test().body teamName = testAuth['team'] currentUser = testAuth['user'] print("Successfully authenticated for team {0} and user {1} ".format(teamName, currentUser)) return testAuth if __name__ == "__main__": parser = argparse.ArgumentParser(description='download slack history') parser.add_argument('--token', help="an api token for a slack user") parser.add_argument( '--dryRun', action='store_true', default=False, help="if dryRun is true, don't fetch/write history only get channel names") parser.add_argument( '--skipPrivateChannels', action='store_true', default=False, help="skip fetching history for private channels") parser.add_argument( '--skipChannels', action='store_true', default=False, help="skip fetching history for channels") parser.add_argument( '--skipDirectMessages', action='store_true', default=False, help="skip fetching history for directMessages") args = parser.parse_args() slack = Slacker(args.token) testAuth = doTestAuth(slack) userIdNameMap = getUserMap(slack) dryRun = args.dryRun if not dryRun: with open('metadata.json', 'w') as outFile: print("writing metadata") metadata = { 'auth_info': testAuth, 'users': userIdNameMap } json.dump(metadata, outFile, indent=4) if not args.skipChannels: getChannels(slack, dryRun) if not args.skipPrivateChannels: getPrivateChannels(slack, dryRun) if not args.skipDirectMessages: getDirectMessages(slack, testAuth['user_id'], userIdNameMap, dryRun)