baldwint · June 19, 2016 07:24
diff --git a/slack-export.ipynb b/slack-export.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use this notebook to archive a JSON copy of all the Slack history that you have access to: channels, DMs, multi-party DMs, and files.\n",
    "\n",
    "You'll need Python 3 with the `slacker` module (`pip3 install slacker`). Pandas is optional."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from slacker import Slacker"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You'll also need to generate a test token at https://api.slack.com/web and paste it in the variable below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "TOKEN = 'my-slack-token-goes-here'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "slack = Slacker(TOKEN)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Users list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "users = slack.users.list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('users.json', 'w') as f:\n",
    "    f.write(users.raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "members = pd.DataFrame.from_records(users.body['members'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "members.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "members.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "members[['id', 'name','real_name', 'is_admin', 'is_owner']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Channels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "channels = slack.channels.list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('channels.json', 'w') as f:\n",
    "    f.write(channels.raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "channels.body.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_chs = pd.DataFrame.from_records(channels.body['channels'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_chs.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_chs[['id', 'name', 'creator', 'num_members', 'is_member']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## channel message history\n",
    "\n",
    "API returns messages in batches, so make a function to repeatedly get them all:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "channels.body.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_full_history(channel_id, source=None):\n",
    "    \"\"\"Fetch every message of one conversation, batch by batch.\n",
    "\n",
    "    channel_id -- ID of the conversation to fetch.\n",
    "    source     -- API endpoint object with a `history` method (such as\n",
    "                  `slack.channels` or `slack.im`). When omitted,\n",
    "                  `slack.channels` is looked up at call time, so a\n",
    "                  `slack` client re-created with a new token is picked\n",
    "                  up without re-running this cell.\n",
    "\n",
    "    Returns the list of message dicts, in the order the API sent them.\n",
    "    \"\"\"\n",
    "    if source is None:\n",
    "        source = slack.channels\n",
    "    messages = []\n",
    "    latest = None\n",
    "    while True:\n",
    "        body = source.history(channel_id, latest=latest, count=1000).body\n",
    "        batch = body['messages']\n",
    "        if not batch:\n",
    "            # no messages (left) in this conversation\n",
    "            break\n",
    "        messages.extend(batch)\n",
    "        if not body.get('has_more', False):\n",
    "            break\n",
    "        # next request continues from the last message of this batch\n",
    "        latest = batch[-1]['ts']\n",
    "    return messages"
   ]
  },
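  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Test this function on a single channel first. The ID below is the author's #random channel; replace it with a channel ID from your own team (see the `id` column in the channels table above)."
   ]
  },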
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "hist = get_full_history('C0GCU93DY')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(hist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "loop over all channels:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def backup_all(iterable, source, fn_fmt=\"%s.json\"):\n",
    "    \"\"\"Back up every conversation in `iterable`, printing progress.\n",
    "\n",
    "    Drives `gen_backup`: each `(id, message_count)` status is printed\n",
    "    as soon as it is produced and is also collected.\n",
    "\n",
    "    Returns the list of all status tuples.\n",
    "    \"\"\"\n",
    "    collected = []\n",
    "    progress = gen_backup(iterable, source, fn_fmt=fn_fmt)\n",
    "    for item in progress:\n",
    "        print(item)\n",
    "        collected.append(item)\n",
    "    return collected\n",
    "\n",
    "def gen_backup(iterable, source, fn_fmt=\"%s.json\"):\n",
    "    \"\"\"Write one JSON file per conversation, yielding a status for each.\n",
    "\n",
    "    iterable -- dicts describing conversations; each needs an 'id' key.\n",
    "    source   -- API endpoint object handed on to `get_full_history`.\n",
    "    fn_fmt   -- %-format string turning a conversation ID into a filename.\n",
    "\n",
    "    Each file holds the conversation's own dict plus a 'history' key with\n",
    "    its messages. Yields `(id, number_of_messages)` once the file is\n",
    "    written.\n",
    "    \"\"\"\n",
    "    for conversation in iterable:\n",
    "        conv_id = conversation['id']\n",
    "        target = fn_fmt % conv_id\n",
    "        fetched = get_full_history(conv_id, source=source)\n",
    "        with open(target, 'w') as out:\n",
    "            json.dump(dict(history=fetched, **conversation), out)\n",
    "        yield conv_id, len(fetched)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.makedirs('channels', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stats = backup_all(channels.body['channels'],\n",
    "                   source=slack.channels,\n",
    "                   fn_fmt=\"channels/%s.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "woohoo, all the message histories are now saved"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "nums = pd.DataFrame.from_records(stats, columns=['id', 'num_msgs'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "out of curiosity, see how many messages each channel had"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.merge(df_chs, nums, on='id')[['id', 'name', 'num_msgs', 'num_members']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DMs\n",
    "\n",
    "use the same technique to get direct message histories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ims = slack.im.list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('ims.json', 'w') as f:\n",
    "    f.write(ims.raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ims.body.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.DataFrame.from_records(ims.body['ims'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "os.makedirs('ims', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stats = backup_all(ims.body['ims'],\n",
    "                   source=slack.im,\n",
    "                   fn_fmt=\"ims/%s.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MPIMs\n",
    "\n",
    "multi-party direct messages are structured differently"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "mpims = slack.mpim.list()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('mpims.json', 'w') as f:\n",
    "    f.write(mpims.raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mpims.body.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.DataFrame.from_records(mpims.body['groups'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "os.makedirs('mpims', exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "stats = backup_all(mpims.body['groups'],\n",
    "                   source=slack.mpim,\n",
    "                   fn_fmt=\"mpims/%s.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Private Groups\n",
    "\n",
    "I never used private groups, so this notebook doesn't cover that, but it would be a straightforward addition I think. They are structured similarly to MPIMs."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "slack.files.list?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Like messages, files are batched (paginated) in the API. My team had fewer than 200 files, and I was lazy, so I just bumped up the count until they all fit on one page. YMMV: if your team has more files than fit on one page, you will need to request the remaining pages as well."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "r_files = slack.files.list(count=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with open('files.json', 'w') as f:\n",
    "    f.write(r_files.raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "r_files.body.keys()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following output tells you whether the files all fit on one page or not"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "r_files.body['paging']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_files = pd.DataFrame.from_records(r_files.body['files'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_files.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_files.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pd.set_option('max_colwidth', 100) # to see the whole url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_files[['id', 'title', 'url_private', 'url_private_download', 'filetype']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some files have no download link; these are basically all Google Docs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# rows whose download URL is missing; .loc is the supported\n",
    "# boolean-mask indexer (.ix was removed in pandas 1.0)\n",
    "mask = df_files['url_private_download'].isnull()\n",
    "df_files.loc[mask]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "now go through and download a copy of every file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def grab_file(file_):\n",
    "    \"\"\"Download one Slack file to files/ID/NAME.\n",
    "\n",
    "    file_ -- file dict; its 'id', 'name' and (optional)\n",
    "             'url_private_download' entries are used.\n",
    "\n",
    "    Entries without a download URL are skipped. Raises\n",
    "    requests.HTTPError when the server answers with an error status,\n",
    "    so an error page is never saved in place of the file.\n",
    "    \"\"\"\n",
    "    id_ = file_['id']\n",
    "    os.makedirs('files/' + id_, exist_ok=True)\n",
    "    remotefn = file_.get('url_private_download')\n",
    "    if remotefn is None:\n",
    "        # probably a google doc, can't download\n",
    "        return\n",
    "    # keep only the last path component so a name containing a\n",
    "    # separator cannot point outside the per-file directory\n",
    "    localfn = 'files/%s/%s' % (id_, os.path.basename(file_['name']))\n",
    "    r = requests.get(remotefn, headers={'Authorization': 'Bearer %s' % TOKEN}, stream=True)\n",
    "    try:\n",
    "        # check the status before creating the local file\n",
    "        r.raise_for_status()\n",
    "        with open(localfn, 'wb') as fn:\n",
    "            for block in r.iter_content(1024):\n",
    "                fn.write(block)\n",
    "    finally:\n",
    "        # a streamed response keeps its connection until closed\n",
    "        r.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for file_ in r_files.body['files']:\n",
    "    grab_file(file_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You are up to date. 🎉"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Use this notebook to archive a JSON copy of all the Slack history that you have access to: channels, DMs, multi-party DMs, and files.\n",
	"\n",
	"You'll need Python 3 with the `slacker` module (`pip3 install slacker`). Pandas is optional."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from slacker import Slacker"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"You'll also need to generate a test token at https://api.slack.com/web and paste it in the variable below."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"TOKEN = 'my-slack-token-goes-here'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"slack = Slacker(TOKEN)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Users list"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"users = slack.users.list()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"with open('users.json', 'w') as f:\n",
	" f.write(users.raw)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"members = pd.DataFrame.from_records(users.body['members'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"members.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"members.head(1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"members[['id', 'name','real_name', 'is_admin', 'is_owner']]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Channels"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"channels = slack.channels.list()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"with open('channels.json', 'w') as f:\n",
	" f.write(channels.raw)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"channels.body.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_chs = pd.DataFrame.from_records(channels.body['channels'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_chs.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_chs[['id', 'name', 'creator', 'num_members', 'is_member']]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## channel message history\n",
	"\n",
	"API returns messages in batches, so make a function to repeatedly get them all:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"channels.body.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def get_full_history(channel_id, source=None):\n",
	"    \"\"\"Fetch every message of one conversation, batch by batch.\n",
	"\n",
	"    channel_id -- ID of the conversation to fetch.\n",
	"    source     -- API endpoint object with a `history` method (such as\n",
	"                  `slack.channels` or `slack.im`). When omitted,\n",
	"                  `slack.channels` is looked up at call time, so a\n",
	"                  `slack` client re-created with a new token is picked\n",
	"                  up without re-running this cell.\n",
	"\n",
	"    Returns the list of message dicts, in the order the API sent them.\n",
	"    \"\"\"\n",
	"    if source is None:\n",
	"        source = slack.channels\n",
	"    messages = []\n",
	"    latest = None\n",
	"    while True:\n",
	"        body = source.history(channel_id, latest=latest, count=1000).body\n",
	"        batch = body['messages']\n",
	"        if not batch:\n",
	"            # no messages (left) in this conversation\n",
	"            break\n",
	"        messages.extend(batch)\n",
	"        if not body.get('has_more', False):\n",
	"            break\n",
	"        # next request continues from the last message of this batch\n",
	"        latest = batch[-1]['ts']\n",
	"    return messages"
	]
	},
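	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Test this function on a single channel first. The ID below is the author's #random channel; replace it with a channel ID from your own team (see the `id` column in the channels table above)."
	]
	},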
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"hist = get_full_history('C0GCU93DY')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"len(hist)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import json"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"loop over all channels:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"def backup_all(iterable, source, fn_fmt=\"%s.json\"):\n",
	"    \"\"\"Back up every conversation in `iterable`, printing progress.\n",
	"\n",
	"    Drives `gen_backup`: each `(id, message_count)` status is printed\n",
	"    as soon as it is produced and is also collected.\n",
	"\n",
	"    Returns the list of all status tuples.\n",
	"    \"\"\"\n",
	"    stats = []\n",
	"    for status in gen_backup(iterable, source, fn_fmt=fn_fmt):\n",
	"        print(status)\n",
	"        stats.append(status)\n",
	"    return stats\n",
	"\n",
	"def gen_backup(iterable, source, fn_fmt=\"%s.json\"):\n",
	"    \"\"\"Write one JSON file per conversation, yielding a status for each.\n",
	"\n",
	"    iterable -- dicts describing conversations; each needs an 'id' key.\n",
	"    source   -- API endpoint object handed on to `get_full_history`.\n",
	"    fn_fmt   -- %-format string turning a conversation ID into a filename.\n",
	"\n",
	"    Each file holds the conversation's own dict plus a 'history' key with\n",
	"    its messages. Yields `(id, number_of_messages)` once the file is\n",
	"    written.\n",
	"    \"\"\"\n",
	"    for dic in iterable:\n",
	"        id_ = dic['id']\n",
	"        filename = fn_fmt % id_\n",
	"        hist = get_full_history(id_, source=source)\n",
	"        with open(filename, 'w') as fl:\n",
	"            json.dump(dict(history=hist, **dic), fl)\n",
	"        yield id_, len(hist)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import os\n",
	"\n",
	"os.makedirs('channels', exist_ok=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"stats = backup_all(channels.body['channels'],\n",
	" source=slack.channels,\n",
	" fn_fmt=\"channels/%s.json\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"woohoo, all the message histories are now saved"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"nums = pd.DataFrame.from_records(stats, columns=['id', 'num_msgs'])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"out of curiosity, see how many messages each channel had"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pd.merge(df_chs, nums, on='id')[['id', 'name', 'num_msgs', 'num_members']]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# DMs\n",
	"\n",
	"use the same technique to get direct message histories"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"ims = slack.im.list()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"with open('ims.json', 'w') as f:\n",
	" f.write(ims.raw)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"ims.body.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pd.DataFrame.from_records(ims.body['ims'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"os.makedirs('ims', exist_ok=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"stats = backup_all(ims.body['ims'],\n",
	" source=slack.im,\n",
	" fn_fmt=\"ims/%s.json\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# MPIMs\n",
	"\n",
	"multi-party direct messages are structured differently"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"mpims = slack.mpim.list()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"with open('mpims.json', 'w') as f:\n",
	" f.write(mpims.raw)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"mpims.body.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pd.DataFrame.from_records(mpims.body['groups'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"os.makedirs('mpims', exist_ok=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"stats = backup_all(mpims.body['groups'],\n",
	" source=slack.mpim,\n",
	" fn_fmt=\"mpims/%s.json\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Private Groups\n",
	"\n",
	"I never used private groups, so this notebook doesn't cover that, but it would be a straightforward addition I think. They are structured similarly to MPIMs."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Files"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"slack.files.list?"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Like messages, files are batched (paginated) in the API. My team had fewer than 200 files, and I was lazy, so I just bumped up the count until they all fit on one page. YMMV: if your team has more files than fit on one page, you will need to request the remaining pages as well."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"r_files = slack.files.list(count=200)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"with open('files.json', 'w') as f:\n",
	" f.write(r_files.raw)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"r_files.body.keys()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"The following output tells you whether the files all fit on one page or not"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"r_files.body['paging']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_files = pd.DataFrame.from_records(r_files.body['files'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_files.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_files.head(1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pd.set_option('max_colwidth', 100) # to see the whole url"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"df_files[['id', 'title', 'url_private', 'url_private_download', 'filetype']]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Some files have no download link; these are basically all Google Docs:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# rows whose download URL is missing; .loc is the supported\n",
	"# boolean-mask indexer (.ix was removed in pandas 1.0)\n",
	"mask = df_files['url_private_download'].isnull()\n",
	"df_files.loc[mask]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"now go through and download a copy of every file"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"import requests"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"def grab_file(file_):\n",
	"    \"\"\"Download one Slack file to files/ID/NAME.\n",
	"\n",
	"    file_ -- file dict; its 'id', 'name' and (optional)\n",
	"             'url_private_download' entries are used.\n",
	"\n",
	"    Entries without a download URL are skipped. Raises\n",
	"    requests.HTTPError when the server answers with an error status,\n",
	"    so an error page is never saved in place of the file.\n",
	"    \"\"\"\n",
	"    id_ = file_['id']\n",
	"    os.makedirs('files/' + id_, exist_ok=True)\n",
	"    remotefn = file_.get('url_private_download')\n",
	"    if remotefn is None:\n",
	"        # probably a google doc, can't download\n",
	"        return\n",
	"    # keep only the last path component so a name containing a\n",
	"    # separator cannot point outside the per-file directory\n",
	"    localfn = 'files/%s/%s' % (id_, os.path.basename(file_['name']))\n",
	"    r = requests.get(remotefn, headers={'Authorization': 'Bearer %s' % TOKEN}, stream=True)\n",
	"    try:\n",
	"        # check the status before creating the local file\n",
	"        r.raise_for_status()\n",
	"        with open(localfn, 'wb') as fn:\n",
	"            for block in r.iter_content(1024):\n",
	"                fn.write(block)\n",
	"    finally:\n",
	"        # a streamed response keeps its connection until closed\n",
	"        r.close()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"for file_ in r_files.body['files']:\n",
	" grab_file(file_)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"You are up to date. 🎉"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.4.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}