philip-bl · September 30, 2016 16:29
diff --git a/kids_shoes_experiment.ipynb b/kids_shoes_experiment.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from math import factorial\n",
    "from distcolors import get_distinguishable_colors\n",
    "from functools import reduce\n",
    "from itertools import zip_longest, permutations, product"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.random import permutation, shuffle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# pacaur -S python-tabulate\n",
    "# or\n",
    "# sudo -H pip install tabulate\n",
    "from tabulate import tabulate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def even(number):\n",
    "    assert number == int(number)\n",
    "    return number % 2 == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def calc_prob_theoretically(n):\n",
    "    '''Calculates probability that no child wears his pair of boots on the way home.\n",
    "    The calculation is performed via the theoretically obtained formula.'''\n",
    "    return 1 - sum(\n",
    "        (-1 if even(i) else 1) *\n",
    "        factorial(n - i) / factorial(n) / factorial(i)\n",
    "        for i in range(1, n + 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def generate_experiments(n, samples):\n",
    "    # 2 because that's the number of shoes per child\n",
    "    not_shuffled = np.tile(range(1, n + 1), (samples, 2, 1))\n",
    "    return np.apply_along_axis(permutation, 2, not_shuffled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def has_shoes_match(experiment):\n",
    "    '''Takes (2, n)-dim ndarray as input.\n",
    "    Returns True if there is a kid who left in own shoes.'''\n",
    "    pairs = np.transpose(experiment) # it's now (n, 2)\n",
    "    n = pairs.shape[0]\n",
    "    zipped_with_index = zip_longest(range(1, n + 1), pairs)\n",
    "    return any(index == left and index == right for (index, [left, right]) in zipped_with_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def calc_prob_experimentally(num_kids, samples):\n",
    "    '''Calculates probability that no child wears his pair of boots on the way home.\n",
    "    Creates the experiment using random number generators and counts number of successes.'''\n",
    "    data = generate_experiments(num_kids, samples)\n",
    "    count_matches = sum(has_shoes_match(experiment) for experiment in data)\n",
    "    return 1 - count_matches / samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def calc_prob_counting_permutations(num_kids):\n",
    "    '''Calculates probability that no child wears his pair of boots on the way home.\n",
    "    Lists all permutations and counts the successes. Runs very slowly for high num_kids.'''\n",
    "    assert num_kids < 7 # 7 and more takes too long\n",
    "    single_leg_permuts = list(permutations(range(1, num_kids + 1)))\n",
    "    all_permuts = list(product(single_leg_permuts, single_leg_permuts))\n",
    "    matches = [has_shoes_match(superrow) for superrow in all_permuts]\n",
    "    return 1 - sum(matches) / len(matches)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def row_for_table(num_kids, samples):\n",
    "    exp_prob = calc_prob_experimentally(num_kids, samples)\n",
    "    theor_prob = calc_prob_theoretically(num_kids)\n",
    "    abs_difference = abs(exp_prob - theor_prob)\n",
    "    permuts_count_probability = calc_prob_counting_permutations(num_kids) \\\n",
    "        if num_kids < 7 \\\n",
    "        else 'takes too long'\n",
    "    return [num_kids, exp_prob, theor_prob, abs_difference, permuts_count_probability]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def print_fancy_table():\n",
    "    samples = 10000\n",
    "    kids = range(2, 13, 1)\n",
    "    headers = ['num kids', 'experimental prob', 'theoretical prob',\n",
    "               'abs difference', 'count-permut prob']\n",
    "    table = [row_for_table(num_kids, samples) for num_kids in kids]\n",
    "    print(tabulate(table, headers=headers))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  num kids    experimental prob    theoretical prob    abs difference  count-permut prob\n",
      "----------  -------------------  ------------------  ----------------  -------------------\n",
      "         2               0.7555            0.75           0.0055       0.75\n",
      "         3               0.7257            0.722222       0.00347778   0.7222222222222222\n",
      "         4               0.7848            0.786458       0.00165833   0.7864583333333334\n",
      "         5               0.8226            0.8225         0.0001       0.8225\n",
      "         6               0.8552            0.848717       0.00648279   0.8487172067901234\n",
      "         7               0.8701            0.868301       0.0017995    takes too long\n",
      "         8               0.8848            0.883456       0.00134383   takes too long\n",
      "         9               0.896             0.895516       0.000484105  takes too long\n",
      "        10               0.9036            0.905332       0.00173207   takes too long\n",
      "        11               0.9105            0.913473       0.00297313   takes too long\n",
      "        12               0.9224            0.920332       0.0020683    takes too long\n"
     ]
    }
   ],
   "source": [
    "print_fancy_table()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Looks like theoretical formula is right"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"from math import factorial\n",
	"from distcolors import get_distinguishable_colors\n",
	"from functools import reduce\n",
	"from itertools import zip_longest, permutations, product"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"from numpy.random import permutation, shuffle"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# pacaur -S python-tabulate\n",
	"# or\n",
	"# sudo -H pip install tabulate\n",
	"from tabulate import tabulate"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def even(number):\n",
	" assert number == int(number)\n",
	" return number % 2 == 0"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def calc_prob_theoretically(n):\n",
	" '''Calculates probability that no child wears his pair of boots on the way home.\n",
	" The calculation is performed via the theoretically obtained formula.'''\n",
	" return 1 - sum(\n",
	" (-1 if even(i) else 1) *\n",
	" factorial(n - i) / factorial(n) / factorial(i)\n",
	" for i in range(1, n + 1))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def generate_experiments(n, samples):\n",
	" # 2 because that's the number of shoes per child\n",
	" not_shuffled = np.tile(range(1, n + 1), (samples, 2, 1))\n",
	" return np.apply_along_axis(permutation, 2, not_shuffled)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def has_shoes_match(experiment):\n",
	" '''Takes (2, n)-dim ndarray as input.\n",
	" Returns True if there is a kid who left in own shoes.'''\n",
	" pairs = np.transpose(experiment) # it's now (n, 2)\n",
	" n = pairs.shape[0]\n",
	" zipped_with_index = zip_longest(range(1, n + 1), pairs)\n",
	" return any(index == left and index == right for (index, [left, right]) in zipped_with_index)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def calc_prob_experimentally(num_kids, samples):\n",
	" '''Calculates probability that no child wears his pair of boots on the way home.\n",
	" Creates the experiment using random number generators and counts number of successes.'''\n",
	" data = generate_experiments(num_kids, samples)\n",
	" count_matches = sum(has_shoes_match(experiment) for experiment in data)\n",
	" return 1 - count_matches / samples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def calc_prob_counting_permutations(num_kids):\n",
	" '''Calculates probability that no child wears his pair of boots on the way home.\n",
	" Lists all permutations and counts the successes. Runs very slowly for high num_kids.'''\n",
	" assert num_kids < 7 # 7 and more takes too long\n",
	" single_leg_permuts = list(permutations(range(1, num_kids + 1)))\n",
	" all_permuts = list(product(single_leg_permuts, single_leg_permuts))\n",
	" matches = [has_shoes_match(superrow) for superrow in all_permuts]\n",
	" return 1 - sum(matches) / len(matches)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def row_for_table(num_kids, samples):\n",
	" exp_prob = calc_prob_experimentally(num_kids, samples)\n",
	" theor_prob = calc_prob_theoretically(num_kids)\n",
	" abs_difference = abs(exp_prob - theor_prob)\n",
	" permuts_count_probability = calc_prob_counting_permutations(num_kids) \\\n",
	" if num_kids < 7 \\\n",
	" else 'takes too long'\n",
	" return [num_kids, exp_prob, theor_prob, abs_difference, permuts_count_probability]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def print_fancy_table():\n",
	" samples = 10000\n",
	" kids = range(2, 13, 1)\n",
	" headers = ['num kids', 'experimental prob', 'theoretical prob',\n",
	" 'abs difference', 'count-permut prob']\n",
	" table = [row_for_table(num_kids, samples) for num_kids in kids]\n",
	" print(tabulate(table, headers=headers))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" num kids experimental prob theoretical prob abs difference count-permut prob\n",
	"---------- ------------------- ------------------ ---------------- -------------------\n",
	" 2 0.7555 0.75 0.0055 0.75\n",
	" 3 0.7257 0.722222 0.00347778 0.7222222222222222\n",
	" 4 0.7848 0.786458 0.00165833 0.7864583333333334\n",
	" 5 0.8226 0.8225 0.0001 0.8225\n",
	" 6 0.8552 0.848717 0.00648279 0.8487172067901234\n",
	" 7 0.8701 0.868301 0.0017995 takes too long\n",
	" 8 0.8848 0.883456 0.00134383 takes too long\n",
	" 9 0.896 0.895516 0.000484105 takes too long\n",
	" 10 0.9036 0.905332 0.00173207 takes too long\n",
	" 11 0.9105 0.913473 0.00297313 takes too long\n",
	" 12 0.9224 0.920332 0.0020683 takes too long\n"
	]
	}
	],
	"source": [
	"print_fancy_table()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Looks like theoretical formula is right"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}