loisaidasam · May 1, 2020 03:55
diff --git a/story.ipynb b/story.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's a story about numpy ndarrays with structured dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's whip up some sample data and a sample data type ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "sample_data = [\n",
    "    (\n",
    "        \"Sam\",\n",
    "        'black',\n",
    "        27,\n",
    "        [\n",
    "            \"pizza\",\n",
    "            \"sourdough bread\",\n",
    "            \"tacos\",\n",
    "        ],\n",
    "        [\n",
    "            (\"Wicked Weed\", \"Napolean Complex IPA\", 1, 4.5),\n",
    "            (\"Sweetwater\", \"IPA\", 6, 4.0),\n",
    "            (\"Budweiser\", \"Diesel\", 11, 3.0),\n",
    "        ],\n",
    "    ),\n",
    "    (\n",
    "        \"Ryan\",\n",
    "        'blue',\n",
    "        13,\n",
    "        [\n",
    "            \"fish fry\",\n",
    "            \"pulled pork\",\n",
    "            \"fried rice\",\n",
    "        ],\n",
    "        [\n",
    "            (\"Arches\", \"Bohemian Pilsner\", 1, 5),\n",
    "            (\"Monday Night\", \"I'm On A Boat\", 2, 4.5),\n",
    "            (\"Corona\", \"Extra\", 0, 4),\n",
    "        ],\n",
    "    )\n",
    "]\n",
    "\n",
    "dtype = np.dtype([\n",
    "    ('name', '<U32'),\n",
    "    ('favorite_color', '<U10'),\n",
    "    ('favorite_number', 'int32'),\n",
    "    ('top_three_foods', '<U32', (3,)),\n",
    "    (\n",
    "        'beer_in_fridge',\n",
    "        [\n",
    "            ('brewer', '<U32'),\n",
    "            ('name', '<U32'),\n",
    "            ('count', 'int32'),\n",
    "            ('star_rating', 'float32'),\n",
    "        ],\n",
    "        (3,),\n",
    "    ),\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's what our sample data looks like as plain Python objects:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Sam',\n",
       "  'black',\n",
       "  27,\n",
       "  ['pizza', 'sourdough bread', 'tacos'],\n",
       "  [('Wicked Weed', 'Napolean Complex IPA', 1, 4.5),\n",
       "   ('Sweetwater', 'IPA', 6, 4.0),\n",
       "   ('Budweiser', 'Diesel', 11, 3.0)]),\n",
       " ('Ryan',\n",
       "  'blue',\n",
       "  13,\n",
       "  ['fish fry', 'pulled pork', 'fried rice'],\n",
       "  [('Arches', 'Bohemian Pilsner', 1, 5),\n",
       "   ('Monday Night', \"I'm On A Boat\", 2, 4.5),\n",
       "   ('Corona', 'Extra', 0, 4)])]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And our dtype:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype([('name', '<U32'), ('favorite_color', '<U10'), ('favorite_number', '<i4'), ('top_three_foods', '<U32', (3,)), ('beer_in_fridge', [('brewer', '<U32'), ('name', '<U32'), ('count', '<i4'), ('star_rating', '<f4')], (3,))])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's what the whole shebang looks like as a numpy array:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([('Sam', 'black', 27, ['pizza', 'sourdough bread', 'tacos'], [('Wicked Weed', 'Napolean Complex IPA',  1, 4.5), ('Sweetwater', 'IPA',  6, 4. ), ('Budweiser', 'Diesel', 11, 3. )]),\n",
       "       ('Ryan', 'blue', 13, ['fish fry', 'pulled pork', 'fried rice'], [('Arches', 'Bohemian Pilsner',  1, 5. ), ('Monday Night', \"I'm On A Boat\",  2, 4.5), ('Corona', 'Extra',  0, 4. )])],\n",
       "      dtype=[('name', '<U32'), ('favorite_color', '<U10'), ('favorite_number', '<i4'), ('top_three_foods', '<U32', (3,)), ('beer_in_fridge', [('brewer', '<U32'), ('name', '<U32'), ('count', '<i4'), ('star_rating', '<f4')], (3,))])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people = np.array(sample_data, dtype=dtype)\n",
    "people"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's what one person looks like:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Sam', 'black', 27, ['pizza', 'sourdough bread', 'tacos'], [('Wicked Weed', 'Napolean Complex IPA',  1, 4.5), ('Sweetwater', 'IPA',  6, 4. ), ('Budweiser', 'Diesel', 11, 3. )])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sam = people[0]\n",
    "sam"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Their favorite food:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pizza'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sam['top_three_foods'][0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A beer that's in their fridge and how they rated it:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IPA by Sweetwater: 4.0 stars (****)\n"
     ]
    }
   ],
   "source": [
    "beer = sam['beer_in_fridge'][1]\n",
    "# One asterisk per whole star; int() drops any fractional part of the rating\n",
    "stars = '*' * int(beer['star_rating'])\n",
    "print(\"%s by %s: %s stars (%s)\" % (\n",
    "    beer['name'],\n",
    "    beer['brewer'],\n",
    "    beer['star_rating'],\n",
    "    stars,\n",
    "))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "OK, now let's manipulate the data a bit to see how rigid this structure is...\n",
    "\n",
    "Can we create an ndarray with just one person?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Yep\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    np.array(sample_data[:1], dtype=dtype)\n",
    "    print(\"Yep\")\n",
    "except Exception as exception:\n",
    "    print(\"Nope: %s\" % exception)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Can we create a user who only has ONE favorite food (the dtype explicitly states 3)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Yep\n",
      "[('Brooke', 'pink', 22, ['brussel sprouts', 'brussel sprouts', 'brussel sprouts'], [('Wicked Weed', 'Napolean Complex IPA',  1, 4.5), ('Sweetwater', 'IPA',  6, 4. ), ('Budweiser', 'Diesel', 11, 3. )])]\n",
      "Hmmm, well that's bizarre, it seems to have padded the array with \"brussel sprouts\" three times ...\n"
     ]
    }
   ],
   "source": [
    "sample_data_weird_food_counts = [\n",
    "    (\n",
    "        \"Brooke\",\n",
    "        'pink',\n",
    "        22,\n",
    "        [\n",
    "            \"brussel sprouts\",\n",
    "        ],\n",
    "        [\n",
    "            (\"Wicked Weed\", \"Napolean Complex IPA\", 1, 4.5),\n",
    "            (\"Sweetwater\", \"IPA\", 6, 4.0),\n",
    "            (\"Budweiser\", \"Diesel\", 11, 3.0),\n",
    "        ],\n",
    "    ),\n",
    "]\n",
    "try:\n",
    "    people_one_food = np.array(sample_data_weird_food_counts, dtype=dtype)\n",
    "    print(\"Yep\")\n",
    "    print(people_one_food)\n",
    "    print(\"Hmmm, well that's bizarre, it seems to have padded the array with \\\"brussel sprouts\\\" three times ...\")\n",
    "except Exception as exception:\n",
    "    print(\"Nope: %s\" % exception)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What if there are TWO fav foods (one too few)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "foods: ['brussel sprouts', 'hummus']\n",
      "Nope: cannot copy sequence with size 2 to array axis with dimension 3\n"
     ]
    }
   ],
   "source": [
    "# Build a new record instead of appending to Brooke's food list in place,\n",
    "# so re-running this cell always tests exactly two foods\n",
    "brooke = sample_data_weird_food_counts[0]\n",
    "two_foods = brooke[3] + [\"hummus\"]\n",
    "sample_data_two_foods = [brooke[:3] + (two_foods,) + brooke[4:]]\n",
    "print(\"foods: %s\" % two_foods)\n",
    "try:\n",
    "    np.array(sample_data_two_foods, dtype=dtype)\n",
    "    print(\"Yep\")\n",
    "except Exception as exception:\n",
    "    print(\"Nope: %s\" % exception)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "OK. What if there are FOUR fav foods (one too many)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "foods: ['brussel sprouts', 'hummus', 'saltines', 'ketchup']\n",
      "Nope: cannot copy sequence with size 4 to array axis with dimension 3\n"
     ]
    }
   ],
   "source": [
    "# Derive the four-food record from the two-food list above, again without\n",
    "# modifying any earlier data\n",
    "four_foods = two_foods + [\"saltines\", \"ketchup\"]\n",
    "sample_data_four_foods = [brooke[:3] + (four_foods,) + brooke[4:]]\n",
    "print(\"foods: %s\" % four_foods)\n",
    "try:\n",
    "    np.array(sample_data_four_foods, dtype=dtype)\n",
    "    print(\"Yep\")\n",
    "except Exception as exception:\n",
    "    print(\"Nope: %s\" % exception)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Interesting! OK, now let's fox with the dtype a bit ...\n",
    "\n",
    "The dtype descr looks like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('name', '<U32'),\n",
       " ('favorite_color', '<U10'),\n",
       " ('favorite_number', '<i4'),\n",
       " ('top_three_foods', '<U32', (3,)),\n",
       " ('beer_in_fridge',\n",
       "  [('brewer', '<U32'),\n",
       "   ('name', '<U32'),\n",
       "   ('count', '<i4'),\n",
       "   ('star_rating', '<f4')],\n",
       "  (3,))]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype.descr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And it looks like this when serialized to JSON:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[[\"name\", \"<U32\"], [\"favorite_color\", \"<U10\"], [\"favorite_number\", \"<i4\"], [\"top_three_foods\", \"<U32\", [3]], [\"beer_in_fridge\", [[\"brewer\", \"<U32\"], [\"name\", \"<U32\"], [\"count\", \"<i4\"], [\"star_rating\", \"<f4\"]], [3]]]'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype_descr_to_json = json.dumps(dtype.descr)\n",
    "dtype_descr_to_json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And here's what that looks like when deserialized from JSON (note all lists, no tuples):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['name', '<U32'],\n",
       " ['favorite_color', '<U10'],\n",
       " ['favorite_number', '<i4'],\n",
       " ['top_three_foods', '<U32', [3]],\n",
       " ['beer_in_fridge',\n",
       "  [['brewer', '<U32'],\n",
       "   ['name', '<U32'],\n",
       "   ['count', '<i4'],\n",
       "   ['star_rating', '<f4']],\n",
       "  [3]]]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype_descr_from_json = json.loads(dtype_descr_to_json)\n",
    "dtype_descr_from_json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's a handy little recursive function I wrote for deserializing from JSON:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def deserialize_dtype_descr_json(dtype_descr_json):\n",
    "    \"\"\"Rebuild a JSON-decoded dtype.descr as a list of tuples.\n",
    "\n",
    "    JSON doesn't distinguish between lists and tuples, so a round trip\n",
    "    through json.dumps/json.loads turns every tuple into a list, but\n",
    "    dtype formatting requires lists on the outside and tuples on the\n",
    "    inside. Nested field lists are converted recursively.\n",
    "\n",
    "    The input is not modified, so calling this again on the same decoded\n",
    "    JSON always gives the same result.\n",
    "\n",
    "    Args:\n",
    "        dtype_descr_json: list of [name, format] or\n",
    "            [name, format, shape] entries, where format is a dtype\n",
    "            string or a nested list of such entries.\n",
    "\n",
    "    Returns:\n",
    "        A new list of (name, format) / (name, format, shape) tuples.\n",
    "    \"\"\"\n",
    "    fields = []\n",
    "    for dtype_entry in dtype_descr_json:\n",
    "        # Copy the entry so the caller's lists are left untouched\n",
    "        field = list(dtype_entry)\n",
    "        if isinstance(field[1], list):\n",
    "            # The second element is the format of the field, so if it's\n",
    "            # a list (of lists) it's a nested struct and has to be\n",
    "            # converted to a list of tuples\n",
    "            field[1] = deserialize_dtype_descr_json(field[1])\n",
    "        if len(field) >= 3 and isinstance(field[2], list):\n",
    "            # The optional third element is the subarray shape, which\n",
    "            # dtype.descr reports as a tuple\n",
    "            field[2] = tuple(field[2])\n",
    "        fields.append(tuple(field))\n",
    "    return fields"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And here's what that looks like after being deserialized:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('name', '<U32'),\n",
       " ('favorite_color', '<U10'),\n",
       " ('favorite_number', '<i4'),\n",
       " ('top_three_foods', '<U32', (3,)),\n",
       " ('beer_in_fridge',\n",
       "  [('brewer', '<U32'),\n",
       "   ('name', '<U32'),\n",
       "   ('count', '<i4'),\n",
       "   ('star_rating', '<f4')],\n",
       "  (3,))]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype_descr_from_json_deserialized = deserialize_dtype_descr_json(dtype_descr_from_json)\n",
    "dtype_descr_from_json_deserialized"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Confirming that we can deserialize from JSON properly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtype.descr == dtype_descr_from_json_deserialized"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Confirming that using this deserialized dtype works properly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "people_from_deserialized_dtype_descr = np.array(sample_data,\n",
    "                                                dtype=dtype_descr_from_json_deserialized)\n",
    "(people == people_from_deserialized_dtype_descr).all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here's a story about numpy ndarrays with structured dtypes"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Let's whip up some sample data and a sample data type ..."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import json\n",
	"\n",
	"import numpy as np\n",
	"\n",
	"\n",
	"sample_data = [\n",
	" (\n",
	" \"Sam\",\n",
	" 'black',\n",
	" 27,\n",
	" [\n",
	" \"pizza\",\n",
	" \"sourdough bread\",\n",
	" \"tacos\",\n",
	" ],\n",
	" [\n",
	" (\"Wicked Weed\", \"Napolean Complex IPA\", 1, 4.5),\n",
	" (\"Sweetwater\", \"IPA\", 6, 4.0),\n",
	" (\"Budweiser\", \"Diesel\", 11, 3.0),\n",
	" ],\n",
	" ),\n",
	" (\n",
	" \"Ryan\",\n",
	" 'blue',\n",
	" 13,\n",
	" [\n",
	" \"fish fry\",\n",
	" \"pulled pork\",\n",
	" \"fried rice\",\n",
	" ],\n",
	" [\n",
	" (\"Arches\", \"Bohemian Pilsner\", 1, 5),\n",
	" (\"Monday Night\", \"I'm On A Boat\", 2, 4.5),\n",
	" (\"Corona\", \"Extra\", 0, 4),\n",
	" ],\n",
	" )\n",
	"]\n",
	"\n",
	"dtype = np.dtype([\n",
	" ('name', '<U32'),\n",
	" ('favorite_color', '<U10'),\n",
	" ('favorite_number', 'int32'),\n",
	" ('top_three_foods', '<U32', (3,)),\n",
	" (\n",
	" 'beer_in_fridge',\n",
	" [\n",
	" ('brewer', '<U32'),\n",
	" ('name', '<U32'),\n",
	" ('count', 'int32'),\n",
	" ('star_rating', 'float32'),\n",
	" ],\n",
	" (3,),\n",
	" ),\n",
	"])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here's what our sample data looks like in primitive python:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('Sam',\n",
	" 'black',\n",
	" 27,\n",
	" ['pizza', 'sourdough bread', 'tacos'],\n",
	" [('Wicked Weed', 'Napolean Complex IPA', 1, 4.5),\n",
	" ('Sweetwater', 'IPA', 6, 4.0),\n",
	" ('Budweiser', 'Diesel', 11, 3.0)]),\n",
	" ('Ryan',\n",
	" 'blue',\n",
	" 13,\n",
	" ['fish fry', 'pulled pork', 'fried rice'],\n",
	" [('Arches', 'Bohemian Pilsner', 1, 5),\n",
	" ('Monday Night', \"I'm On A Boat\", 2, 4.5),\n",
	" ('Corona', 'Extra', 0, 4)])]"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sample_data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"And our dtype:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dtype([('name', '<U32'), ('favorite_color', '<U10'), ('favorite_number', '<i4'), ('top_three_foods', '<U32', (3,)), ('beer_in_fridge', [('brewer', '<U32'), ('name', '<U32'), ('count', '<i4'), ('star_rating', '<f4')], (3,))])"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here's what the whole shebang looks like as a numpy array:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([('Sam', 'black', 27, ['pizza', 'sourdough bread', 'tacos'], [('Wicked Weed', 'Napolean Complex IPA', 1, 4.5), ('Sweetwater', 'IPA', 6, 4. ), ('Budweiser', 'Diesel', 11, 3. )]),\n",
	" ('Ryan', 'blue', 13, ['fish fry', 'pulled pork', 'fried rice'], [('Arches', 'Bohemian Pilsner', 1, 5. ), ('Monday Night', \"I'm On A Boat\", 2, 4.5), ('Corona', 'Extra', 0, 4. )])],\n",
	" dtype=[('name', '<U32'), ('favorite_color', '<U10'), ('favorite_number', '<i4'), ('top_three_foods', '<U32', (3,)), ('beer_in_fridge', [('brewer', '<U32'), ('name', '<U32'), ('count', '<i4'), ('star_rating', '<f4')], (3,))])"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"people = np.array(sample_data, dtype=dtype)\n",
	"people"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here's what one person looks like:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"('Sam', 'black', 27, ['pizza', 'sourdough bread', 'tacos'], [('Wicked Weed', 'Napolean Complex IPA', 1, 4.5), ('Sweetwater', 'IPA', 6, 4. ), ('Budweiser', 'Diesel', 11, 3. )])"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sam = people[0]\n",
	"sam"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Their favorite food:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'pizza'"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sam['top_three_foods'][0]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"A beer that's in their fridge and how they rated it:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"IPA by Sweetwater: 4.0 stars (****)\n"
	]
	}
	],
	"source": [
	"beer = sam['beer_in_fridge'][1]\n",
	"stars = ''.join(['*' for _ in range(int(beer['star_rating']))])\n",
	"print(\"%s by %s: %s stars (%s)\" % (\n",
	" beer['name'],\n",
	" beer['brewer'],\n",
	" beer['star_rating'],\n",
	" stars,\n",
	"))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"OK, now let's manipulate the data a bit to see how rigid this structure is...\n",
	"\n",
	"Can we create an ndarray with just one person?"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Yep\n"
	]
	}
	],
	"source": [
	"try:\n",
	" np.array(sample_data[:1], dtype=dtype)\n",
	" print(\"Yep\")\n",
	"except Exception as exception:\n",
	" print(\"Nope: %s\" % exception)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Can we create a user who only has ONE favorite food (the dtype explicitly states 3)?"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Yep\n",
	"[('Brooke', 'pink', 22, ['brussel sprouts', 'brussel sprouts', 'brussel sprouts'], [('Wicked Weed', 'Napolean Complex IPA', 1, 4.5), ('Sweetwater', 'IPA', 6, 4. ), ('Budweiser', 'Diesel', 11, 3. )])]\n",
	"Hmmm, well that's bizarre, it seems to have padded the array with \"brussel sprouts\" three times ...\n"
	]
	}
	],
	"source": [
	"sample_data_weird_food_counts = [\n",
	" (\n",
	" \"Brooke\",\n",
	" 'pink',\n",
	" 22,\n",
	" [\n",
	" \"brussel sprouts\",\n",
	" ],\n",
	" [\n",
	" (\"Wicked Weed\", \"Napolean Complex IPA\", 1, 4.5),\n",
	" (\"Sweetwater\", \"IPA\", 6, 4.0),\n",
	" (\"Budweiser\", \"Diesel\", 11, 3.0),\n",
	" ],\n",
	" ),\n",
	"]\n",
	"try:\n",
	" people_one_food = np.array(sample_data_weird_food_counts, dtype=dtype)\n",
	" print(\"Yep\")\n",
	" print(people_one_food)\n",
	" print(\"Hmmm, well that's bizarre, it seems to have padded the array with \\\"brussel sprouts\\\" three times ...\")\n",
	"except Exception as exception:\n",
	" print(\"Nope: %s\" % exception)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"What if there are TWO fav foods (one too few)?"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"foods: ['brussel sprouts', 'hummus']\n",
	"Nope: cannot copy sequence with size 2 to array axis with dimension 3\n"
	]
	}
	],
	"source": [
	"sample_data_weird_food_counts[0][3].append(\"hummus\")\n",
	"print(\"foods: %s\" % sample_data_weird_food_counts[0][3])\n",
	"try:\n",
	" np.array(sample_data_weird_food_counts, dtype=dtype)\n",
	" print(\"Yep\")\n",
	"except Exception as exception:\n",
	" print(\"Nope: %s\" % exception)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"OK. What if there are FOUR fav foods (one too many)?"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"foods: ['brussel sprouts', 'hummus', 'saltines', 'ketchup']\n",
	"Nope: cannot copy sequence with size 4 to array axis with dimension 3\n"
	]
	}
	],
	"source": [
	"sample_data_weird_food_counts[0][3].extend([\"saltines\", \"ketchup\"])\n",
	"print(\"foods: %s\" % sample_data_weird_food_counts[0][3])\n",
	"try:\n",
	" np.array(sample_data_weird_food_counts, dtype=dtype)\n",
	" print(\"Yep\")\n",
	"except Exception as exception:\n",
	" print(\"Nope: %s\" % exception)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Interesting! OK, now let's fox with the dtype a bit ...\n",
	"\n",
	"The dtype descr looks like this:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('name', '<U32'),\n",
	" ('favorite_color', '<U10'),\n",
	" ('favorite_number', '<i4'),\n",
	" ('top_three_foods', '<U32', (3,)),\n",
	" ('beer_in_fridge',\n",
	" [('brewer', '<U32'),\n",
	" ('name', '<U32'),\n",
	" ('count', '<i4'),\n",
	" ('star_rating', '<f4')],\n",
	" (3,))]"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype.descr"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"And it looks like this when serialized to JSON:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'[[\"name\", \"<U32\"], [\"favorite_color\", \"<U10\"], [\"favorite_number\", \"<i4\"], [\"top_three_foods\", \"<U32\", [3]], [\"beer_in_fridge\", [[\"brewer\", \"<U32\"], [\"name\", \"<U32\"], [\"count\", \"<i4\"], [\"star_rating\", \"<f4\"]], [3]]]'"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype_descr_to_json = json.dumps(dtype.descr)\n",
	"dtype_descr_to_json"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"And here's what that looks like when deserialized from JSON (note all lists, no tuples):"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[['name', '<U32'],\n",
	" ['favorite_color', '<U10'],\n",
	" ['favorite_number', '<i4'],\n",
	" ['top_three_foods', '<U32', [3]],\n",
	" ['beer_in_fridge',\n",
	" [['brewer', '<U32'],\n",
	" ['name', '<U32'],\n",
	" ['count', '<i4'],\n",
	" ['star_rating', '<f4']],\n",
	" [3]]]"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype_descr_from_json = json.loads(dtype_descr_to_json)\n",
	"dtype_descr_from_json"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Here's a handy little recursive function I wrote for deserializing from JSON:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"def deserialize_dtype_descr_json(dtype_descr_json):\n",
	"    \"\"\"Rebuild a JSON-decoded dtype.descr as a list of tuples.\n",
	"\n",
	"    JSON doesn't distinguish between lists and tuples, so a round trip\n",
	"    through json.dumps/json.loads turns every tuple into a list, but\n",
	"    dtype formatting requires lists on the outside and tuples on the\n",
	"    inside. Nested field lists are converted recursively.\n",
	"\n",
	"    The input is not modified, so calling this again on the same decoded\n",
	"    JSON always gives the same result.\n",
	"\n",
	"    Args:\n",
	"        dtype_descr_json: list of [name, format] or\n",
	"            [name, format, shape] entries, where format is a dtype\n",
	"            string or a nested list of such entries.\n",
	"\n",
	"    Returns:\n",
	"        A new list of (name, format) / (name, format, shape) tuples.\n",
	"    \"\"\"\n",
	"    fields = []\n",
	"    for dtype_entry in dtype_descr_json:\n",
	"        # Copy the entry so the caller's lists are left untouched\n",
	"        field = list(dtype_entry)\n",
	"        if isinstance(field[1], list):\n",
	"            # The second element is the format of the field, so if it's\n",
	"            # a list (of lists) it's a nested struct and has to be\n",
	"            # converted to a list of tuples\n",
	"            field[1] = deserialize_dtype_descr_json(field[1])\n",
	"        if len(field) >= 3 and isinstance(field[2], list):\n",
	"            # The optional third element is the subarray shape, which\n",
	"            # dtype.descr reports as a tuple\n",
	"            field[2] = tuple(field[2])\n",
	"        fields.append(tuple(field))\n",
	"    return fields"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"And here's what that looks like after being deserialized:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('name', '<U32'),\n",
	" ('favorite_color', '<U10'),\n",
	" ('favorite_number', '<i4'),\n",
	" ('top_three_foods', '<U32', (3,)),\n",
	" ('beer_in_fridge',\n",
	" [('brewer', '<U32'),\n",
	" ('name', '<U32'),\n",
	" ('count', '<i4'),\n",
	" ('star_rating', '<f4')],\n",
	" (3,))]"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype_descr_from_json_deserialized = deserialize_dtype_descr_json(dtype_descr_from_json)\n",
	"dtype_descr_from_json_deserialized"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Confirming that we can deserialize from JSON properly:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dtype.descr == dtype_descr_from_json_deserialized"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Confirming that using this deserialized dtype works properly:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"people_from_deserialized_dtype_descr = np.array(sample_data,\n",
	" dtype=dtype_descr_from_json_deserialized)\n",
	"(people == people_from_deserialized_dtype_descr).all()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found