thekensta · December 5, 2015 20:56
diff --git a/PrepareData.ipynb b/PrepareData.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot  as plt\n",
    "import numpy as np\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['../Prepped data/17. CRS Service Users 2014/All service users.csv',\n",
       " '../Prepped data/17. CRS Service Users 2014/Carer age.csv',\n",
       " '../Prepped data/17. CRS Service Users 2014/Carer Ethnicity.csv',\n",
       " '../Prepped data/17. CRS Service Users 2014/Carer Gender.csv',\n",
       " '../Prepped data/17. CRS Service Users 2014/Carer Language.csv',\n",
       " '../Prepped data/17. CRS Service Users 2014/Carer Partnership.csv']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import glob\n",
    "glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Read in all the raw survey files\n",
    "datasets = {fin: pd.read_csv(\"../Prepped data/\" +fin) \n",
    "            for fin in glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")}\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the data from Data set 8\n",
    "\n",
    "Conver the really wide columns into a narrow set with sex / age/  ethnicity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data8 = pd.read_csv(\"../Prepped data/08.1 Ethnicity Sex Age England & Wales/8. Ethnicity Sex Age EW.csv\")\n",
    "#data8.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get all the columns and look for the pivot and value columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "366 columns in total\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['LAD12CD',\n",
       " 'Local.Authority.District.12',\n",
       " 'Counties.&.UAs.code',\n",
       " 'Counties.&.Unitary.Authorities',\n",
       " 'Locality.cost.centre',\n",
       " 'Locality']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d8c = [c for c in data8.columns]\n",
    "print(len(d8c), \"columns in total\")\n",
    "d8c[:6]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Melt the dataset to narrow format \n",
    "Then add columns for sex / age / ethnicity "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data8melt = pd.melt(data8, id_vars=d8c[:6], value_vars=d8c[6:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(348, 366)\n",
      "(125280, 8)\n"
     ]
    }
   ],
   "source": [
    "print(data8.shape)\n",
    "print(data8melt.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Age.65.and.over', 72),\n",
       " ('All.categories:.Age', 72),\n",
       " ('Age.0.to.24', 72),\n",
       " ('Age.25.to.49', 72),\n",
       " ('Age.50.to.64', 72)]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def parse_col_name(cname):\n",
    "    \"\"\"Split a column into sex, age and ethnicity fields. \n",
    "    \n",
    "    >>> parse_col_name('Sex:.All.persons;.Age:.All.categories:.Age;.Ethnic.Group:.White:.English/Welsh/Scottish/Northern.Irish/British')\n",
    "    ('All.persons',\n",
    "     'All.categories:.Age',\n",
    "     'White:.English/Welsh/Scottish/Northern.Irish/British')\n",
    "    \n",
    "    \"\"\"\n",
    "    # All multi fields have sex / age / erthnicity\n",
    "    fields = cname.split(\";\")\n",
    "    sex_, age_, ethnic_ = fields\n",
    "    sex = sex_.split(\":\", 1)[1]\n",
    "    age = age_.split(\":\", 1)[1]\n",
    "    ethnic = ethnic_.split(\":\", 1)[1]\n",
    "    # Remove leading '.' on all columns\n",
    "    return sex[1:], age[1:], ethnic[1:]\n",
    "\n",
    "\n",
    "sexes, ages, ethnics = [], [], []\n",
    "for col in d8c:\n",
    "    # Ignore all the columns used for pivoting\n",
    "    if col.startswith(\"Sex\"):\n",
    "        s, a, e = parse_col_name(col)\n",
    "        sexes.append(s)\n",
    "        ages.append(a)\n",
    "        ethnics.append(e)\n",
    "\n",
    "# Run to show counts of what is in each\n",
    "c = Counter()\n",
    "c.update(ages) # sexes / ethnics\n",
    "c.most_common()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The long, ugly column names in the wide table are now a single column\n",
    "\n",
    "Build a DataFrame with columns [\"long name\", \"sex\", \"age\", \"ethnicity\"] and then join\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(125280, 8)\n",
      "(125280, 12)\n",
      "['LAD12CD', 'Local.Authority.District.12', 'Counties.&.UAs.code', 'Counties.&.Unitary.Authorities', 'Locality.cost.centre', 'Locality', 'variable', 'value', 'age', 'colname', 'ethnic', 'sex']\n"
     ]
    }
   ],
   "source": [
    "## Columns Lookup\n",
    "column_lookup = pd.DataFrame({\"colname\": [c for c in d8c if c.startswith(\"Sex\")], \n",
    "                              \"sex\": sexes, \n",
    "                              \"age\": ages, \n",
    "                              \"ethnic\": ethnics})\n",
    "\n",
    "\n",
    "\n",
    "data8_sex_age_eth = pd.merge(data8melt, column_lookup, left_on=\"variable\", right_on=\"colname\")\n",
    "\n",
    "print(data8melt.shape)\n",
    "print(data8_sex_age_eth.shape)\n",
    "\n",
    "print([c for c in data8_sex_age_eth.columns])\n",
    "cols_of_interest = ['LAD12CD', 'Local.Authority.District.12', 'Counties.&.UAs.code', \n",
    "                    'Counties.&.Unitary.Authorities', 'Locality.cost.centre', 'Locality', 'variable', 'value', \n",
    "                    'age', 'ethnic', 'sex']\n",
    "\n",
    "# Filter out columns and save to file\n",
    "data8_sex_age_eth_final = data8_sex_age_eth[cols_of_interest]\n",
    "data8_sex_age_eth_final.to_csv(\"../workingdata/data8_with_age_sex_ethnic.csv\", index=False, sep=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# data8melt.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Users of the Service Survey"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#\n",
    "# Load all the data and turn into narrow format\n",
    "# \n",
    "\n",
    "## Working directory is\n",
    "#\n",
    "# /+\n",
    "#  /notebooks/<this file>\n",
    "#  |           \n",
    "#  /Prepped data/\n",
    "#               |\n",
    "#               /<survey 17>/\n",
    "#                           | \n",
    "#                           + <data.csv>\n",
    "#\n",
    "# There is one CSV per survey\n",
    "#\n",
    "datasets = {fin: pd.read_csv(\"../Prepped data/\" +fin) \n",
    "            for fin in glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")}\n",
    "\n",
    "\n",
    "# Locality needs to be based on the \"Locality CRS 2014\" columns\n",
    "# This needs to be split according to the ratio in locations-remap.csv (in version control)\n",
    "def single_narrow_dataset(datasets, locality_field = \"Locality.CRS.2014\"):\n",
    "    \n",
    "    # Read in a list of files from a directory and create a single DataFrame\n",
    "    df_list = []\n",
    "    \n",
    "    for key, df in datasets.items():\n",
    "        \n",
    "        # Convert non-locality columns in a single column\n",
    "        df_ = pd.melt(df, id_vars=locality_field, \n",
    "                          value_vars=[c for c in df.columns[2:]], \n",
    "                          value_name=\"value\")\n",
    "        # Select the columns we are interested in\n",
    "        df_.columns = [\"Location\", \"Variable\", \"Count\"]\n",
    "        # Create a dataset name from the original CSV\n",
    "        dimname = os.path.basename(key)[:-4]\n",
    "        df_[\"Dimension\"] = dimname\n",
    "        df_[[\"Location\", \"Dimension\", \"Variable\", \"Count\"]]\n",
    "        df_list.append(df_)\n",
    "    \n",
    "    # Stack all the columns \n",
    "    clean_df = pd.concat(df_list)\n",
    "    # Get rid of all the non-total fields\n",
    "    full_data = clean_df[clean_df.Variable != \"Grand.Total\"]\n",
    "    \n",
    "    # Re-map Avon, Gloucestershire & Wiltshire \n",
    "    # Banes, Swindon, Wiltshire and Care Services  0.274\n",
    "    # and\n",
    "    # Bristol, Gloucestershire and South Gloucestershire 72.6 \n",
    "    LocationLookup = pd.read_csv(\"../workingdata/locations-remap.csv\")\n",
    "    full_data = pd.merge(full_data, LocationLookup, on=\"Location\")\n",
    "    \n",
    "    # Set all the Nans to 0\n",
    "    full_data[\"FullCount\"] = full_data[\"Count\"].fillna(0)\n",
    "    \n",
    "    # Aggregate by Location and Variable\n",
    "    full_data = (full_data\n",
    "                 .groupby([\"NewLocation\", \"Variable\", \"Dimension\"])\n",
    "                [\"FullCount\"]\n",
    "                .sum()\n",
    "                .reset_index())\n",
    "    \n",
    "    # Get a within segment proportion\n",
    "    full_data[\"FullProp\"] = (full_data\n",
    "                             .groupby([\"Dimension\", \"NewLocation\"])\n",
    "                             [\"FullCount\"]\n",
    "                             .transform(lambda x: x/x.sum()))\n",
    "    return full_data[[]]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "full_data = single_narrow_dataset(datasets)\n",
    "full_data.to_csv(\"../workingdata/known_data_by_question_response2.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import matplotlib.pyplot as plt\n",
	"import numpy as np\n",
	"\n",
	"%matplotlib inline\n",
	"\n",
	"from collections import Counter"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['../Prepped data/17. CRS Service Users 2014/All service users.csv',\n",
	" '../Prepped data/17. CRS Service Users 2014/Carer age.csv',\n",
	" '../Prepped data/17. CRS Service Users 2014/Carer Ethnicity.csv',\n",
	" '../Prepped data/17. CRS Service Users 2014/Carer Gender.csv',\n",
	" '../Prepped data/17. CRS Service Users 2014/Carer Language.csv',\n",
	" '../Prepped data/17. CRS Service Users 2014/Carer Partnership.csv']"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import glob\n",
	"glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Read in all the raw survey files\n",
	"datasets = {fin: pd.read_csv(\"../Prepped data/\" +fin) \n",
	" for fin in glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")}\n",
	" "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Load the data from Data set 8\n",
	"\n",
	"Conver the really wide columns into a narrow set with sex / age/ ethnicity"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"data8 = pd.read_csv(\"../Prepped data/08.1 Ethnicity Sex Age England & Wales/8. Ethnicity Sex Age EW.csv\")\n",
	"#data8.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Get all the columns and look for the pivot and value columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"366 columns in total\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"['LAD12CD',\n",
	" 'Local.Authority.District.12',\n",
	" 'Counties.&.UAs.code',\n",
	" 'Counties.&.Unitary.Authorities',\n",
	" 'Locality.cost.centre',\n",
	" 'Locality']"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"d8c = [c for c in data8.columns]\n",
	"print(len(d8c), \"columns in total\")\n",
	"d8c[:6]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Melt the dataset to narrow format \n",
	"Then add columns for sex / age / ethnicity "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"data8melt = pd.melt(data8, id_vars=d8c[:6], value_vars=d8c[6:])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(348, 366)\n",
	"(125280, 8)\n"
	]
	}
	],
	"source": [
	"print(data8.shape)\n",
	"print(data8melt.shape)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[('Age.65.and.over', 72),\n",
	" ('All.categories:.Age', 72),\n",
	" ('Age.0.to.24', 72),\n",
	" ('Age.25.to.49', 72),\n",
	" ('Age.50.to.64', 72)]"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"def parse_col_name(cname):\n",
	" \"\"\"Split a column into sex, age and ethnicity fields. \n",
	" \n",
	" >>> parse_col_name('Sex:.All.persons;.Age:.All.categories:.Age;.Ethnic.Group:.White:.English/Welsh/Scottish/Northern.Irish/British')\n",
	" ('All.persons',\n",
	" 'All.categories:.Age',\n",
	" 'White:.English/Welsh/Scottish/Northern.Irish/British')\n",
	" \n",
	" \"\"\"\n",
	" # All multi fields have sex / age / erthnicity\n",
	" fields = cname.split(\";\")\n",
	" sex_, age_, ethnic_ = fields\n",
	" sex = sex_.split(\":\", 1)[1]\n",
	" age = age_.split(\":\", 1)[1]\n",
	" ethnic = ethnic_.split(\":\", 1)[1]\n",
	" # Remove leading '.' on all columns\n",
	" return sex[1:], age[1:], ethnic[1:]\n",
	"\n",
	"\n",
	"sexes, ages, ethnics = [], [], []\n",
	"for col in d8c:\n",
	" # Ignore all the columns used for pivoting\n",
	" if col.startswith(\"Sex\"):\n",
	" s, a, e = parse_col_name(col)\n",
	" sexes.append(s)\n",
	" ages.append(a)\n",
	" ethnics.append(e)\n",
	"\n",
	"# Run to show counts of what is in each\n",
	"c = Counter()\n",
	"c.update(ages) # sexes / ethnics\n",
	"c.most_common()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"The long, ugly column names in the wide table are now a single column\n",
	"\n",
	"Build a DataFrame with columns [\"long name\", \"sex\", \"age\", \"ethnicity\"] and then join\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(125280, 8)\n",
	"(125280, 12)\n",
	"['LAD12CD', 'Local.Authority.District.12', 'Counties.&.UAs.code', 'Counties.&.Unitary.Authorities', 'Locality.cost.centre', 'Locality', 'variable', 'value', 'age', 'colname', 'ethnic', 'sex']\n"
	]
	}
	],
	"source": [
	"## Columns Lookup\n",
	"column_lookup = pd.DataFrame({\"colname\": [c for c in d8c if c.startswith(\"Sex\")], \n",
	" \"sex\": sexes, \n",
	" \"age\": ages, \n",
	" \"ethnic\": ethnics})\n",
	"\n",
	"\n",
	"\n",
	"data8_sex_age_eth = pd.merge(data8melt, column_lookup, left_on=\"variable\", right_on=\"colname\")\n",
	"\n",
	"print(data8melt.shape)\n",
	"print(data8_sex_age_eth.shape)\n",
	"\n",
	"print([c for c in data8_sex_age_eth.columns])\n",
	"cols_of_interest = ['LAD12CD', 'Local.Authority.District.12', 'Counties.&.UAs.code', \n",
	" 'Counties.&.Unitary.Authorities', 'Locality.cost.centre', 'Locality', 'variable', 'value', \n",
	" 'age', 'ethnic', 'sex']\n",
	"\n",
	"# Filter out columns and save to file\n",
	"data8_sex_age_eth_final = data8_sex_age_eth[cols_of_interest]\n",
	"data8_sex_age_eth_final.to_csv(\"../workingdata/data8_with_age_sex_ethnic.csv\", index=False, sep=\",\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# data8melt.head()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Users of the Service Survey"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"#\n",
	"# Load all the data and turn into narrow format\n",
	"# \n",
	"\n",
	"## Working directory is\n",
	"#\n",
	"# /+\n",
	"# /notebooks/<this file>\n",
	"# \| \n",
	"# /Prepped data/\n",
	"# \|\n",
	"# /<survey 17>/\n",
	"# \| \n",
	"# + <data.csv>\n",
	"#\n",
	"# There is one CSV per survey\n",
	"#\n",
	"datasets = {fin: pd.read_csv(\"../Prepped data/\" +fin) \n",
	" for fin in glob.glob(\"../Prepped data/17. CRS Service Users 2014/*\")}\n",
	"\n",
	"\n",
	"# Locality needs to be based on the \"Locality CRS 2014\" columns\n",
	"# This needs to be split according to the ratio in locations-remap.csv (in version control)\n",
	"def single_narrow_dataset(datasets, locality_field = \"Locality.CRS.2014\"):\n",
	" \n",
	" # Read in a list of files from a directory and create a single DataFrame\n",
	" df_list = []\n",
	" \n",
	" for key, df in datasets.items():\n",
	" \n",
	" # Convert non-locality columns in a single column\n",
	" df_ = pd.melt(df, id_vars=locality_field, \n",
	" value_vars=[c for c in df.columns[2:]], \n",
	" value_name=\"value\")\n",
	" # Select the columns we are interested in\n",
	" df_.columns = [\"Location\", \"Variable\", \"Count\"]\n",
	" # Create a dataset name from the original CSV\n",
	" dimname = os.path.basename(key)[:-4]\n",
	" df_[\"Dimension\"] = dimname\n",
	" df_[[\"Location\", \"Dimension\", \"Variable\", \"Count\"]]\n",
	" df_list.append(df_)\n",
	" \n",
	" # Stack all the columns \n",
	" clean_df = pd.concat(df_list)\n",
	" # Get rid of all the non-total fields\n",
	" full_data = clean_df[clean_df.Variable != \"Grand.Total\"]\n",
	" \n",
	" # Re-map Avon, Gloucestershire & Wiltshire \n",
	" # Banes, Swindon, Wiltshire and Care Services 0.274\n",
	" # and\n",
	" # Bristol, Gloucestershire and South Gloucestershire 72.6 \n",
	" LocationLookup = pd.read_csv(\"../workingdata/locations-remap.csv\")\n",
	" full_data = pd.merge(full_data, LocationLookup, on=\"Location\")\n",
	" \n",
	" # Set all the Nans to 0\n",
	" full_data[\"FullCount\"] = full_data[\"Count\"].fillna(0)\n",
	" \n",
	" # Aggregate by Location and Variable\n",
	" full_data = (full_data\n",
	" .groupby([\"NewLocation\", \"Variable\", \"Dimension\"])\n",
	" [\"FullCount\"]\n",
	" .sum()\n",
	" .reset_index())\n",
	" \n",
	" # Get a within segment proportion\n",
	" full_data[\"FullProp\"] = (full_data\n",
	" .groupby([\"Dimension\", \"NewLocation\"])\n",
	" [\"FullCount\"]\n",
	" .transform(lambda x: x/x.sum()))\n",
	" return full_data[[]]\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"full_data = single_narrow_dataset(datasets)\n",
	"full_data.to_csv(\"../workingdata/known_data_by_question_response2.csv\", index=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}