drcjar · August 29, 2015 14:04
diff --git a/CCGPopulationDatas.ipynb b/CCGPopulationDatas.ipynb
 {
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Goal: Provide best guess CCG population denominator for GP prescribing data. "
     ]
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Prescribing data is GP level and there are two types of CCG code (old and new) so this requires 1. obtaining CCG population data 2. obtaining mapping of old to new CCG codes 3. obtaining GP practice to CCG mapping. \n",
      "\n",
      "NB: There does not appear to be an authorative list of GP practices. There are other data quality issues that should be written up another time."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#ccg mid-year population estimates from ONS\n",
      "#from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526\n",
      "#has CCG code in new format\n",
      "\n",
      "df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv') "
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df.icol(3).dropna().nunique() #column 3 has our 211 ccgs\n",
      "df1 = df[~df.icol(3).isnull()]  #throw away rows that don't relate to our ccgs\n",
      "df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#csv of old CCG codes matched onto new ones\n",
      "#from http://www.erpho.org.uk/viewResource.aspx?id=22125\n",
      "df3 = pd.read_csv('ccgcodemap.csv')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#merge our dataframes to make a frame with CCG names and old and new codes and population sizes\n",
      "df4 = pd.merge(df2, df3, on='CCG13CD')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#gp to ccg mapping 1\n",
      "#wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip\n",
      "#has CCG code in old format and contains too many CCGs (>211)\n",
      "\n",
      "pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
      "os.chdir(pathtogpdata)\n",
      "df5 = pd.read_csv('interimpcmem_v5.csv') \n",
      "df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True)\n",
      "df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#gp to ccg mapping 2\n",
      "#wget https://indicators.ic.nhs.uk/download/Clinical%20Commissioning%20Group%20Indicators/Data/GP_registered_patients_2012.csv\n",
      "#has CCG code in old format and contains the 'right' number of CCGs (211)\n",
      "\n",
      "pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
      "os.chdir(pathtogpdata)\n",
      "df6 = pd.read_csv('GP_registered_patients_2012.csv') \n",
      "df6.columns = ['Year', 'PRACTICE', 'CCGCODE', 'CCG_Name', 'Gender', 'Age band', 'Population'] "
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#lets try another non-authorative list of practices...\n",
      "#http://rdsg.nihr.ac.uk/apex/rds\n",
      "\n",
      "pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
      "os.chdir(pathtogpdata)\n",
      "df7 = pd.read_csv('irastrusts.csv')\n",
      "print len(df7[df7.icol(1) == 'GPPRACTICE']) #10808 gp practices\n",
      "df8 = df7[df7.icol(1) == 'GPPRACTICE']\n",
      "df8.columns = ['id', 'org_type', 'PRACTICE', 'add1', 'add2', 'add3', 'add4', 'add5', 'add6', 'postcode', 'code1', 'code2']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10808\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
 }
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "heading",
	"level": 1,
	"metadata": {},
	"source": [
	"Goal: Provide best guess CCG population denominator for GP prescribing data. "
	]
	},
	{
	"cell_type": "heading",
	"level": 6,
	"metadata": {},
	"source": [
	"Prescribing data is GP level and there are two types of CCG code (old and new) so this requires 1. obtaining CCG population data 2. obtaining mapping of old to new CCG codes 3. obtaining GP practice to CCG mapping. \n",
	"\n",
	"NB: There does not appear to be an authorative list of GP practices. There are other data quality issues that should be written up another time."
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import os\n",
	"import pandas as pd"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 1
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#ccg mid-year population estimates from ONS\n",
	"#from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526\n",
	"#has CCG code in new format\n",
	"\n",
	"df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv') "
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 2
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"df.icol(3).dropna().nunique() #column 3 has our 211 ccgs\n",
	"df1 = df[~df.icol(3).isnull()] #throw away rows that don't relate to our ccgs\n",
	"df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 3
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 4
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#csv of old CCG codes matched onto new ones\n",
	"#from http://www.erpho.org.uk/viewResource.aspx?id=22125\n",
	"df3 = pd.read_csv('ccgcodemap.csv')"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 5
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#merge our dataframes to make a frame with CCG names and old and new codes and population sizes\n",
	"df4 = pd.merge(df2, df3, on='CCG13CD')"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 6
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#gp to ccg mapping 1\n",
	"#wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip\n",
	"#has CCG code in old format and contains too many CCGs (>211)\n",
	"\n",
	"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
	"os.chdir(pathtogpdata)\n",
	"df5 = pd.read_csv('interimpcmem_v5.csv') \n",
	"df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True)\n",
	"df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 7
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#gp to ccg mapping 2\n",
	"#wget https://indicators.ic.nhs.uk/download/Clinical%20Commissioning%20Group%20Indicators/Data/GP_registered_patients_2012.csv\n",
	"#has CCG code in old format and contains the 'right' number of CCGs (211)\n",
	"\n",
	"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
	"os.chdir(pathtogpdata)\n",
	"df6 = pd.read_csv('GP_registered_patients_2012.csv') \n",
	"df6.columns = ['Year', 'PRACTICE', 'CCGCODE', 'CCG_Name', 'Gender', 'Age band', 'Population'] "
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 8
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"#lets try another non-authorative list of practices...\n",
	"#http://rdsg.nihr.ac.uk/apex/rds\n",
	"\n",
	"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
	"os.chdir(pathtogpdata)\n",
	"df7 = pd.read_csv('irastrusts.csv')\n",
	"print len(df7[df7.icol(1) == 'GPPRACTICE']) #10808 gp practices\n",
	"df8 = df7[df7.icol(1) == 'GPPRACTICE']\n",
	"df8.columns = ['id', 'org_type', 'PRACTICE', 'add1', 'add2', 'add3', 'add4', 'add5', 'add6', 'postcode', 'code1', 'code2']"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"10808\n"
	]
	}
	],
	"prompt_number": 9
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}