Skip to content

Instantly share code, notes, and snippets.

@drcjar
Last active August 29, 2015 14:04
Show Gist options
  • Save drcjar/fff47ba47b4600cca85a to your computer and use it in GitHub Desktop.
Save drcjar/fff47ba47b4600cca85a to your computer and use it in GitHub Desktop.
CCGPopulationDatas
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Goal: provide a best-guess CCG population denominator for GP prescribing data."
]
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"Prescribing data is at GP practice level, and there are two types of CCG code (old and new), so this requires: 1. obtaining CCG population data; 2. obtaining a mapping of old to new CCG codes; 3. obtaining a GP practice to CCG mapping.\n",
"\n",
"NB: There does not appear to be an authoritative list of GP practices. There are other data quality issues that should be written up another time."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#ccg mid-year population estimates from ONS\n",
"#from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526\n",
"#has CCG code in new format\n",
"\n",
"df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv') "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.icol(3).dropna().nunique() #column 3 has our 211 ccgs\n",
"df1 = df[~df.icol(3).isnull()] #throw away rows that don't relate to our ccgs\n",
"df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#csv of old CCG codes matched onto new ones\n",
"#from http://www.erpho.org.uk/viewResource.aspx?id=22125\n",
"df3 = pd.read_csv('ccgcodemap.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#merge our dataframes to make a frame with CCG names and old and new codes and population sizes\n",
"df4 = pd.merge(df2, df3, on='CCG13CD')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#gp to ccg mapping 1\n",
"#wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip\n",
"#has CCG code in old format and contains too many CCGs (>211)\n",
"\n",
"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
"os.chdir(pathtogpdata)\n",
"df5 = pd.read_csv('interimpcmem_v5.csv') \n",
"df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True)\n",
"df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#gp to ccg mapping 2\n",
"#wget https://indicators.ic.nhs.uk/download/Clinical%20Commissioning%20Group%20Indicators/Data/GP_registered_patients_2012.csv\n",
"#has CCG code in old format and contains the 'right' number of CCGs (211)\n",
"\n",
"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
"os.chdir(pathtogpdata)\n",
"df6 = pd.read_csv('GP_registered_patients_2012.csv') \n",
"df6.columns = ['Year', 'PRACTICE', 'CCGCODE', 'CCG_Name', 'Gender', 'Age band', 'Population'] "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#let's try another non-authoritative list of practices...\n",
"#http://rdsg.nihr.ac.uk/apex/rds\n",
"\n",
"pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'\n",
"os.chdir(pathtogpdata)\n",
"df7 = pd.read_csv('irastrusts.csv')\n",
"print len(df7[df7.icol(1) == 'GPPRACTICE']) #10808 gp practices\n",
"df8 = df7[df7.icol(1) == 'GPPRACTICE']\n",
"df8.columns = ['id', 'org_type', 'PRACTICE', 'add1', 'add2', 'add3', 'add4', 'add5', 'add6', 'postcode', 'code1', 'code2']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10808\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment