Created
          June 22, 2017 08:53 
        
      - 
      
- 
        Save psychemedia/5314c1cda9354981eabff09813027f96 to your computer and use it in GitHub Desktop. 
    Example notebook for working with spreadsheet files from TEF 2017
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Quick Look At TEF Data\n", | |
| "*Tony Hirst, @psychemedia*\n", | |
| "\n", | |
| "Notebook to load and have a quick peek at the Teaching Excellence Framework (TEF) data: [Teaching Excellence Framework](http://www.hefce.ac.uk/lt/tef/).\n", | |
| "\n", | |
| "Example spreadsheet format used to develop the original code can be found here: [Teaching Excellence Framework: year 2 specification](https://www.gov.uk/government/publications/teaching-excellence-framework-year-2-specification).\n", | |
| "\n", | |
| "Final spreadsheets available from: [http://www.hefce.ac.uk/lt/tef/data/](http://www.hefce.ac.uk/lt/tef/data/)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## User Settings" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Set to false for full run\n", | |
| "#TEST: True, False\n", | |
| "TEST=False" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Put Excel files into specified (sub)directory and load them from there\n", | |
| "directory='TEFYearTwo_AllMetrics'#'tef'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Utils\n", | |
| "\n", | |
| "Bits and bobs that may or may not be useful..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Packages\n", | |
| "import os\n", | |
| "import pandas as pd\n", | |
| "\n", | |
| "#Support inline plotting\n", | |
| "%matplotlib inline" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "(0, 3, 25)" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Set letter vars as numeric - may be useful for referencing cells?\n", | |
| "import string\n", | |
| "k=0\n", | |
| "for l in string.ascii_uppercase:\n", | |
| " exec(l+\"=\"+str(k))\n", | |
| " k=k+1\n", | |
| " \n", | |
| "A, D, Z" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>UKPRN</th>\n", | |
| " <th>PROVIDER_NAME</th>\n", | |
| " <th>VIEW_NAME</th>\n", | |
| " <th>SORT_NAME</th>\n", | |
| " <th>ALIAS</th>\n", | |
| " <th>FLAT_NAME_NUMBER</th>\n", | |
| " <th>BUILDING_NAME_NUMBER</th>\n", | |
| " <th>LOCALITY</th>\n", | |
| " <th>STREET_NAME</th>\n", | |
| " <th>TOWN</th>\n", | |
| " <th>POSTCODE</th>\n", | |
| " <th>WEBSITE_URL</th>\n", | |
| " <th>WIKIPEDIA_URL</th>\n", | |
| " <th>GROUPS</th>\n", | |
| " <th>LONGITUDE</th>\n", | |
| " <th>LATITUDE</th>\n", | |
| " <th>EASTING</th>\n", | |
| " <th>NORTHING</th>\n", | |
| " <th>GTR_ID</th>\n", | |
| " <th>HESA_ID</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>10008640</td>\n", | |
| " <td>FALMOUTH UNIVERSITY</td>\n", | |
| " <td>University College Falmouth</td>\n", | |
| " <td>Falmouth, University College</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Woodlane Campus</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Woodlane</td>\n", | |
| " <td>Falmouth</td>\n", | |
| " <td>TR11 4RH</td>\n", | |
| " <td>http://www.falmouth.ac.uk/</td>\n", | |
| " <td>http://en.wikipedia.org/wiki/University_Colleg...</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>-5.070901</td>\n", | |
| " <td>50.149168</td>\n", | |
| " <td>180711.0</td>\n", | |
| " <td>32196.0</td>\n", | |
| " <td>E84FC550-A4CC-4B98-A6F9-D15A33829D83</td>\n", | |
| " <td>17.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>10007774</td>\n", | |
| " <td>UNIVERSITY OF OXFORD</td>\n", | |
| " <td>University of Oxford</td>\n", | |
| " <td>Oxford, University of</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>UNIVERSITY OFFICES</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>WELLINGTON SQUARE</td>\n", | |
| " <td>OXFORD</td>\n", | |
| " <td>OX1 2JD</td>\n", | |
| " <td>http://www.ox.ac.uk/</td>\n", | |
| " <td>http://en.wikipedia.org/wiki/University_of_Oxford</td>\n", | |
| " <td>Science_and_Engineering_South, Russell_Group, ...</td>\n", | |
| " <td>-1.262868</td>\n", | |
| " <td>51.757644</td>\n", | |
| " <td>450974.0</td>\n", | |
| " <td>206807.0</td>\n", | |
| " <td>B1F0E8FE-FE3C-49ED-9C96-1ED75312A8A0</td>\n", | |
| " <td>156.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>10007768</td>\n", | |
| " <td>UNIVERSITY OF LANCASTER</td>\n", | |
| " <td>University of Lancaster</td>\n", | |
| " <td>Lancaster, University of</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>UNIVERSITY HOUSE</td>\n", | |
| " <td>BAILRIGG</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>LANCASTER</td>\n", | |
| " <td>LA1 4YW</td>\n", | |
| " <td>http://www.lancs.ac.uk/</td>\n", | |
| " <td>http://en.wikipedia.org/wiki/Lancaster_University</td>\n", | |
| " <td>1994_Group, N8_Research_Partnership</td>\n", | |
| " <td>-2.786905</td>\n", | |
| " <td>54.010480</td>\n", | |
| " <td>348528.0</td>\n", | |
| " <td>457448.0</td>\n", | |
| " <td>F8C7F869-77D6-4859-96F1-3550A4951F6C</td>\n", | |
| " <td>123.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>10000571</td>\n", | |
| " <td>BATH SPA UNIVERSITY</td>\n", | |
| " <td>Bath Spa University</td>\n", | |
| " <td>Bath Spa University</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NEWTON PARK</td>\n", | |
| " <td>NEWTON ST. LOE</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>BATH</td>\n", | |
| " <td>BA2 9BN</td>\n", | |
| " <td>http://www.bathspa.ac.uk/</td>\n", | |
| " <td>http://en.wikipedia.org/wiki/Bath_Spa_University</td>\n", | |
| " <td>Million_Plus</td>\n", | |
| " <td>-2.437400</td>\n", | |
| " <td>51.378739</td>\n", | |
| " <td>369654.0</td>\n", | |
| " <td>164501.0</td>\n", | |
| " <td>1A469850-02BA-4814-81EE-E60C851CABC3</td>\n", | |
| " <td>48.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>10007814</td>\n", | |
| " <td>CARDIFF UNIVERSITY</td>\n", | |
| " <td>Cardiff University</td>\n", | |
| " <td>Cardiff University</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>PARK PLACE</td>\n", | |
| " <td>CARDIFF</td>\n", | |
| " <td>CF10 3AT</td>\n", | |
| " <td>http://www.cardiff.ac.uk/</td>\n", | |
| " <td>http://en.wikipedia.org/wiki/Cardiff_University</td>\n", | |
| " <td>GW4, Russell_Group</td>\n", | |
| " <td>-3.179907</td>\n", | |
| " <td>51.489093</td>\n", | |
| " <td>318176.0</td>\n", | |
| " <td>177343.0</td>\n", | |
| " <td>9C10D78F-6430-4CA7-9528-B96B0762A4C6</td>\n", | |
| " <td>179.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " UKPRN PROVIDER_NAME VIEW_NAME \\\n", | |
| "0 10008640 FALMOUTH UNIVERSITY University College Falmouth \n", | |
| "1 10007774 UNIVERSITY OF OXFORD University of Oxford \n", | |
| "2 10007768 UNIVERSITY OF LANCASTER University of Lancaster \n", | |
| "3 10000571 BATH SPA UNIVERSITY Bath Spa University \n", | |
| "4 10007814 CARDIFF UNIVERSITY Cardiff University \n", | |
| "\n", | |
| " SORT_NAME ALIAS FLAT_NAME_NUMBER BUILDING_NAME_NUMBER \\\n", | |
| "0 Falmouth, University College NaN NaN Woodlane Campus \n", | |
| "1 Oxford, University of NaN NaN UNIVERSITY OFFICES \n", | |
| "2 Lancaster, University of NaN NaN UNIVERSITY HOUSE \n", | |
| "3 Bath Spa University NaN NaN NEWTON PARK \n", | |
| "4 Cardiff University NaN NaN NaN \n", | |
| "\n", | |
| " LOCALITY STREET_NAME TOWN POSTCODE \\\n", | |
| "0 NaN Woodlane Falmouth TR11 4RH \n", | |
| "1 NaN WELLINGTON SQUARE OXFORD OX1 2JD \n", | |
| "2 BAILRIGG NaN LANCASTER LA1 4YW \n", | |
| "3 NEWTON ST. LOE NaN BATH BA2 9BN \n", | |
| "4 NaN PARK PLACE CARDIFF CF10 3AT \n", | |
| "\n", | |
| " WEBSITE_URL \\\n", | |
| "0 http://www.falmouth.ac.uk/ \n", | |
| "1 http://www.ox.ac.uk/ \n", | |
| "2 http://www.lancs.ac.uk/ \n", | |
| "3 http://www.bathspa.ac.uk/ \n", | |
| "4 http://www.cardiff.ac.uk/ \n", | |
| "\n", | |
| " WIKIPEDIA_URL \\\n", | |
| "0 http://en.wikipedia.org/wiki/University_Colleg... \n", | |
| "1 http://en.wikipedia.org/wiki/University_of_Oxford \n", | |
| "2 http://en.wikipedia.org/wiki/Lancaster_University \n", | |
| "3 http://en.wikipedia.org/wiki/Bath_Spa_University \n", | |
| "4 http://en.wikipedia.org/wiki/Cardiff_University \n", | |
| "\n", | |
| " GROUPS LONGITUDE LATITUDE \\\n", | |
| "0 NaN -5.070901 50.149168 \n", | |
| "1 Science_and_Engineering_South, Russell_Group, ... -1.262868 51.757644 \n", | |
| "2 1994_Group, N8_Research_Partnership -2.786905 54.010480 \n", | |
| "3 Million_Plus -2.437400 51.378739 \n", | |
| "4 GW4, Russell_Group -3.179907 51.489093 \n", | |
| "\n", | |
| " EASTING NORTHING GTR_ID HESA_ID \n", | |
| "0 180711.0 32196.0 E84FC550-A4CC-4B98-A6F9-D15A33829D83 17.0 \n", | |
| "1 450974.0 206807.0 B1F0E8FE-FE3C-49ED-9C96-1ED75312A8A0 156.0 \n", | |
| "2 348528.0 457448.0 F8C7F869-77D6-4859-96F1-3550A4951F6C 123.0 \n", | |
| "3 369654.0 164501.0 1A469850-02BA-4814-81EE-E60C851CABC3 48.0 \n", | |
| "4 318176.0 177343.0 9C10D78F-6430-4CA7-9528-B96B0762A4C6 179.0 " | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Get some UK HE admin data - may be useful?\n", | |
| "#Via http://learning-provider.data.ac.uk/\n", | |
| "#lp=pd.read_csv('learning-providers-plus.csv')\n", | |
| "lp=pd.read_csv('http://learning-provider.data.ac.uk/data/learning-providers-plus.csv')\n", | |
| "lp.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Get the Data\n", | |
| "\n", | |
| "Data is published via [http://www.hefce.ac.uk/lt/tef/data/](http://www.hefce.ac.uk/lt/tef/data/).\n", | |
| "\n", | |
| "Originally it was thought that data files would be provided just as individual Excel spreadsheets ([original example](https://www.gov.uk/government/publications/teaching-excellence-framework-year-2-specification)), one per institution, hence the need for this notebook. In the end, multiple versions of the data were made available, including monolithic CSV documents (one file for all institutions), rendering this notebook superfluous.\n", | |
| "\n", | |
| "The final released spreadsheets had some minor differences in layout and sheet labeling from the original example. The original notebook scripts were not intelligent enough to automatically cope with these and nor is this one. For example, cell ranges are hard coded rather than being autodetected.\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 177, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "--2017-06-22 08:41:13-- http://www.hefce.ac.uk/media/HEFCE,2014/Content/Learning,and,teaching/TEF/TEFYearTwo/data/TEFYearTwo_AllMetrics.zip\n", | |
| "Resolving www.hefce.ac.uk (www.hefce.ac.uk)... 195.194.167.210\n", | |
| "Connecting to www.hefce.ac.uk (www.hefce.ac.uk)|195.194.167.210|:80... connected.\n", | |
| "HTTP request sent, awaiting response... 200 OK\n", | |
| "Length: 44520361 (42M) [application/x-zip-compressed]\n", | |
| "Saving to: ‘TEFYearTwo_AllMetrics.zip’\n", | |
| "\n", | |
| "TEFYearTwo_AllMetri 100%[===================>] 42.46M 4.60MB/s in 12s \n", | |
| "\n", | |
| "2017-06-22 08:41:25 (3.61 MB/s) - ‘TEFYearTwo_AllMetrics.zip’ saved [44520361/44520361]\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Download the zipped Excel workbooks\n", | |
| "!wget http://www.hefce.ac.uk/media/HEFCE,2014/Content/Learning,and,teaching/TEF/TEFYearTwo/data/TEFYearTwo_AllMetrics.zip\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Put all the spreadsheet files into a single folder; the script will then load them in and generate some monolithic CSV files of various flavours.\n", | |
| "\n", | |
| "Note the processing of the files by the rest of the notebook is not very efficient..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 184, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "!mkdir -p {directory}\n", | |
| "!unzip -q TEFYearTwo_AllMetrics.zip -d {directory}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Column names are crappy with white space - clean it off\n", | |
| "def cleanCols(df):\n", | |
| " df.rename(columns=lambda x: str(x).strip(),inplace=True)\n", | |
| " return df\n", | |
| "\n", | |
| "\n", | |
| "#Hack the PRN out of each sheet and use it to key data values with a new prn column in each dataset\n", | |
| "def getPRN2(fn,sn):\n", | |
| " i=pd.read_excel(fn, sheetname=sn, parse_cols=[0,1],header=None).head()\n", | |
| " institution=None\n", | |
| " prn=None\n", | |
| " institution=str(i[i[0].notnull() & i[0].str.contains('Institution')][1].iloc[0]).strip()\n", | |
| " prn=str(i[i[0].notnull() & i[0].str.contains('UKPRN')][1].iloc[0]).strip()\n", | |
| " return institution,prn\n", | |
| "\n", | |
| "def getPRN(fn,sn):\n", | |
| " #TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| " prn=fn.split('/')[1].split('_')[0]\n", | |
| " institution=fn.split('/')[1].split('_')[1]\n", | |
| " return institution,prn\n", | |
| "\n", | |
| "#Quick helper that would let us leave test spreadsheets in the data dir.\n", | |
| "def fntest(f):\n", | |
| " if TEST: return not f.startswith('~') and f.startswith('TEST')\n", | |
| " return not f.startswith('~') and not f.startswith('TEST')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 167, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "odict_keys(['Coversheet', 'Contextual data', 'Core metrics', 'Core metrics and splits', 'BME breakdown', 'Indicator (a)', 'Benchmark (b)', 'Difference (a-b)', 'Z-score', 'Numerators and denominators'])" | |
| ] | |
| }, | |
| "execution_count": 167, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "fn=os.listdir(directory)[0]\n", | |
| "xl=pd.read_excel('{}/{}'.format(directory,fn), sheetname=None)\n", | |
| "xl.keys()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 169, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "('University of Exeter', '10007792')" | |
| ] | |
| }, | |
| "execution_count": 169, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#get PRN from filename\n", | |
| "getPRN('{}/{}'.format(directory,fn),list(xl.keys())[1])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Notes on Parsing the spreadsheets\n", | |
| "\n", | |
| "The spreadsheets include various compound tables which need processing to split out the separate subtables, such as data for full-time versus part-time students.\n", | |
| "\n", | |
| "The final spreadsheets also had populated, administrative(?) cells at the extremity of many, if not all, sheets. Using the `pandas.read_excel()` function's `parse_cols` parameter allows the ingest of only the desired columns. (Loading in the whole sheet width often caused the multiple header definition part of the ingest to break.)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Core metrics" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Core metrics and splits\n", | |
| "\n", | |
| "Guess that the spreadsheets are regular and use absolute range finding... Which is really dangerous..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 104, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007796_The University of Leicester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007798_The University of Manchester_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-104-932eeaaf104f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;31m#Loop through all the files in the declared directory, and parse the contents of a particular sheet out\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfntest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mcms_ft_tmp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcms_pt_tmp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_coremetricsAndSplits\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mcms_ft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcms_ft\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcms_ft_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mcms_pt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcms_pt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcms_pt_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-104-932eeaaf104f>\u001b[0m in \u001b[0;36mget_coremetricsAndSplits\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m xls=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=7,\n\u001b[0;32m----> 5\u001b[0;31m header=[0,1]).dropna(how='all',axis=0))\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0minstitution\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetPRN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mi_fulltime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mxls\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mxls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m27\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0mrow_tag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"row\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mself_do_row\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_row\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 546\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 548\u001b[0m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/lib/python3.5/xml/etree/ElementTree.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m16\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m1024\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1303\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1304\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1305\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1306\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_root\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_close_and_return_root\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/lib/python3.5/xml/etree/ElementTree.py\u001b[0m in \u001b[0;36mfeed\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1235\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1236\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1237\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1238\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mSyntaxError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1239\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_events_queue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def get_coremetricsAndSplits(fn):\n", | |
| " sn='Core metrics and splits'\n", | |
| " print(fn)\n", | |
| " xls=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=7,\n", | |
| " header=[0,1]).dropna(how='all',axis=0))\n", | |
| " institution,prn=getPRN(fn,sn)\n", | |
| " i_fulltime=xls[xls.columns[0:27]][:6]\n", | |
| " i_fulltime.index.names=['Topic']\n", | |
| " i_fulltime['PRN']=prn\n", | |
| " i_fulltime['type']='FT'\n", | |
| "\n", | |
| " i_fulltime=i_fulltime.reset_index().set_index(['Topic','PRN'])\n", | |
| "\n", | |
| " i_fulltime.drop('Unnamed: 0_level_0', level=0, axis=1, inplace=True)\n", | |
| "\n", | |
| " i_parttime=xls[xls.columns[0:27]][7:13]\n", | |
| "\n", | |
| " i_parttime.index.names=['Topic']\n", | |
| " i_parttime['PRN']=prn\n", | |
| " i_parttime['type']='PT'\n", | |
| "\n", | |
| " i_parttime=i_parttime.reset_index().set_index(['Topic','PRN'])\n", | |
| "\n", | |
| " i_parttime.drop('Unnamed: 0_level_0', level=0, axis=1, inplace=True)\n", | |
| " return i_fulltime, i_parttime\n", | |
| " \n", | |
| "cms_ft=pd.DataFrame()\n", | |
| "cms_pt=pd.DataFrame()\n", | |
| "cms_complete=pd.DataFrame()\n", | |
| "\n", | |
| "#Loop through all the files in the declared directory, and parse the contents of a particular sheet out\n", | |
| "for filename in [f for f in os.listdir(directory) if fntest(f)]:\n", | |
| " cms_ft_tmp, cms_pt_tmp=get_coremetricsAndSplits('{}/{}'.format(directory,filename))\n", | |
| " cms_ft=pd.concat([cms_ft,cms_ft_tmp])\n", | |
| " cms_pt=pd.concat([cms_pt,cms_pt_tmp])\n", | |
| " cms_complete=pd.concat([cms_complete,cms_pt_tmp,cms_ft_tmp])\n", | |
| "\n", | |
| "cms_complete.to_csv('tef_coremetricandsplits.csv')\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 105, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th colspan=\"5\" halign=\"left\">Core metrics</th>\n", | |
| " <th>Splits</th>\n", | |
| " <th>type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>Indicator\\n(a) %</th>\n", | |
| " <th>Benchmark\\n(b) %</th>\n", | |
| " <th>Difference \\n(a)-(b) *</th>\n", | |
| " <th>Z-score</th>\n", | |
| " <th>Flag</th>\n", | |
| " <th>Years</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>++</td>\n", | |
| " <td>++</td>\n", | |
| " <td>++</td>\n", | |
| " <td>++</td>\n", | |
| " <td>++</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Core metrics \\\n", | |
| " Indicator\\n(a) % \n", | |
| "Topic PRN \n", | |
| "Assessment and feedback 10007792 SUP \n", | |
| "Academic support 10007792 SUP \n", | |
| "Non-continuation 10007792 NaN \n", | |
| "Employment or further study 10007792 NaN \n", | |
| "Highly skilled employment or further study 10007792 ++ \n", | |
| "\n", | |
| " \\\n", | |
| " Benchmark\\n(b) % \n", | |
| "Topic PRN \n", | |
| "Assessment and feedback 10007792 SUP \n", | |
| "Academic support 10007792 SUP \n", | |
| "Non-continuation 10007792 NaN \n", | |
| "Employment or further study 10007792 NaN \n", | |
| "Highly skilled employment or further study 10007792 ++ \n", | |
| "\n", | |
| " \\\n", | |
| " Difference \\n(a)-(b) * \n", | |
| "Topic PRN \n", | |
| "Assessment and feedback 10007792 SUP \n", | |
| "Academic support 10007792 SUP \n", | |
| "Non-continuation 10007792 NaN \n", | |
| "Employment or further study 10007792 NaN \n", | |
| "Highly skilled employment or further study 10007792 ++ \n", | |
| "\n", | |
| " Splits type \n", | |
| " Z-score Flag Years \n", | |
| "Topic PRN \n", | |
| "Assessment and feedback 10007792 SUP SUP SUP PT \n", | |
| "Academic support 10007792 SUP SUP SUP PT \n", | |
| "Non-continuation 10007792 NaN NaN N PT \n", | |
| "Employment or further study 10007792 NaN NaN N PT \n", | |
| "Highly skilled employment or further study 10007792 ++ ++ N PT " | |
| ] | |
| }, | |
| "execution_count": 105, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first rows of the combined (wide format) core metrics and splits table\n", | |
| "cms_complete.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 106, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>SUP</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>SUP</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN type Heading \\\n", | |
| "0 Assessment and feedback 10007792 PT Core metrics \n", | |
| "1 Academic support 10007792 PT Core metrics \n", | |
| "2 Non-continuation 10007792 PT Core metrics \n", | |
| "3 Employment or further study 10007792 PT Core metrics \n", | |
| "4 Highly skilled employment or further study 10007792 PT Core metrics \n", | |
| "\n", | |
| " Subheading value \n", | |
| "0 Indicator\\n(a) % SUP \n", | |
| "1 Indicator\\n(a) % SUP \n", | |
| "2 Indicator\\n(a) % NaN \n", | |
| "3 Indicator\\n(a) % NaN \n", | |
| "4 Indicator\\n(a) % ++ " | |
| ] | |
| }, | |
| "execution_count": 106, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#It may be easier to work with if we put everything into long format?\n", | |
| "cms_complete_long=cms_complete.reset_index().melt(id_vars=['Topic','PRN', 'type'],\n", | |
| " var_name=['Heading','Subheading'])\n", | |
| "cms_complete_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 107, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array(['PT', 'FT'], dtype=object)" | |
| ] | |
| }, | |
| "execution_count": 107, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#List the distinct values in the type column of the long format table\n", | |
| "cms_complete_long['type'].unique()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 108, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>10007795</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>10007795</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Core metrics</td>\n", | |
| " <td>Indicator\\n(a) %</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN type Heading \\\n", | |
| "4 Highly skilled employment or further study 10007792 PT Core metrics \n", | |
| "8 Assessment and feedback 10007792 FT Core metrics \n", | |
| "10 Highly skilled employment or further study 10007792 FT Core metrics \n", | |
| "12 Highly skilled employment or further study 10007795 PT Core metrics \n", | |
| "21 The teaching on my course 10007795 FT Core metrics \n", | |
| "\n", | |
| " Subheading value \n", | |
| "4 Indicator\\n(a) % ++ \n", | |
| "8 Indicator\\n(a) % ++ \n", | |
| "10 Indicator\\n(a) % ++ \n", | |
| "12 Indicator\\n(a) % ++ \n", | |
| "21 Indicator\\n(a) % ++ " | |
| ] | |
| }, | |
| "execution_count": 108, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#In long format it's easy to write queires... For example, report on ++ rows:\n", | |
| "cms_complete_long[cms_complete_long['value']=='++'].head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 511, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>298</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>299</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>310</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>311</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN type Heading \\\n", | |
| "298 Employment or further study nan FT Disadvantaged \n", | |
| "299 Highly skilled employment or further study nan FT Disadvantaged \n", | |
| "310 Employment or further study nan FT Disadvantaged \n", | |
| "311 Highly skilled employment or further study nan FT Disadvantaged \n", | |
| "\n", | |
| " Subheading value \n", | |
| "298 Yes ++ \n", | |
| "299 Yes ++ \n", | |
| "310 Yes ++ \n", | |
| "311 Yes ++ " | |
| ] | |
| }, | |
| "execution_count": 511, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#A query over more columns...\n", | |
| "cms_complete_long[(cms_complete_long['value']=='++') & \n", | |
| " (cms_complete_long['Heading']=='Disadvantaged') &\n", | |
| " (cms_complete_long['Subheading']=='Yes')]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 783, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "cms_complete.to_csv('tef_coremetricandsplits_long.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 943, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Highly skilled employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>Disadvantaged</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>++</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN type Heading \\\n", | |
| "0 Employment or further study nan FT Disadvantaged \n", | |
| "1 Highly skilled employment or further study nan FT Disadvantaged \n", | |
| "2 Employment or further study nan FT Disadvantaged \n", | |
| "3 Highly skilled employment or further study nan FT Disadvantaged \n", | |
| "\n", | |
| " Subheading value \n", | |
| "0 Yes ++ \n", | |
| "1 Yes ++ \n", | |
| "2 Yes ++ \n", | |
| "3 Yes ++ " | |
| ] | |
| }, | |
| "execution_count": 943, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Would it be easier/more useful to demo SQL queries?\n", | |
| "from pandasql import sqldf\n", | |
| "pysqldf = lambda q: sqldf(q, globals())\n", | |
| "\n", | |
| "q='''\n", | |
| "SELECT * FROM cms_complete_long\n", | |
| "WHERE value='++' AND Heading='Disadvantaged'AND Subheading='Yes' LIMIT 10;\n", | |
| "'''\n", | |
| "pysqldf(q)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Contextual data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007796_The University of Leicester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007798_The University of Manchester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007799_University of Newcastle upon Tyne_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007801_University of Plymouth_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007802_The University of Reading_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007803_University of St Andrews_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007806_University of Sussex_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007811_Bishop Grosseteste University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007814_Cardiff University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007816_The Royal Central School of Speech and Drama_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007817_Chichester College_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007823_Edge Hill University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007825_Guildhall School of Music & Drama_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007832_Newman University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007833_Wrexham GlyndèÊr University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007835_The Royal Academy of Music_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007837_Royal Northern College of Music_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007842_The University of Cumbria_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007843_St Marys University Twickenham_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007848_University of Chester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007849_University of Abertay Dundee_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007850_The University of Bath_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007851_University of Derby_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007852_University of Dundee_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007854_Cardiff Metropolitan University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007855_Swansea University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007857_Bangor University_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007858_University of Wales Trinity Saint David_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007859_Warwickshire College_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007938_Grimsby Institute of Further and Higher Education_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-49-5b07e77609b8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mdemog_tmp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msubj_tmp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_contextualdata\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0mdemog\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdemog\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdemog_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0msubj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msubj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msubj_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-49-5b07e77609b8>\u001b[0m in \u001b[0;36mget_contextualdata\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0minstitution\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetPRN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0m_demog\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcleanCols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparse_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0m_demog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Category'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Dummy'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Group'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'FT Headcount'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'FT %'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'PT Headcount'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'PT %'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Total Headcount'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Total %'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0m_demog\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_demog\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Dummy'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 548\u001b[0;31m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 549\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# destroy all child elements (cells)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"dimension\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mdo_row\u001b[0;34m(self, row_elem)\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mc\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'$'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 653\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 654\u001b[0;31m \u001b[0mlv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mletter_value\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlv\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0mcolx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolx\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m26\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def get_contextualdata(fn):\n", | |
| " sn='Contextual data'\n", | |
| " print(fn)\n", | |
| " \n", | |
| " institution,prn=getPRN(fn,sn)\n", | |
| " \n", | |
| " _demog=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=11,parse_cols=8))\n", | |
| " _demog.columns=['Category','Dummy','Group','FT Headcount','FT %','PT Headcount','PT %','Total Headcount','Total %']\n", | |
| " _demog = _demog.drop('Dummy', axis=1)\n", | |
| " _demog.fillna(method='ffill',inplace=True)\n", | |
| " _demog['PRN']=prn\n", | |
| " \n", | |
| " _subj=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=9,parse_cols=range(10,17))).dropna(how='all',axis=1).dropna(how='all',axis=0)\n", | |
| " _subj.columns=['Subject of study','FT Headcount','FT %','PT Headcount','PT %','Total Headcount','Total %']\n", | |
| " _subj['PRN']=prn\n", | |
| " return _demog, _subj\n", | |
| "\n", | |
| "#Collect the per-file tables and concatenate once at the end,\n", | |
| "#rather than re-copying the growing frames on every iteration\n", | |
| "demog_frames=[]\n", | |
| "subj_frames=[]\n", | |
| "\n", | |
| "#Skip names starting with '~' (presumably Excel's temporary lock files);\n", | |
| "#sort the listing so files are always processed in the same order\n", | |
| "for filename in sorted(f for f in os.listdir(directory) if not f.startswith('~')):\n", | |
| "    demog_tmp,subj_tmp=get_contextualdata(os.path.join(directory,filename))\n", | |
| "    demog_frames.append(demog_tmp)\n", | |
| "    subj_frames.append(subj_tmp)\n", | |
| "\n", | |
| "#pd.concat raises on an empty list, so fall back to an empty frame\n", | |
| "demog=pd.concat(demog_frames) if demog_frames else pd.DataFrame()\n", | |
| "subj=pd.concat(subj_frames) if subj_frames else pd.DataFrame()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 170, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Category</th>\n", | |
| " <th>Group</th>\n", | |
| " <th>FT Headcount</th>\n", | |
| " <th>FT %</th>\n", | |
| " <th>PT Headcount</th>\n", | |
| " <th>PT %</th>\n", | |
| " <th>Total Headcount</th>\n", | |
| " <th>Total %</th>\n", | |
| " <th>PRN</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Level of study</td>\n", | |
| " <td>First degree</td>\n", | |
| " <td>14425</td>\n", | |
| " <td>0</td>\n", | |
| " <td>65</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Level of study</td>\n", | |
| " <td>Other UG</td>\n", | |
| " <td>610</td>\n", | |
| " <td>0</td>\n", | |
| " <td>55</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Age</td>\n", | |
| " <td>Under 21</td>\n", | |
| " <td>13795</td>\n", | |
| " <td>0</td>\n", | |
| " <td>25</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Age</td>\n", | |
| " <td>21 to 30</td>\n", | |
| " <td>1080</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Age</td>\n", | |
| " <td>Over 30</td>\n", | |
| " <td>150</td>\n", | |
| " <td>0</td>\n", | |
| " <td>85</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Category Group FT Headcount FT % PT Headcount PT % \\\n", | |
| "0 Level of study First degree 14425 0 65 0 \n", | |
| "1 Level of study Other UG 610 0 55 0 \n", | |
| "2 Age Under 21 13795 0 25 0 \n", | |
| "3 Age 21 to 30 1080 0 10 0 \n", | |
| "4 Age Over 30 150 0 85 0 \n", | |
| "\n", | |
| " Total Headcount Total % PRN \n", | |
| "0 0 0 10007792 \n", | |
| "1 0 0 10007792 \n", | |
| "2 0 0 10007792 \n", | |
| "3 0 0 10007792 \n", | |
| "4 0 0 10007792 " | |
| ] | |
| }, | |
| "execution_count": 170, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "demog.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 585, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Subject of study</th>\n", | |
| " <th>FT Headcount</th>\n", | |
| " <th>FT %</th>\n", | |
| " <th>PT Headcount</th>\n", | |
| " <th>PT %</th>\n", | |
| " <th>Total Headcount</th>\n", | |
| " <th>Total %</th>\n", | |
| " <th>PRN</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Medicine & dentistry and veterinary science</td>\n", | |
| " <td>300.0</td>\n", | |
| " <td>0.03</td>\n", | |
| " <td>30.0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>330.0</td>\n", | |
| " <td>0.03</td>\n", | |
| " <td>nan</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Subjects allied to medicine</td>\n", | |
| " <td>1000.0</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>100.0</td>\n", | |
| " <td>0.11</td>\n", | |
| " <td>1100.0</td>\n", | |
| " <td>0.10</td>\n", | |
| " <td>nan</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Biological sciences</td>\n", | |
| " <td>1100.0</td>\n", | |
| " <td>0.11</td>\n", | |
| " <td>110.0</td>\n", | |
| " <td>0.14</td>\n", | |
| " <td>1210.0</td>\n", | |
| " <td>0.11</td>\n", | |
| " <td>nan</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Agriculture & related subjects</td>\n", | |
| " <td>100.0</td>\n", | |
| " <td>0.01</td>\n", | |
| " <td>10.0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>110.0</td>\n", | |
| " <td>0.01</td>\n", | |
| " <td>nan</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Physical sciences</td>\n", | |
| " <td>500.0</td>\n", | |
| " <td>0.05</td>\n", | |
| " <td>50.0</td>\n", | |
| " <td>0.03</td>\n", | |
| " <td>550.0</td>\n", | |
| " <td>0.05</td>\n", | |
| " <td>nan</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Subject of study FT Headcount FT % \\\n", | |
| "0 Medicine & dentistry and veterinary science 300.0 0.03 \n", | |
| "1 Subjects allied to medicine 1000.0 0.10 \n", | |
| "2 Biological sciences 1100.0 0.11 \n", | |
| "3 Agriculture & related subjects 100.0 0.01 \n", | |
| "4 Physical sciences 500.0 0.05 \n", | |
| "\n", | |
| " PT Headcount PT % Total Headcount Total % PRN \n", | |
| "0 30.0 0.00 330.0 0.03 nan \n", | |
| "1 100.0 0.11 1100.0 0.10 nan \n", | |
| "2 110.0 0.14 1210.0 0.11 nan \n", | |
| "3 10.0 0.00 110.0 0.01 nan \n", | |
| "4 50.0 0.03 550.0 0.05 nan " | |
| ] | |
| }, | |
| "execution_count": 585, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "subj.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 594, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Subject of study</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Total Headcount</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Medicine & dentistry and veterinary science</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>330.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Subjects allied to medicine</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>1100.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Biological sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>1210.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Agriculture & related subjects</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>110.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Physical sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>550.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Subject of study PRN Total Headcount\n", | |
| "0 Medicine & dentistry and veterinary science nan 330.0\n", | |
| "1 Subjects allied to medicine nan 1100.0\n", | |
| "2 Biological sciences nan 1210.0\n", | |
| "3 Agriculture & related subjects nan 110.0\n", | |
| "4 Physical sciences nan 550.0" | |
| ] | |
| }, | |
| "execution_count": 594, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "subj[['Subject of study','PRN','Total Headcount']].head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 825, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Subject of study</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Category</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Medicine & dentistry and veterinary science</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>300.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Subjects allied to medicine</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>1000.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Biological sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>1100.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Agriculture & related subjects</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>100.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Physical sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>500.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Subject of study PRN Category value\n", | |
| "0 Medicine & dentistry and veterinary science nan FT Headcount 300.0\n", | |
| "1 Subjects allied to medicine nan FT Headcount 1000.0\n", | |
| "2 Biological sciences nan FT Headcount 1100.0\n", | |
| "3 Agriculture & related subjects nan FT Headcount 100.0\n", | |
| "4 Physical sciences nan FT Headcount 500.0" | |
| ] | |
| }, | |
| "execution_count": 825, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Reshape the subject table to long form: one row per subject, PRN and measure\n", | |
| "subj_long=pd.melt(subj, id_vars=['Subject of study','PRN'], var_name='Category')\n", | |
| "#Entries that cannot be parsed as numbers become NaN\n", | |
| "subj_long=subj_long.assign(value=pd.to_numeric(subj_long['value'],errors='coerce'))\n", | |
| "subj_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 872, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def longify(df,id_vars=['Topic','PRN'],var_name=['Heading','Subheading'],valname='value',resetIndex=True):\n", | |
| "    '''Melt a wide table into long form with a numeric value column.\n", | |
| "\n", | |
| "    df: wide frame to reshape.\n", | |
| "    id_vars: columns kept as identifiers.\n", | |
| "    var_name: name(s) given to the column(s) holding the melted column labels.\n", | |
| "    valname: name of the value column.\n", | |
| "    resetIndex: if True, move the index into columns before melting.\n", | |
| "\n", | |
| "    Returns the melted frame; values that cannot be parsed as numbers become NaN.\n", | |
| "    '''\n", | |
| "    wide = df.reset_index() if resetIndex else df\n", | |
| "    long_df = pd.melt(wide, id_vars=id_vars, var_name=var_name, value_name=valname)\n", | |
| "    long_df[valname] = pd.to_numeric(long_df[valname], errors='coerce')\n", | |
| "    return long_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 873, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Subject of study</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Category</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Medicine & dentistry and veterinary science</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>300.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Subjects allied to medicine</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>1000.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Biological sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>1100.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Agriculture & related subjects</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>100.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Physical sciences</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT Headcount</td>\n", | |
| " <td>500.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Subject of study PRN Category value\n", | |
| "0 Medicine & dentistry and veterinary science nan FT Headcount 300.0\n", | |
| "1 Subjects allied to medicine nan FT Headcount 1000.0\n", | |
| "2 Biological sciences nan FT Headcount 1100.0\n", | |
| "3 Agriculture & related subjects nan FT Headcount 100.0\n", | |
| "4 Physical sciences nan FT Headcount 500.0" | |
| ] | |
| }, | |
| "execution_count": 873, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "subj_long=longify(subj,id_vars=['Subject of study','PRN'],var_name='Category', resetIndex=False)\n", | |
| "subj_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 785, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "subj_long.to_csv('tef_subj_long.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Indicators (a)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 143, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def get_indicators(fn):\n", | |
| "    '''Read the FT and PT indicator rows from a workbook's 'Indicator (a)' sheet.\n", | |
| "\n", | |
| "    fn: path to a metrics workbook.\n", | |
| "\n", | |
| "    Returns a frame indexed by (Topic, PRN) holding the rows labelled FT\n", | |
| "    followed by the rows labelled PT, with a Type column recording the label.\n", | |
| "    '''\n", | |
| "    sn='Indicator (a)'\n", | |
| "    print(fn)\n", | |
| "    #getPRN returns a pair; only the second item (the PRN) is used here\n", | |
| "    _,prn=getPRN(fn,sn)\n", | |
| "\n", | |
| "    #Two header rows give two-level column labels; fully empty rows/columns are dropped\n", | |
| "    #NOTE: newer pandas renames sheetname/parse_cols to sheet_name/usecols\n", | |
| "    i=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=22,\n", | |
| "                      header=[0,1]).dropna(how='all',axis=0).dropna(how='all',axis=1))\n", | |
| "\n", | |
| "    i['PRN']=prn\n", | |
| "\n", | |
| "    #Row slices 1:7 and 8:14 are taken as the FT and PT blocks.\n", | |
| "    #Copy each slice so that adding the Type column does not trigger SettingWithCopyWarning\n", | |
| "    ift=i[1:7].copy()\n", | |
| "    ift['Type']='FT'\n", | |
| "    ipt=i[8:14].copy()\n", | |
| "    ipt['Type']='PT'\n", | |
| "\n", | |
| "    i=pd.concat([ift,ipt]).dropna(how='all',axis=1)\n", | |
| "    i.index.names=['Topic']\n", | |
| "\n", | |
| "    #Move Topic out of the index so it can be paired with PRN as a two-level index\n", | |
| "    i=i.reset_index().set_index(['Topic','PRN'])\n", | |
| "\n", | |
| "    return i" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 144, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", | |
| "A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
| "Try using .loc[row_indexer,col_indexer] = value instead\n", | |
| "\n", | |
| "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
| " if sys.path[0] == '':\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007796_The University of Leicester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007798_The University of Manchester_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007799_University of Newcastle upon Tyne_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-144-0a4c8ab949a1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mindicators\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mindicators\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mget_indicators\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mindicators\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'tef_indicators.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-143-c1c21329cc55>\u001b[0m in \u001b[0;36mget_indicators\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m i=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=22,\n\u001b[0;32m----> 7\u001b[0;31m header=[0,1]).dropna(how='all',axis=0).dropna(how='all',axis=1))\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 548\u001b[0;31m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 549\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# destroy all child elements (cells)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"dimension\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mdo_row\u001b[0;34m(self, row_elem)\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[0mletter_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_UPPERCASE_1_REL_INDEX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 637\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcell_elem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrow_elem\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 638\u001b[0;31m \u001b[0mcell_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcell_elem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'r'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 639\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcell_name\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Yes, it's optional.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 640\u001b[0m \u001b[0mcolx\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Collect the per-file tables and concatenate once at the end,\n", | |
| "#rather than re-copying the growing frame on every iteration\n", | |
| "indicator_frames=[]\n", | |
| "\n", | |
| "#Skip names starting with '~' (presumably Excel's temporary lock files);\n", | |
| "#sort the listing so files are always processed in the same order\n", | |
| "for filename in sorted(f for f in os.listdir(directory) if not f.startswith('~')):\n", | |
| "    indicator_frames.append(get_indicators(os.path.join(directory,filename)))\n", | |
| "\n", | |
| "#pd.concat raises on an empty list, so fall back to an empty frame\n", | |
| "indicators=pd.concat(indicator_frames) if indicator_frames else pd.DataFrame()\n", | |
| "\n", | |
| "indicators.to_csv('tef_indicators.csv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 171, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Ethnicity</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " <th>Type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First degree</th>\n", | |
| " <th>Other\\nUG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>White</th>\n", | |
| " <th>BME</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>90.6797</td>\n", | |
| " <td>90.8339</td>\n", | |
| " <td>89.8528</td>\n", | |
| " <td>91.2401</td>\n", | |
| " <td>90.6797</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>90.7622</td>\n", | |
| " <td>89.6226</td>\n", | |
| " <td>91.6187</td>\n", | |
| " <td>91.5394</td>\n", | |
| " <td>91.8816</td>\n", | |
| " <td>86.7551</td>\n", | |
| " <td>91.2695</td>\n", | |
| " <td>90.6016</td>\n", | |
| " <td>89.0266</td>\n", | |
| " <td>91.9115</td>\n", | |
| " <td>91.5804</td>\n", | |
| " <td>92.212</td>\n", | |
| " <td>85.0954</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>75.517</td>\n", | |
| " <td>76.4804</td>\n", | |
| " <td>75.0354</td>\n", | |
| " <td>75.0201</td>\n", | |
| " <td>75.517</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>75.4647</td>\n", | |
| " <td>76.1871</td>\n", | |
| " <td>74.9424</td>\n", | |
| " <td>75.5491</td>\n", | |
| " <td>75.8037</td>\n", | |
| " <td>73.7306</td>\n", | |
| " <td>74.0418</td>\n", | |
| " <td>75.7124</td>\n", | |
| " <td>74.0954</td>\n", | |
| " <td>76.5766</td>\n", | |
| " <td>75.4667</td>\n", | |
| " <td>75.1963</td>\n", | |
| " <td>75.9003</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>83.8984</td>\n", | |
| " <td>84.4193</td>\n", | |
| " <td>83.9359</td>\n", | |
| " <td>83.3757</td>\n", | |
| " <td>83.8984</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>84.0099</td>\n", | |
| " <td>82.4686</td>\n", | |
| " <td>84.2166</td>\n", | |
| " <td>83.9893</td>\n", | |
| " <td>84.4267</td>\n", | |
| " <td>81.8901</td>\n", | |
| " <td>84.3709</td>\n", | |
| " <td>83.8358</td>\n", | |
| " <td>84.2393</td>\n", | |
| " <td>83.6444</td>\n", | |
| " <td>83.958</td>\n", | |
| " <td>85.6894</td>\n", | |
| " <td>83.0154</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>2.2783</td>\n", | |
| " <td>2.12284</td>\n", | |
| " <td>2.39577</td>\n", | |
| " <td>2.3185</td>\n", | |
| " <td>2.27954</td>\n", | |
| " <td>N</td>\n", | |
| " <td>1.75764</td>\n", | |
| " <td>9.23277</td>\n", | |
| " <td>3.33333</td>\n", | |
| " <td>1.52757</td>\n", | |
| " <td>2.2543</td>\n", | |
| " <td>2.61283</td>\n", | |
| " <td>3.98773</td>\n", | |
| " <td>2.0488</td>\n", | |
| " <td>2.44962</td>\n", | |
| " <td>2.11622</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>94.4993</td>\n", | |
| " <td>93.9013</td>\n", | |
| " <td>95.2135</td>\n", | |
| " <td>94.332</td>\n", | |
| " <td>94.5236</td>\n", | |
| " <td>R</td>\n", | |
| " <td>94.592</td>\n", | |
| " <td>93.0521</td>\n", | |
| " <td>94.2377</td>\n", | |
| " <td>94.6537</td>\n", | |
| " <td>94.5472</td>\n", | |
| " <td>93.9394</td>\n", | |
| " <td>93.7793</td>\n", | |
| " <td>94.6043</td>\n", | |
| " <td>93.4256</td>\n", | |
| " <td>95.4714</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>79.3124</td>\n", | |
| " <td>79.3296</td>\n", | |
| " <td>79.1721</td>\n", | |
| " <td>79.4422</td>\n", | |
| " <td>79.3698</td>\n", | |
| " <td>R</td>\n", | |
| " <td>79.3065</td>\n", | |
| " <td>79.4045</td>\n", | |
| " <td>74.7899</td>\n", | |
| " <td>80.011</td>\n", | |
| " <td>79.0003</td>\n", | |
| " <td>83.6364</td>\n", | |
| " <td>77.8169</td>\n", | |
| " <td>79.5307</td>\n", | |
| " <td>80.8116</td>\n", | |
| " <td>77.955</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>21.0526</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>21.0526</td>\n", | |
| " <td>0</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>91.1111</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>88.8889</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>86.9565</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>10007795</th>\n", | |
| " <td>90.9203</td>\n", | |
| " <td>89.8789</td>\n", | |
| " <td>91.7786</td>\n", | |
| " <td>91.254</td>\n", | |
| " <td>90.9274</td>\n", | |
| " <td>89.5833</td>\n", | |
| " <td>90.8864</td>\n", | |
| " <td>91.2909</td>\n", | |
| " <td>91.3506</td>\n", | |
| " <td>90.8578</td>\n", | |
| " <td>91.3007</td>\n", | |
| " <td>89.5158</td>\n", | |
| " <td>90.8638</td>\n", | |
| " <td>90.9258</td>\n", | |
| " <td>89.7398</td>\n", | |
| " <td>91.633</td>\n", | |
| " <td>90.9966</td>\n", | |
| " <td>88.4162</td>\n", | |
| " <td>90.9528</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007795</th>\n", | |
| " <td>72.4769</td>\n", | |
| " <td>70.8454</td>\n", | |
| " <td>74.1373</td>\n", | |
| " <td>72.6671</td>\n", | |
| " <td>72.4931</td>\n", | |
| " <td>69.4444</td>\n", | |
| " <td>72.2322</td>\n", | |
| " <td>75.1547</td>\n", | |
| " <td>74.1532</td>\n", | |
| " <td>71.0338</td>\n", | |
| " <td>71.997</td>\n", | |
| " <td>73.4978</td>\n", | |
| " <td>69.9169</td>\n", | |
| " <td>72.7273</td>\n", | |
| " <td>73.136</td>\n", | |
| " <td>72.079</td>\n", | |
| " <td>71.7057</td>\n", | |
| " <td>74.4241</td>\n", | |
| " <td>80.7418</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007795</th>\n", | |
| " <td>83.5246</td>\n", | |
| " <td>82.0285</td>\n", | |
| " <td>84.5623</td>\n", | |
| " <td>84.2109</td>\n", | |
| " <td>83.507</td>\n", | |
| " <td>86.8056</td>\n", | |
| " <td>83.433</td>\n", | |
| " <td>84.5259</td>\n", | |
| " <td>85.7576</td>\n", | |
| " <td>82.5839</td>\n", | |
| " <td>83.5152</td>\n", | |
| " <td>83.2889</td>\n", | |
| " <td>81.4646</td>\n", | |
| " <td>83.7261</td>\n", | |
| " <td>84.0149</td>\n", | |
| " <td>83.2285</td>\n", | |
| " <td>83.1844</td>\n", | |
| " <td>84.1187</td>\n", | |
| " <td>87.2634</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years \\\n", | |
| " Unnamed: 1_level_1 \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 90.6797 \n", | |
| "Assessment and feedback 10007792 75.517 \n", | |
| "Academic support 10007792 83.8984 \n", | |
| "Non-continuation 10007792 2.2783 \n", | |
| "Employment or further study 10007792 94.4993 \n", | |
| "Highly skilled employment or further study 10007792 79.3124 \n", | |
| "The teaching on my course 10007792 SUP \n", | |
| "Assessment and feedback 10007792 SUP \n", | |
| "Academic support 10007792 SUP \n", | |
| "Non-continuation 10007792 21.0526 \n", | |
| "Employment or further study 10007792 91.1111 \n", | |
| "Highly skilled employment or further study 10007792 88.8889 \n", | |
| "The teaching on my course 10007795 90.9203 \n", | |
| "Assessment and feedback 10007795 72.4769 \n", | |
| "Academic support 10007795 83.5246 \n", | |
| "\n", | |
| " Years \\\n", | |
| " 1 2 \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 90.8339 89.8528 \n", | |
| "Assessment and feedback 10007792 76.4804 75.0354 \n", | |
| "Academic support 10007792 84.4193 83.9359 \n", | |
| "Non-continuation 10007792 2.12284 2.39577 \n", | |
| "Employment or further study 10007792 93.9013 95.2135 \n", | |
| "Highly skilled employment or further study 10007792 79.3296 79.1721 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation 10007792 N N \n", | |
| "Employment or further study 10007792 N DP \n", | |
| "Highly skilled employment or further study 10007792 N DP \n", | |
| "The teaching on my course 10007795 89.8789 91.7786 \n", | |
| "Assessment and feedback 10007795 70.8454 74.1373 \n", | |
| "Academic support 10007795 82.0285 84.5623 \n", | |
| "\n", | |
| " Level of study \\\n", | |
| " 3 First degree \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 91.2401 90.6797 \n", | |
| "Assessment and feedback 10007792 75.0201 75.517 \n", | |
| "Academic support 10007792 83.3757 83.8984 \n", | |
| "Non-continuation 10007792 2.3185 2.27954 \n", | |
| "Employment or further study 10007792 94.332 94.5236 \n", | |
| "Highly skilled employment or further study 10007792 79.4422 79.3698 \n", | |
| "The teaching on my course 10007792 N N \n", | |
| "Assessment and feedback 10007792 N N \n", | |
| "Academic support 10007792 N N \n", | |
| "Non-continuation 10007792 N 21.0526 \n", | |
| "Employment or further study 10007792 DP DP \n", | |
| "Highly skilled employment or further study 10007792 DP 86.9565 \n", | |
| "The teaching on my course 10007795 91.254 90.9274 \n", | |
| "Assessment and feedback 10007795 72.6671 72.4931 \n", | |
| "Academic support 10007795 84.2109 83.507 \n", | |
| "\n", | |
| " Age \\\n", | |
| " Other\\nUG Young \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 NaN 90.7622 \n", | |
| "Assessment and feedback 10007792 NaN 75.4647 \n", | |
| "Academic support 10007792 NaN 84.0099 \n", | |
| "Non-continuation 10007792 N 1.75764 \n", | |
| "Employment or further study 10007792 R 94.592 \n", | |
| "Highly skilled employment or further study 10007792 R 79.3065 \n", | |
| "The teaching on my course 10007792 SUP N \n", | |
| "Assessment and feedback 10007792 SUP N \n", | |
| "Academic support 10007792 SUP N \n", | |
| "Non-continuation 10007792 0 N \n", | |
| "Employment or further study 10007792 SUP N \n", | |
| "Highly skilled employment or further study 10007792 SUP N \n", | |
| "The teaching on my course 10007795 89.5833 90.8864 \n", | |
| "Assessment and feedback 10007795 69.4444 72.2322 \n", | |
| "Academic support 10007795 86.8056 83.433 \n", | |
| "\n", | |
| " Disadvantaged \\\n", | |
| " Mature Yes \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 89.6226 91.6187 \n", | |
| "Assessment and feedback 10007792 76.1871 74.9424 \n", | |
| "Academic support 10007792 82.4686 84.2166 \n", | |
| "Non-continuation 10007792 9.23277 3.33333 \n", | |
| "Employment or further study 10007792 93.0521 94.2377 \n", | |
| "Highly skilled employment or further study 10007792 79.4045 74.7899 \n", | |
| "The teaching on my course 10007792 SUP N \n", | |
| "Assessment and feedback 10007792 SUP N \n", | |
| "Academic support 10007792 SUP N \n", | |
| "Non-continuation 10007792 DP NaN \n", | |
| "Employment or further study 10007792 DP R \n", | |
| "Highly skilled employment or further study 10007792 DP R \n", | |
| "The teaching on my course 10007795 91.2909 91.3506 \n", | |
| "Assessment and feedback 10007795 75.1547 74.1532 \n", | |
| "Academic support 10007795 84.5259 85.7576 \n", | |
| "\n", | |
| " Ethnicity \\\n", | |
| " No White \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 91.5394 91.8816 \n", | |
| "Assessment and feedback 10007792 75.5491 75.8037 \n", | |
| "Academic support 10007792 83.9893 84.4267 \n", | |
| "Non-continuation 10007792 1.52757 2.2543 \n", | |
| "Employment or further study 10007792 94.6537 94.5472 \n", | |
| "Highly skilled employment or further study 10007792 80.011 79.0003 \n", | |
| "The teaching on my course 10007792 N N \n", | |
| "Assessment and feedback 10007792 N N \n", | |
| "Academic support 10007792 N N \n", | |
| "Non-continuation 10007792 N DP \n", | |
| "Employment or further study 10007792 N DP \n", | |
| "Highly skilled employment or further study 10007792 N DP \n", | |
| "The teaching on my course 10007795 90.8578 91.3007 \n", | |
| "Assessment and feedback 10007795 71.0338 71.997 \n", | |
| "Academic support 10007795 82.5839 83.5152 \n", | |
| "\n", | |
| " Disabled \\\n", | |
| " BME Yes \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 86.7551 91.2695 \n", | |
| "Assessment and feedback 10007792 73.7306 74.0418 \n", | |
| "Academic support 10007792 81.8901 84.3709 \n", | |
| "Non-continuation 10007792 2.61283 3.98773 \n", | |
| "Employment or further study 10007792 93.9394 93.7793 \n", | |
| "Highly skilled employment or further study 10007792 83.6364 77.8169 \n", | |
| "The teaching on my course 10007792 R N \n", | |
| "Assessment and feedback 10007792 R N \n", | |
| "Academic support 10007792 R N \n", | |
| "Non-continuation 10007792 N N \n", | |
| "Employment or further study 10007792 NaN N \n", | |
| "Highly skilled employment or further study 10007792 NaN N \n", | |
| "The teaching on my course 10007795 89.5158 90.8638 \n", | |
| "Assessment and feedback 10007795 73.4978 69.9169 \n", | |
| "Academic support 10007795 83.2889 81.4646 \n", | |
| "\n", | |
| " Sex \\\n", | |
| " No Male \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 90.6016 89.0266 \n", | |
| "Assessment and feedback 10007792 75.7124 74.0954 \n", | |
| "Academic support 10007792 83.8358 84.2393 \n", | |
| "Non-continuation 10007792 2.0488 2.44962 \n", | |
| "Employment or further study 10007792 94.6043 93.4256 \n", | |
| "Highly skilled employment or further study 10007792 79.5307 80.8116 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation 10007792 DP N \n", | |
| "Employment or further study 10007792 DP DP \n", | |
| "Highly skilled employment or further study 10007792 DP DP \n", | |
| "The teaching on my course 10007795 90.9258 89.7398 \n", | |
| "Assessment and feedback 10007795 72.7273 73.136 \n", | |
| "Academic support 10007795 83.7261 84.0149 \n", | |
| "\n", | |
| " Domicile \\\n", | |
| " Female UK \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 91.9115 91.5804 \n", | |
| "Assessment and feedback 10007792 76.5766 75.4667 \n", | |
| "Academic support 10007792 83.6444 83.958 \n", | |
| "Non-continuation 10007792 2.11622 0 \n", | |
| "Employment or further study 10007792 95.4714 0 \n", | |
| "Highly skilled employment or further study 10007792 77.955 0 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation 10007792 DP 0 \n", | |
| "Employment or further study 10007792 DP 0 \n", | |
| "Highly skilled employment or further study 10007792 DP 0 \n", | |
| "The teaching on my course 10007795 91.633 90.9966 \n", | |
| "Assessment and feedback 10007795 72.079 71.7057 \n", | |
| "Academic support 10007795 83.2285 83.1844 \n", | |
| "\n", | |
| " Type \n", | |
| " Other EU Non EU \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 92.212 85.0954 FT \n", | |
| "Assessment and feedback 10007792 75.1963 75.9003 FT \n", | |
| "Academic support 10007792 85.6894 83.0154 FT \n", | |
| "Non-continuation 10007792 0 0 FT \n", | |
| "Employment or further study 10007792 0 0 FT \n", | |
| "Highly skilled employment or further study 10007792 0 0 FT \n", | |
| "The teaching on my course 10007792 R N PT \n", | |
| "Assessment and feedback 10007792 R N PT \n", | |
| "Academic support 10007792 R N PT \n", | |
| "Non-continuation 10007792 0 0 PT \n", | |
| "Employment or further study 10007792 0 0 PT \n", | |
| "Highly skilled employment or further study 10007792 0 0 PT \n", | |
| "The teaching on my course 10007795 88.4162 90.9528 FT \n", | |
| "Assessment and feedback 10007795 74.4241 80.7418 FT \n", | |
| "Academic support 10007795 84.1187 87.2634 FT " | |
| ] | |
| }, | |
| "execution_count": 171, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first 15 rows of the indicators table\n", | |
| "indicators.head(15)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 877, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| ",,All years,Years,Years,Years,Level of study,Level of study,Age,Age,Disadvantaged,Disadvantaged,Ethnicity,Ethnicity,Disabled,Disabled,Sex,Sex,Welsh medium,Domicile,Domicile,Domicile\r\n", | |
| ",,Unnamed: 1_level_1,1,2,3,First Degree,Other UG,Young,Mature,Yes,No,White,BME,Yes,No,Male,Female,Yes,UK,Other EU,Non EU\r\n", | |
| "Topic,PRN,,,,,,,,,,,,,,,,,,,,\r\n", | |
| "The teaching on my course,dfdf,87.2,90.68,88.92,85.84,92.03,80,83.07000000000001,88.43,83.44,90.91,89.23,88.45,84.94,91.65,82.42,90.66,,87.3,83.25,91.89\r\n", | |
| "Assessment and feedback ,dfdf,73.3,72.14,75.83,70.06,74.24,69.09,68.67,75.75,76.24,71.81,68.87,76.31,77.02,69.86999999999999,68.9,74.96,,73.86999999999999,68.53999999999999,72.61999999999999\r\n", | |
| "Academic support,dfdf,81.99,79.76,81.80000000000001,82.32000000000001,76.56,75.58,83.2,79.02000000000001,77.16000000000001,77.57000000000001,81.65,78.79,81.04,75.46000000000001,83.72,76.7,,78.41000000000001,83.42,80.22\r\n", | |
| "Non-continuation,dfdf,5.317601422337159,6.427601422337159,3.527601422337159,4.847601422337159,4.887601422337159,6.617601422337159,4.6676014223371585,7.017601422337159,4.237601422337159,4.9176014223371585,7.287601422337159,6.327601422337159,3.687601422337159,4.307601422337159,5.227601422337159,7.55,,,,\r\n", | |
| "Employment or further study ,dfdf,92.2,93.39,92.02,96.39,90.21000000000001,92.84,89.37,93.11,97.12,87.48,94.9,90.19,91.78,91.84,91.07000000000001,95.51,,,,\r\n", | |
| "Highly skilled employment,dfdf,74.1,73.3,77.24,76.24,70.30999999999999,74.38,75.83,76.91,76.58999999999999,72.13,72.75999999999999,76.14,74.14,69.86999999999999,77.13,70.52,,,,\r\n", | |
| "The teaching on my course ,dfdf,90.1,91.38,86.30999999999999,85.55,92.78999999999999,86.86,N,86.1,89.5,87.5,90.99,91.28,N,93.28,94.78,90.94999999999999,,94.85,89.96,89.52\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Peek at the first lines of the raw CSV file (head prints 10 lines by default)\n", | |
| "!head tef_indicators.csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 878, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Ethnicity</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th>Welsh medium</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First Degree</th>\n", | |
| " <th>Other UG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>White</th>\n", | |
| " <th>BME</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>dfdf</th>\n", | |
| " <td>87.200000</td>\n", | |
| " <td>90.680000</td>\n", | |
| " <td>88.920000</td>\n", | |
| " <td>85.840000</td>\n", | |
| " <td>92.030000</td>\n", | |
| " <td>80.000000</td>\n", | |
| " <td>83.07000000000001</td>\n", | |
| " <td>88.430000</td>\n", | |
| " <td>83.440000</td>\n", | |
| " <td>90.910000</td>\n", | |
| " <td>89.230000</td>\n", | |
| " <td>88.450000</td>\n", | |
| " <td>84.94</td>\n", | |
| " <td>91.650000</td>\n", | |
| " <td>82.420000</td>\n", | |
| " <td>90.66</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>87.30</td>\n", | |
| " <td>83.25</td>\n", | |
| " <td>91.89</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>dfdf</th>\n", | |
| " <td>73.300000</td>\n", | |
| " <td>72.140000</td>\n", | |
| " <td>75.830000</td>\n", | |
| " <td>70.060000</td>\n", | |
| " <td>74.240000</td>\n", | |
| " <td>69.090000</td>\n", | |
| " <td>68.67</td>\n", | |
| " <td>75.750000</td>\n", | |
| " <td>76.240000</td>\n", | |
| " <td>71.810000</td>\n", | |
| " <td>68.870000</td>\n", | |
| " <td>76.310000</td>\n", | |
| " <td>77.02</td>\n", | |
| " <td>69.870000</td>\n", | |
| " <td>68.900000</td>\n", | |
| " <td>74.96</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>73.87</td>\n", | |
| " <td>68.54</td>\n", | |
| " <td>72.62</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>dfdf</th>\n", | |
| " <td>81.990000</td>\n", | |
| " <td>79.760000</td>\n", | |
| " <td>81.800000</td>\n", | |
| " <td>82.320000</td>\n", | |
| " <td>76.560000</td>\n", | |
| " <td>75.580000</td>\n", | |
| " <td>83.2</td>\n", | |
| " <td>79.020000</td>\n", | |
| " <td>77.160000</td>\n", | |
| " <td>77.570000</td>\n", | |
| " <td>81.650000</td>\n", | |
| " <td>78.790000</td>\n", | |
| " <td>81.04</td>\n", | |
| " <td>75.460000</td>\n", | |
| " <td>83.720000</td>\n", | |
| " <td>76.70</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>78.41</td>\n", | |
| " <td>83.42</td>\n", | |
| " <td>80.22</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <th>dfdf</th>\n", | |
| " <td>5.317601</td>\n", | |
| " <td>6.427601</td>\n", | |
| " <td>3.527601</td>\n", | |
| " <td>4.847601</td>\n", | |
| " <td>4.887601</td>\n", | |
| " <td>6.617601</td>\n", | |
| " <td>4.6676014223371585</td>\n", | |
| " <td>7.017601</td>\n", | |
| " <td>4.237601</td>\n", | |
| " <td>4.917601</td>\n", | |
| " <td>7.287601</td>\n", | |
| " <td>6.327601</td>\n", | |
| " <td>3.687601422337159</td>\n", | |
| " <td>4.307601</td>\n", | |
| " <td>5.227601</td>\n", | |
| " <td>7.55</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>dfdf</th>\n", | |
| " <td>92.200000</td>\n", | |
| " <td>93.390000</td>\n", | |
| " <td>92.020000</td>\n", | |
| " <td>96.390000</td>\n", | |
| " <td>90.210000</td>\n", | |
| " <td>92.840000</td>\n", | |
| " <td>89.37</td>\n", | |
| " <td>93.110000</td>\n", | |
| " <td>97.120000</td>\n", | |
| " <td>87.480000</td>\n", | |
| " <td>94.900000</td>\n", | |
| " <td>90.190000</td>\n", | |
| " <td>91.78</td>\n", | |
| " <td>91.840000</td>\n", | |
| " <td>91.070000</td>\n", | |
| " <td>95.51</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years Years \\\n", | |
| " Unnamed: 1_level_1 1 2 \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 87.200000 90.680000 88.920000 \n", | |
| "Assessment and feedback dfdf 73.300000 72.140000 75.830000 \n", | |
| "Academic support dfdf 81.990000 79.760000 81.800000 \n", | |
| "Non-continuation dfdf 5.317601 6.427601 3.527601 \n", | |
| "Employment or further study dfdf 92.200000 93.390000 92.020000 \n", | |
| "\n", | |
| " Level of study \\\n", | |
| " 3 First Degree Other UG \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 85.840000 92.030000 80.000000 \n", | |
| "Assessment and feedback dfdf 70.060000 74.240000 69.090000 \n", | |
| "Academic support dfdf 82.320000 76.560000 75.580000 \n", | |
| "Non-continuation dfdf 4.847601 4.887601 6.617601 \n", | |
| "Employment or further study dfdf 96.390000 90.210000 92.840000 \n", | |
| "\n", | |
| " Age \\\n", | |
| " Young Mature \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 83.07000000000001 88.430000 \n", | |
| "Assessment and feedback dfdf 68.67 75.750000 \n", | |
| "Academic support dfdf 83.2 79.020000 \n", | |
| "Non-continuation dfdf 4.6676014223371585 7.017601 \n", | |
| "Employment or further study dfdf 89.37 93.110000 \n", | |
| "\n", | |
| " Disadvantaged Ethnicity \\\n", | |
| " Yes No White \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 83.440000 90.910000 89.230000 \n", | |
| "Assessment and feedback dfdf 76.240000 71.810000 68.870000 \n", | |
| "Academic support dfdf 77.160000 77.570000 81.650000 \n", | |
| "Non-continuation dfdf 4.237601 4.917601 7.287601 \n", | |
| "Employment or further study dfdf 97.120000 87.480000 94.900000 \n", | |
| "\n", | |
| " Disabled \\\n", | |
| " BME Yes No \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 88.450000 84.94 91.650000 \n", | |
| "Assessment and feedback dfdf 76.310000 77.02 69.870000 \n", | |
| "Academic support dfdf 78.790000 81.04 75.460000 \n", | |
| "Non-continuation dfdf 6.327601 3.687601422337159 4.307601 \n", | |
| "Employment or further study dfdf 90.190000 91.78 91.840000 \n", | |
| "\n", | |
| " Sex Welsh medium Domicile \\\n", | |
| " Male Female Yes UK \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 82.420000 90.66 NaN 87.30 \n", | |
| "Assessment and feedback dfdf 68.900000 74.96 NaN 73.87 \n", | |
| "Academic support dfdf 83.720000 76.70 NaN 78.41 \n", | |
| "Non-continuation dfdf 5.227601 7.55 NaN NaN \n", | |
| "Employment or further study dfdf 91.070000 95.51 NaN NaN \n", | |
| "\n", | |
| " \n", | |
| " Other EU Non EU \n", | |
| "Topic PRN \n", | |
| "The teaching on my course dfdf 83.25 91.89 \n", | |
| "Assessment and feedback dfdf 68.54 72.62 \n", | |
| "Academic support dfdf 83.42 80.22 \n", | |
| "Non-continuation dfdf NaN NaN \n", | |
| "Employment or further study dfdf NaN NaN " | |
| ] | |
| }, | |
| "execution_count": 878, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Read the CSV back in: rows 0-1 form the two-level column header, columns 0-1 the row index\n", | |
| "pd.read_csv('tef_indicators.csv',header=[0,1],index_col=[0,1]).head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'10007792'" | |
| ] | |
| }, | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Grab the first PRN in the index to use as the example provider in later cells\n", | |
| "dummyPRN=indicators.index.get_level_values('PRN').unique().tolist()[0]\n", | |
| "dummyPRN" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 123, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Topic\n", | |
| "Full-time headcount: NaN\n", | |
| "The teaching on my course 90.7622\n", | |
| "Assessment and feedback 75.4647\n", | |
| "Academic support 84.0099\n", | |
| "Non-continuation 1.75764\n", | |
| "Employment or further study 94.592\n", | |
| "Highly skilled employment or further study 79.3065\n", | |
| "Part-time headcount: NaN\n", | |
| "The teaching on my course N\n", | |
| "Assessment and feedback N\n", | |
| "Academic support N\n", | |
| "Non-continuation N\n", | |
| "Employment or further study N\n", | |
| "Highly skilled employment or further study N\n", | |
| "NaN MET_WHITE\n", | |
| "NaN TEFXXXAGE_Y\n", | |
| "NaN INDICATOR_\\nTEFXXXAGE_\\nY\n", | |
| "NaN NaN\n", | |
| "NaN NaN\n", | |
| "NaN NaN\n", | |
| "N NaN\n", | |
| "R NaN\n", | |
| "SUP NaN\n", | |
| "NaN NaN\n", | |
| "DP NaN\n", | |
| "Name: (Age, Young), dtype: object" | |
| ] | |
| }, | |
| "execution_count": 123, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Example cross-section: select the rows for one PRN, then the ('Age','Young') column\n", | |
| "indicators.xs(dummyPRN, level='PRN')['Age','Young']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 881, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['dfdf', 'JHDH']" | |
| ] | |
| }, | |
| "execution_count": 881, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Display the distinct PRN values found in the PRN level of the row index\n", | |
| "indicators.index.get_level_values('PRN').unique().tolist()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 882, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>87.30</td>\n", | |
| " <td>83.25</td>\n", | |
| " <td>91.89</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>73.87</td>\n", | |
| " <td>68.54</td>\n", | |
| " <td>72.62</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>78.41</td>\n", | |
| " <td>83.42</td>\n", | |
| " <td>80.22</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>94.85</td>\n", | |
| " <td>89.96</td>\n", | |
| " <td>89.52</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>74.95</td>\n", | |
| " <td>77.69</td>\n", | |
| " <td>82.98</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>86.12</td>\n", | |
| " <td>83.30</td>\n", | |
| " <td>83.60</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " UK Other EU Non EU\n", | |
| "Topic \n", | |
| "The teaching on my course 87.30 83.25 91.89\n", | |
| "Assessment and feedback 73.87 68.54 72.62\n", | |
| "Academic support 78.41 83.42 80.22\n", | |
| "Non-continuation NaN NaN NaN\n", | |
| "Employment or further study NaN NaN NaN\n", | |
| "Highly skilled employment NaN NaN NaN\n", | |
| "The teaching on my course 94.85 89.96 89.52\n", | |
| "Assessment and feedback 74.95 77.69 82.98\n", | |
| "Academic support 86.12 83.30 83.60\n", | |
| "Non-continuation NaN NaN NaN\n", | |
| "Employment or further study NaN NaN NaN\n", | |
| "Highly skilled employment NaN NaN NaN" | |
| ] | |
| }, | |
| "execution_count": 882, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Example filter on column and PRN\n", | |
| "indicators.xs('JHDH', level='PRN')['Domicile']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 883, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>430</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>JHDH</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>UK</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>431</th>\n", | |
| " <td>Highly skilled employment</td>\n", | |
| " <td>JHDH</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>UK</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>432</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>83.25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>433</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>68.54</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>434</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>83.42</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>435</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>436</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>437</th>\n", | |
| " <td>Highly skilled employment</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>438</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>89.96</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>439</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>Domicile</td>\n", | |
| " <td>Other EU</td>\n", | |
| " <td>77.69</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading value\n", | |
| "430 Employment or further study JHDH Domicile UK NaN\n", | |
| "431 Highly skilled employment JHDH Domicile UK NaN\n", | |
| "432 The teaching on my course dfdf Domicile Other EU 83.25\n", | |
| "433 Assessment and feedback dfdf Domicile Other EU 68.54\n", | |
| "434 Academic support dfdf Domicile Other EU 83.42\n", | |
| "435 Non-continuation dfdf Domicile Other EU NaN\n", | |
| "436 Employment or further study dfdf Domicile Other EU NaN\n", | |
| "437 Highly skilled employment dfdf Domicile Other EU NaN\n", | |
| "438 The teaching on my course dfdf Domicile Other EU 89.96\n", | |
| "439 Assessment and feedback dfdf Domicile Other EU 77.69" | |
| ] | |
| }, | |
| "execution_count": 883, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "indicators_long=longify(indicators)\n", | |
| "indicators_long.tail(50).head(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 821, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "indicators_long.to_csv('tef_indicators_long.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Benchmarks (b)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 131, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#This code is similar to that used for Zscores sheet\n", | |
| "def get_benchmarks(fn):\n", | |
| " sn='Benchmark (b)'\n", | |
| " print(fn)\n", | |
| " institution,prn=getPRN(fn,sn)\n", | |
| "\n", | |
| " z=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=21, header=[0,1]))\n", | |
| " z['PRN']=prn\n", | |
| " zft=z[1:7][:]\n", | |
| " zft['Type']='FT'\n", | |
| " zpt=z[8:14][:]\n", | |
| " zpt['Type']='PT'\n", | |
| " z=pd.concat([zft,zpt]).dropna(how='all',axis=1)\n", | |
| " \n", | |
| " sd=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=30,parse_cols=21, header=[0,1]))\n", | |
| " sd['PRN']=prn\n", | |
| " sdft=sd[1:7][:]\n", | |
| " sdft['Type']='FT'\n", | |
| " sdpt=sd[8:14][:]\n", | |
| " sdpt['Type']='PT'\n", | |
| " \n", | |
| " sd=pd.concat([sdft,sdpt]).dropna(how='all',axis=1)\n", | |
| " \n", | |
| " z['Benchmark type']='benchmark'\n", | |
| " sd['Benchmark type']='providercontrib'\n", | |
| " \n", | |
| " z.index.names=['Topic']\n", | |
| " sd.index.names=['Topic']\n", | |
| " \n", | |
| " return z,sd\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-132-06b8eac6d95c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mbenchmark_tmp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprovidercontrib_tmp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mget_benchmarks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mbenchmark\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbenchmark\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mbenchmark_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mprovidercontrib\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mprovidercontrib\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprovidercontrib_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-131-af1240caaa5a>\u001b[0m in \u001b[0;36mget_benchmarks\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mzpt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Type'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'PT'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0msd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcleanCols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparse_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m21\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0msd\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0msdft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msd\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 548\u001b[0;31m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 549\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# destroy all child elements (cells)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"dimension\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mdo_row\u001b[0;34m(self, row_elem)\u001b[0m\n\u001b[1;32m 663\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexplicit_row_number\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcharx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mrow_number\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cell name %r but row number is %r'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow_number\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 665\u001b[0;31m \u001b[0mxf_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell_elem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m's'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'0'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 666\u001b[0m \u001b[0mcell_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcell_elem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m't'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[0mtvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "benchmark=pd.DataFrame()\n", | |
| "providercontrib=pd.DataFrame()\n", | |
| "\n", | |
| "for filename in [f for f in os.listdir(directory) if not f.startswith('~')]:\n", | |
| " benchmark_tmp,providercontrib_tmp= get_benchmarks(fn='{}/{}'.format(directory,filename))\n", | |
| " benchmark=pd.concat([benchmark,benchmark_tmp])\n", | |
| " providercontrib=pd.concat([providercontrib,providercontrib_tmp])\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 172, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Ethnicity</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Domicile</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Type</th>\n", | |
| " <th>Benchmark type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First degree</th>\n", | |
| " <th>Other\\nUG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>...</th>\n", | |
| " <th>BME</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>88.3309</td>\n", | |
| " <td>87.7923</td>\n", | |
| " <td>88.632</td>\n", | |
| " <td>88.5606</td>\n", | |
| " <td>88.4478</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>88.4035</td>\n", | |
| " <td>87.3995</td>\n", | |
| " <td>88.7461</td>\n", | |
| " <td>89.4115</td>\n", | |
| " <td>...</td>\n", | |
| " <td>84.7989</td>\n", | |
| " <td>87.9403</td>\n", | |
| " <td>88.3826</td>\n", | |
| " <td>87.3999</td>\n", | |
| " <td>89.0245</td>\n", | |
| " <td>89.1227</td>\n", | |
| " <td>86.6562</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>benchmark</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>72.0203</td>\n", | |
| " <td>71.0119</td>\n", | |
| " <td>72.1508</td>\n", | |
| " <td>72.8637</td>\n", | |
| " <td>71.8199</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>71.9077</td>\n", | |
| " <td>73.463</td>\n", | |
| " <td>73.4956</td>\n", | |
| " <td>72.1592</td>\n", | |
| " <td>...</td>\n", | |
| " <td>70.5695</td>\n", | |
| " <td>70.3514</td>\n", | |
| " <td>72.2414</td>\n", | |
| " <td>71.8281</td>\n", | |
| " <td>72.1635</td>\n", | |
| " <td>72.1379</td>\n", | |
| " <td>69.7941</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>benchmark</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>81.8189</td>\n", | |
| " <td>81.2489</td>\n", | |
| " <td>82.1147</td>\n", | |
| " <td>82.1206</td>\n", | |
| " <td>81.8246</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>81.9071</td>\n", | |
| " <td>80.689</td>\n", | |
| " <td>82.9026</td>\n", | |
| " <td>82.2227</td>\n", | |
| " <td>...</td>\n", | |
| " <td>79.5073</td>\n", | |
| " <td>80.9901</td>\n", | |
| " <td>81.9287</td>\n", | |
| " <td>82.543</td>\n", | |
| " <td>81.2794</td>\n", | |
| " <td>82.1206</td>\n", | |
| " <td>81.6937</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>benchmark</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>3.44304</td>\n", | |
| " <td>3.37228</td>\n", | |
| " <td>3.56405</td>\n", | |
| " <td>3.42482</td>\n", | |
| " <td>3.39128</td>\n", | |
| " <td>N</td>\n", | |
| " <td>2.83198</td>\n", | |
| " <td>11.612</td>\n", | |
| " <td>3.84689</td>\n", | |
| " <td>2.46721</td>\n", | |
| " <td>...</td>\n", | |
| " <td>4.03444</td>\n", | |
| " <td>5.27223</td>\n", | |
| " <td>3.21838</td>\n", | |
| " <td>4.05566</td>\n", | |
| " <td>2.94869</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>benchmark</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>93.9778</td>\n", | |
| " <td>93.6149</td>\n", | |
| " <td>94.0882</td>\n", | |
| " <td>94.311</td>\n", | |
| " <td>93.9752</td>\n", | |
| " <td>R</td>\n", | |
| " <td>94.0575</td>\n", | |
| " <td>92.7333</td>\n", | |
| " <td>94.0585</td>\n", | |
| " <td>94.1474</td>\n", | |
| " <td>...</td>\n", | |
| " <td>92.4344</td>\n", | |
| " <td>92.3242</td>\n", | |
| " <td>94.1863</td>\n", | |
| " <td>92.4983</td>\n", | |
| " <td>95.3174</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>benchmark</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 21 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years Years \\\n", | |
| " Unnamed: 1_level_1 1 2 3 \n", | |
| "Topic \n", | |
| "The teaching on my course 88.3309 87.7923 88.632 88.5606 \n", | |
| "Assessment and feedback 72.0203 71.0119 72.1508 72.8637 \n", | |
| "Academic support 81.8189 81.2489 82.1147 82.1206 \n", | |
| "Non-continuation 3.44304 3.37228 3.56405 3.42482 \n", | |
| "Employment or further study 93.9778 93.6149 94.0882 94.311 \n", | |
| "\n", | |
| " Level of study Age \\\n", | |
| " First degree Other\\nUG Young Mature \n", | |
| "Topic \n", | |
| "The teaching on my course 88.4478 NaN 88.4035 87.3995 \n", | |
| "Assessment and feedback 71.8199 NaN 71.9077 73.463 \n", | |
| "Academic support 81.8246 NaN 81.9071 80.689 \n", | |
| "Non-continuation 3.39128 N 2.83198 11.612 \n", | |
| "Employment or further study 93.9752 R 94.0575 92.7333 \n", | |
| "\n", | |
| " Disadvantaged ... Ethnicity \\\n", | |
| " Yes No ... BME \n", | |
| "Topic ... \n", | |
| "The teaching on my course 88.7461 89.4115 ... 84.7989 \n", | |
| "Assessment and feedback 73.4956 72.1592 ... 70.5695 \n", | |
| "Academic support 82.9026 82.2227 ... 79.5073 \n", | |
| "Non-continuation 3.84689 2.46721 ... 4.03444 \n", | |
| "Employment or further study 94.0585 94.1474 ... 92.4344 \n", | |
| "\n", | |
| " Disabled Sex Domicile \\\n", | |
| " Yes No Male Female UK \n", | |
| "Topic \n", | |
| "The teaching on my course 87.9403 88.3826 87.3999 89.0245 89.1227 \n", | |
| "Assessment and feedback 70.3514 72.2414 71.8281 72.1635 72.1379 \n", | |
| "Academic support 80.9901 81.9287 82.543 81.2794 82.1206 \n", | |
| "Non-continuation 5.27223 3.21838 4.05566 2.94869 0 \n", | |
| "Employment or further study 92.3242 94.1863 92.4983 95.3174 0 \n", | |
| "\n", | |
| " PRN Type Benchmark type \n", | |
| " Other EU \n", | |
| "Topic \n", | |
| "The teaching on my course 86.6562 10007792 FT benchmark \n", | |
| "Assessment and feedback 69.7941 10007792 FT benchmark \n", | |
| "Academic support 81.6937 10007792 FT benchmark \n", | |
| "Non-continuation 0 10007792 FT benchmark \n", | |
| "Employment or further study 0 10007792 FT benchmark \n", | |
| "\n", | |
| "[5 rows x 21 columns]" | |
| ] | |
| }, | |
| "execution_count": 172, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "benchmark.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 861, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th>...</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Type</th>\n", | |
| " <th>Benchmark type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First Degree</th>\n", | |
| " <th>Other UG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>4.46</td>\n", | |
| " <td>3.81</td>\n", | |
| " <td>2.65</td>\n", | |
| " <td>2.2</td>\n", | |
| " <td>2.57</td>\n", | |
| " <td>2.85</td>\n", | |
| " <td>3.39</td>\n", | |
| " <td>4.27</td>\n", | |
| " <td>3.3</td>\n", | |
| " <td>4.26</td>\n", | |
| " <td>...</td>\n", | |
| " <td>4.21</td>\n", | |
| " <td>2.32</td>\n", | |
| " <td>2.16</td>\n", | |
| " <td>3.77</td>\n", | |
| " <td>3.25</td>\n", | |
| " <td>3.52</td>\n", | |
| " <td>4.69</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>3.06</td>\n", | |
| " <td>4.4</td>\n", | |
| " <td>4.88</td>\n", | |
| " <td>4.75</td>\n", | |
| " <td>4.33</td>\n", | |
| " <td>2.64</td>\n", | |
| " <td>2.03</td>\n", | |
| " <td>4.48</td>\n", | |
| " <td>4.7</td>\n", | |
| " <td>2.79</td>\n", | |
| " <td>...</td>\n", | |
| " <td>3.81</td>\n", | |
| " <td>4.34</td>\n", | |
| " <td>4.74</td>\n", | |
| " <td>2.05</td>\n", | |
| " <td>4.1</td>\n", | |
| " <td>4.52</td>\n", | |
| " <td>4.24</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>2.98</td>\n", | |
| " <td>3.48</td>\n", | |
| " <td>3.71</td>\n", | |
| " <td>3.09</td>\n", | |
| " <td>2.54</td>\n", | |
| " <td>3.36</td>\n", | |
| " <td>2.75</td>\n", | |
| " <td>2.58</td>\n", | |
| " <td>4.17</td>\n", | |
| " <td>2.74</td>\n", | |
| " <td>...</td>\n", | |
| " <td>3.14</td>\n", | |
| " <td>3.19</td>\n", | |
| " <td>2.19</td>\n", | |
| " <td>3.71</td>\n", | |
| " <td>3.07</td>\n", | |
| " <td>3.08</td>\n", | |
| " <td>2.03</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>4.91</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2.69</td>\n", | |
| " <td>4.89</td>\n", | |
| " <td>2.1</td>\n", | |
| " <td>4.86</td>\n", | |
| " <td>4.07</td>\n", | |
| " <td>3.02</td>\n", | |
| " <td>2.08</td>\n", | |
| " <td>3.35</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.29</td>\n", | |
| " <td>2.5</td>\n", | |
| " <td>3.71</td>\n", | |
| " <td>2.61</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>2.3</td>\n", | |
| " <td>0</td>\n", | |
| " <td>3.42</td>\n", | |
| " <td>2.69</td>\n", | |
| " <td>3.92</td>\n", | |
| " <td>3.11</td>\n", | |
| " <td>2.02</td>\n", | |
| " <td>4.35</td>\n", | |
| " <td>2.53</td>\n", | |
| " <td>4.36</td>\n", | |
| " <td>...</td>\n", | |
| " <td>4.91</td>\n", | |
| " <td>3.1</td>\n", | |
| " <td>3.01</td>\n", | |
| " <td>3.60</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 22 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years Years \\\n", | |
| " Unnamed: 1_level_1 1 2 3 \n", | |
| "Topic \n", | |
| "The teaching on my course 4.46 3.81 2.65 2.2 \n", | |
| "Assessment and feedback 3.06 4.4 4.88 4.75 \n", | |
| "Academic support 2.98 3.48 3.71 3.09 \n", | |
| "Non-continuation 4.91 0 2.69 4.89 \n", | |
| "Employment or further study 2.3 0 3.42 2.69 \n", | |
| "\n", | |
| " Level of study Age \\\n", | |
| " First Degree Other UG Young Mature \n", | |
| "Topic \n", | |
| "The teaching on my course 2.57 2.85 3.39 4.27 \n", | |
| "Assessment and feedback 4.33 2.64 2.03 4.48 \n", | |
| "Academic support 2.54 3.36 2.75 2.58 \n", | |
| "Non-continuation 2.1 4.86 4.07 3.02 \n", | |
| "Employment or further study 3.92 3.11 2.02 4.35 \n", | |
| "\n", | |
| " Disadvantaged ... Disabled \\\n", | |
| " Yes No ... Yes \n", | |
| "Topic ... \n", | |
| "The teaching on my course 3.3 4.26 ... 4.21 \n", | |
| "Assessment and feedback 4.7 2.79 ... 3.81 \n", | |
| "Academic support 4.17 2.74 ... 3.14 \n", | |
| "Non-continuation 2.08 3.35 ... 2.29 \n", | |
| "Employment or further study 2.53 4.36 ... 4.91 \n", | |
| "\n", | |
| " Sex Domicile PRN \\\n", | |
| " No Male Female UK Other EU Non EU \n", | |
| "Topic \n", | |
| "The teaching on my course 2.32 2.16 3.77 3.25 3.52 4.69 nan \n", | |
| "Assessment and feedback 4.34 4.74 2.05 4.1 4.52 4.24 nan \n", | |
| "Academic support 3.19 2.19 3.71 3.07 3.08 2.03 nan \n", | |
| "Non-continuation 2.5 3.71 2.61 NaN NaN NaN nan \n", | |
| "Employment or further study 3.1 3.01 3.60 NaN NaN NaN nan \n", | |
| "\n", | |
| " Type Benchmark type \n", | |
| " \n", | |
| "Topic \n", | |
| "The teaching on my course FT providercontrib \n", | |
| "Assessment and feedback FT providercontrib \n", | |
| "Academic support FT providercontrib \n", | |
| "Non-continuation FT providercontrib \n", | |
| "Employment or further study FT providercontrib \n", | |
| "\n", | |
| "[5 rows x 22 columns]" | |
| ] | |
| }, | |
| "execution_count": 861, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "providercontrib.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 891, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Benchmark type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>85.737368</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>71.064694</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>80.825662</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>6.100000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>86.800000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Benchmark type Heading \\\n", | |
| "0 The teaching on my course nan benchmark All years \n", | |
| "1 Assessment and feedback nan benchmark All years \n", | |
| "2 Academic support nan benchmark All years \n", | |
| "3 Non-continuation nan benchmark All years \n", | |
| "4 Employment or further study nan benchmark All years \n", | |
| "\n", | |
| " Subheading value \n", | |
| "0 Unnamed: 1_level_1 85.737368 \n", | |
| "1 Unnamed: 1_level_1 71.064694 \n", | |
| "2 Unnamed: 1_level_1 80.825662 \n", | |
| "3 Unnamed: 1_level_1 6.100000 \n", | |
| "4 Unnamed: 1_level_1 86.800000 " | |
| ] | |
| }, | |
| "execution_count": 891, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "benchmark_long=longify(benchmark,id_vars=['Topic','PRN','Benchmark type'],valname='value')\n", | |
| "benchmark_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 892, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Benchmark type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>4.46</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>3.06</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>2.98</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>4.91</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>providercontrib</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>2.30</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Benchmark type Heading \\\n", | |
| "0 The teaching on my course nan providercontrib All years \n", | |
| "1 Assessment and feedback nan providercontrib All years \n", | |
| "2 Academic support nan providercontrib All years \n", | |
| "3 Non-continuation nan providercontrib All years \n", | |
| "4 Employment or further study nan providercontrib All years \n", | |
| "\n", | |
| " Subheading value \n", | |
| "0 Unnamed: 1_level_1 4.46 \n", | |
| "1 Unnamed: 1_level_1 3.06 \n", | |
| "2 Unnamed: 1_level_1 2.98 \n", | |
| "3 Unnamed: 1_level_1 4.91 \n", | |
| "4 Unnamed: 1_level_1 2.30 " | |
| ] | |
| }, | |
| "execution_count": 892, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "providercontrib_long=longify(providercontrib,id_vars=['Topic','PRN','Benchmark type'],valname='value')\n", | |
| "providercontrib_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 937, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Benchmark type</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>85.737368</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>71.064694</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>80.825662</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>6.100000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>benchmark</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>86.800000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Benchmark type Heading \\\n", | |
| "0 The teaching on my course nan benchmark All years \n", | |
| "1 Assessment and feedback nan benchmark All years \n", | |
| "2 Academic support nan benchmark All years \n", | |
| "3 Non-continuation nan benchmark All years \n", | |
| "4 Employment or further study nan benchmark All years \n", | |
| "\n", | |
| " Subheading value \n", | |
| "0 Unnamed: 1_level_1 85.737368 \n", | |
| "1 Unnamed: 1_level_1 71.064694 \n", | |
| "2 Unnamed: 1_level_1 80.825662 \n", | |
| "3 Unnamed: 1_level_1 6.100000 \n", | |
| "4 Unnamed: 1_level_1 86.800000 " | |
| ] | |
| }, | |
| "execution_count": 937, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "benchmarkprovider=pd.concat([benchmark_long,providercontrib_long])\n", | |
| "benchmarkprovider.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 941, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th>Benchmark type</th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>benchmark</th>\n", | |
| " <th>providercontrib</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>Age</td>\n", | |
| " <td>Mature</td>\n", | |
| " <td>76.865662</td>\n", | |
| " <td>2.58</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>Age</td>\n", | |
| " <td>Young</td>\n", | |
| " <td>78.745662</td>\n", | |
| " <td>2.75</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>80.825662</td>\n", | |
| " <td>2.98</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>Disabled</td>\n", | |
| " <td>No</td>\n", | |
| " <td>76.395662</td>\n", | |
| " <td>3.19</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>Disabled</td>\n", | |
| " <td>Yes</td>\n", | |
| " <td>77.925662</td>\n", | |
| " <td>3.14</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| "Benchmark type Topic PRN Heading Subheading \\\n", | |
| "0 Academic support nan Age Mature \n", | |
| "1 Academic support nan Age Young \n", | |
| "2 Academic support nan All years Unnamed: 1_level_1 \n", | |
| "3 Academic support nan Disabled No \n", | |
| "4 Academic support nan Disabled Yes \n", | |
| "\n", | |
| "Benchmark type benchmark providercontrib \n", | |
| "0 76.865662 2.58 \n", | |
| "1 78.745662 2.75 \n", | |
| "2 80.825662 2.98 \n", | |
| "3 76.395662 3.19 \n", | |
| "4 77.925662 3.14 " | |
| ] | |
| }, | |
| "execution_count": 941, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "benchmarkprovider_wide=benchmarkprovider.pivot_table(index=['Topic','PRN','Heading','Subheading'],\n", | |
| " columns='Benchmark type',\n", | |
| " values='value').reset_index()\n", | |
| "benchmarkprovider_wide.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "benchmarkprovider_wide.to_csv('tef_benchmarkprovider_wide.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Differences (a-b)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 149, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#This code is actually the same as for indicators, I think?\n", | |
| "def get_differences(fn):\n", | |
| " sn='Difference (a-b)'\n", | |
| " print(fn)\n", | |
| " institution,prn=getPRN(fn,sn)\n", | |
| "\n", | |
| " i=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=22,\n", | |
| " header=[0,1]).dropna(how='all',axis=0).dropna(how='all',axis=1))\n", | |
| " i.index.names=['Topic']\n", | |
| " i['PRN']=prn\n", | |
| " \n", | |
| " ift=i[1:7][:]\n", | |
| " ift['Type']='FT'\n", | |
| " ipt=i[8:14][:]\n", | |
| " ipt['Type']='PT'\n", | |
| " \n", | |
| " i=pd.concat([ift,ipt]).dropna(how='all',axis=1)\n", | |
| "\n", | |
| " i=i[:].reset_index().set_index(['Topic','PRN'])\n", | |
| "\n", | |
| " return i" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 150, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-150-b6079aa62e26>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mdifferences\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdifferences\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mget_differences\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mdifferences\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m#.to_csv('tef_differences.csv')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-149-a35ae05f22a5>\u001b[0m in \u001b[0;36mget_differences\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m i=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=8,parse_cols=22,\n\u001b[0;32m----> 8\u001b[0;31m header=[0,1]).dropna(how='all',axis=0).dropna(how='all',axis=1))\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Topic'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0mrow_tag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"row\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mself_do_row\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_row\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 546\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 548\u001b[0m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/lib/python3.5/xml/etree/ElementTree.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1295\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1297\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "differences=pd.DataFrame()\n", | |
| "\n", | |
| "for filename in [f for f in os.listdir(directory) if not f.startswith('~')]:\n", | |
| " differences=pd.concat([differences,get_differences('{}/{}'.format(directory,filename))])\n", | |
| " \n", | |
| "differences.head()#.to_csv('tef_differences.csv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 173, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Ethnicity</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " <th>Type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First degree</th>\n", | |
| " <th>Other\\nUG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>White</th>\n", | |
| " <th>BME</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>2.34886</td>\n", | |
| " <td>3.04161</td>\n", | |
| " <td>1.22077</td>\n", | |
| " <td>2.67949</td>\n", | |
| " <td>2.23199</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>2.35867</td>\n", | |
| " <td>2.22319</td>\n", | |
| " <td>2.87256</td>\n", | |
| " <td>2.12792</td>\n", | |
| " <td>2.60783</td>\n", | |
| " <td>1.95624</td>\n", | |
| " <td>3.3292</td>\n", | |
| " <td>2.21903</td>\n", | |
| " <td>1.62661</td>\n", | |
| " <td>2.88701</td>\n", | |
| " <td>2.45772</td>\n", | |
| " <td>5.55582</td>\n", | |
| " <td>-0.547378</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>3.4967</td>\n", | |
| " <td>5.46856</td>\n", | |
| " <td>2.88462</td>\n", | |
| " <td>2.15646</td>\n", | |
| " <td>3.69709</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>3.55698</td>\n", | |
| " <td>2.72407</td>\n", | |
| " <td>1.44683</td>\n", | |
| " <td>3.38994</td>\n", | |
| " <td>3.4811</td>\n", | |
| " <td>3.16111</td>\n", | |
| " <td>3.69043</td>\n", | |
| " <td>3.47104</td>\n", | |
| " <td>2.26725</td>\n", | |
| " <td>4.41311</td>\n", | |
| " <td>3.32879</td>\n", | |
| " <td>5.40222</td>\n", | |
| " <td>1.57468</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>2.07944</td>\n", | |
| " <td>3.17045</td>\n", | |
| " <td>1.8212</td>\n", | |
| " <td>1.2551</td>\n", | |
| " <td>2.07377</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>2.10283</td>\n", | |
| " <td>1.77954</td>\n", | |
| " <td>1.31398</td>\n", | |
| " <td>1.76657</td>\n", | |
| " <td>2.04698</td>\n", | |
| " <td>2.38284</td>\n", | |
| " <td>3.38082</td>\n", | |
| " <td>1.90708</td>\n", | |
| " <td>1.69626</td>\n", | |
| " <td>2.36494</td>\n", | |
| " <td>1.83747</td>\n", | |
| " <td>3.99566</td>\n", | |
| " <td>1.23495</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation (b-a)</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>1.16474</td>\n", | |
| " <td>1.24943</td>\n", | |
| " <td>1.16828</td>\n", | |
| " <td>1.10632</td>\n", | |
| " <td>1.11174</td>\n", | |
| " <td>N</td>\n", | |
| " <td>1.07434</td>\n", | |
| " <td>2.37925</td>\n", | |
| " <td>0.513553</td>\n", | |
| " <td>0.939633</td>\n", | |
| " <td>1.07013</td>\n", | |
| " <td>1.42161</td>\n", | |
| " <td>1.2845</td>\n", | |
| " <td>1.16958</td>\n", | |
| " <td>1.60604</td>\n", | |
| " <td>0.832465</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>0.521486</td>\n", | |
| " <td>0.286413</td>\n", | |
| " <td>1.12521</td>\n", | |
| " <td>0.0209712</td>\n", | |
| " <td>0.548394</td>\n", | |
| " <td>R</td>\n", | |
| " <td>0.534478</td>\n", | |
| " <td>0.318813</td>\n", | |
| " <td>0.179146</td>\n", | |
| " <td>0.506285</td>\n", | |
| " <td>0.436058</td>\n", | |
| " <td>1.50502</td>\n", | |
| " <td>1.45517</td>\n", | |
| " <td>0.417999</td>\n", | |
| " <td>0.927354</td>\n", | |
| " <td>0.153997</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>4.00179</td>\n", | |
| " <td>4.95353</td>\n", | |
| " <td>3.77736</td>\n", | |
| " <td>3.01536</td>\n", | |
| " <td>4.04543</td>\n", | |
| " <td>R</td>\n", | |
| " <td>4.03955</td>\n", | |
| " <td>3.41268</td>\n", | |
| " <td>1.62383</td>\n", | |
| " <td>3.62774</td>\n", | |
| " <td>3.97737</td>\n", | |
| " <td>4.89422</td>\n", | |
| " <td>3.35868</td>\n", | |
| " <td>4.09564</td>\n", | |
| " <td>3.6354</td>\n", | |
| " <td>4.33353</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation (b-a)</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>16.2163</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>16.2163</td>\n", | |
| " <td>0</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>-0.722503</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <th>10007792</th>\n", | |
| " <td>19.8536</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>27.152</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>PT</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years \\\n", | |
| " Unnamed: 1_level_1 \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.34886 \n", | |
| "Assessment and feedback 10007792 3.4967 \n", | |
| "Academic support 10007792 2.07944 \n", | |
| "Non-continuation (b-a) 10007792 1.16474 \n", | |
| "Employment or further study 10007792 0.521486 \n", | |
| "Highly skilled employment or further study 10007792 4.00179 \n", | |
| "The teaching on my course 10007792 SUP \n", | |
| "Assessment and feedback 10007792 SUP \n", | |
| "Academic support 10007792 SUP \n", | |
| "Non-continuation (b-a) 10007792 16.2163 \n", | |
| "Employment or further study 10007792 -0.722503 \n", | |
| "Highly skilled employment or further study 10007792 19.8536 \n", | |
| "\n", | |
| " Years \\\n", | |
| " 1 2 \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 3.04161 1.22077 \n", | |
| "Assessment and feedback 10007792 5.46856 2.88462 \n", | |
| "Academic support 10007792 3.17045 1.8212 \n", | |
| "Non-continuation (b-a) 10007792 1.24943 1.16828 \n", | |
| "Employment or further study 10007792 0.286413 1.12521 \n", | |
| "Highly skilled employment or further study 10007792 4.95353 3.77736 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation (b-a) 10007792 N N \n", | |
| "Employment or further study 10007792 N DP \n", | |
| "Highly skilled employment or further study 10007792 N DP \n", | |
| "\n", | |
| " Level of study \\\n", | |
| " 3 First degree \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.67949 2.23199 \n", | |
| "Assessment and feedback 10007792 2.15646 3.69709 \n", | |
| "Academic support 10007792 1.2551 2.07377 \n", | |
| "Non-continuation (b-a) 10007792 1.10632 1.11174 \n", | |
| "Employment or further study 10007792 0.0209712 0.548394 \n", | |
| "Highly skilled employment or further study 10007792 3.01536 4.04543 \n", | |
| "The teaching on my course 10007792 N N \n", | |
| "Assessment and feedback 10007792 N N \n", | |
| "Academic support 10007792 N N \n", | |
| "Non-continuation (b-a) 10007792 N 16.2163 \n", | |
| "Employment or further study 10007792 DP DP \n", | |
| "Highly skilled employment or further study 10007792 DP 27.152 \n", | |
| "\n", | |
| " Age \\\n", | |
| " Other\\nUG Young \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 NaN 2.35867 \n", | |
| "Assessment and feedback 10007792 NaN 3.55698 \n", | |
| "Academic support 10007792 NaN 2.10283 \n", | |
| "Non-continuation (b-a) 10007792 N 1.07434 \n", | |
| "Employment or further study 10007792 R 0.534478 \n", | |
| "Highly skilled employment or further study 10007792 R 4.03955 \n", | |
| "The teaching on my course 10007792 SUP N \n", | |
| "Assessment and feedback 10007792 SUP N \n", | |
| "Academic support 10007792 SUP N \n", | |
| "Non-continuation (b-a) 10007792 0 N \n", | |
| "Employment or further study 10007792 SUP N \n", | |
| "Highly skilled employment or further study 10007792 SUP N \n", | |
| "\n", | |
| " Disadvantaged \\\n", | |
| " Mature Yes \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.22319 2.87256 \n", | |
| "Assessment and feedback 10007792 2.72407 1.44683 \n", | |
| "Academic support 10007792 1.77954 1.31398 \n", | |
| "Non-continuation (b-a) 10007792 2.37925 0.513553 \n", | |
| "Employment or further study 10007792 0.318813 0.179146 \n", | |
| "Highly skilled employment or further study 10007792 3.41268 1.62383 \n", | |
| "The teaching on my course 10007792 SUP N \n", | |
| "Assessment and feedback 10007792 SUP N \n", | |
| "Academic support 10007792 SUP N \n", | |
| "Non-continuation (b-a) 10007792 DP NaN \n", | |
| "Employment or further study 10007792 DP R \n", | |
| "Highly skilled employment or further study 10007792 DP R \n", | |
| "\n", | |
| " Ethnicity \\\n", | |
| " No White \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.12792 2.60783 \n", | |
| "Assessment and feedback 10007792 3.38994 3.4811 \n", | |
| "Academic support 10007792 1.76657 2.04698 \n", | |
| "Non-continuation (b-a) 10007792 0.939633 1.07013 \n", | |
| "Employment or further study 10007792 0.506285 0.436058 \n", | |
| "Highly skilled employment or further study 10007792 3.62774 3.97737 \n", | |
| "The teaching on my course 10007792 N N \n", | |
| "Assessment and feedback 10007792 N N \n", | |
| "Academic support 10007792 N N \n", | |
| "Non-continuation (b-a) 10007792 N DP \n", | |
| "Employment or further study 10007792 N DP \n", | |
| "Highly skilled employment or further study 10007792 N DP \n", | |
| "\n", | |
| " Disabled \\\n", | |
| " BME Yes \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 1.95624 3.3292 \n", | |
| "Assessment and feedback 10007792 3.16111 3.69043 \n", | |
| "Academic support 10007792 2.38284 3.38082 \n", | |
| "Non-continuation (b-a) 10007792 1.42161 1.2845 \n", | |
| "Employment or further study 10007792 1.50502 1.45517 \n", | |
| "Highly skilled employment or further study 10007792 4.89422 3.35868 \n", | |
| "The teaching on my course 10007792 R N \n", | |
| "Assessment and feedback 10007792 R N \n", | |
| "Academic support 10007792 R N \n", | |
| "Non-continuation (b-a) 10007792 N N \n", | |
| "Employment or further study 10007792 NaN N \n", | |
| "Highly skilled employment or further study 10007792 NaN N \n", | |
| "\n", | |
| " Sex \\\n", | |
| " No Male \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.21903 1.62661 \n", | |
| "Assessment and feedback 10007792 3.47104 2.26725 \n", | |
| "Academic support 10007792 1.90708 1.69626 \n", | |
| "Non-continuation (b-a) 10007792 1.16958 1.60604 \n", | |
| "Employment or further study 10007792 0.417999 0.927354 \n", | |
| "Highly skilled employment or further study 10007792 4.09564 3.6354 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation (b-a) 10007792 DP N \n", | |
| "Employment or further study 10007792 DP DP \n", | |
| "Highly skilled employment or further study 10007792 DP DP \n", | |
| "\n", | |
| " Domicile \\\n", | |
| " Female UK \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 2.88701 2.45772 \n", | |
| "Assessment and feedback 10007792 4.41311 3.32879 \n", | |
| "Academic support 10007792 2.36494 1.83747 \n", | |
| "Non-continuation (b-a) 10007792 0.832465 0 \n", | |
| "Employment or further study 10007792 0.153997 0 \n", | |
| "Highly skilled employment or further study 10007792 4.33353 0 \n", | |
| "The teaching on my course 10007792 SUP SUP \n", | |
| "Assessment and feedback 10007792 SUP SUP \n", | |
| "Academic support 10007792 SUP SUP \n", | |
| "Non-continuation (b-a) 10007792 DP 0 \n", | |
| "Employment or further study 10007792 DP 0 \n", | |
| "Highly skilled employment or further study 10007792 DP 0 \n", | |
| "\n", | |
| " Type \n", | |
| " Other EU Non EU \n", | |
| "Topic PRN \n", | |
| "The teaching on my course 10007792 5.55582 -0.547378 FT \n", | |
| "Assessment and feedback 10007792 5.40222 1.57468 FT \n", | |
| "Academic support 10007792 3.99566 1.23495 FT \n", | |
| "Non-continuation (b-a) 10007792 0 0 FT \n", | |
| "Employment or further study 10007792 0 0 FT \n", | |
| "Highly skilled employment or further study 10007792 0 0 FT \n", | |
| "The teaching on my course 10007792 R N PT \n", | |
| "Assessment and feedback 10007792 R N PT \n", | |
| "Academic support 10007792 R N PT \n", | |
| "Non-continuation (b-a) 10007792 0 0 PT \n", | |
| "Employment or further study 10007792 0 0 PT \n", | |
| "Highly skilled employment or further study 10007792 0 0 PT " | |
| ] | |
| }, | |
| "execution_count": 173, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first 20 rows of the differences table\n", | |
| "differences.head(20)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 853, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>1.462632</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>2.235306</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>1.164338</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation (b-a)</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>0.782399</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>nan</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>5.400000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading value\n", | |
| "0 The teaching on my course nan All years Unnamed: 1_level_1 1.462632\n", | |
| "1 Assessment and feedback nan All years Unnamed: 1_level_1 2.235306\n", | |
| "2 Academic support nan All years Unnamed: 1_level_1 1.164338\n", | |
| "3 Non-continuation (b-a) nan All years Unnamed: 1_level_1 0.782399\n", | |
| "4 Employment or further study nan All years Unnamed: 1_level_1 5.400000" | |
| ] | |
| }, | |
| "execution_count": 853, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Reshape differences with the longify helper (not defined in this cell), passing 'value' as its second argument;\n", | |
| "#the saved output of this cell shows the columns Topic, PRN, Heading, Subheading and value\n", | |
| "differences_long=longify(differences,'value')\n", | |
| "differences_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 854, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Save the long format table as CSV (relative path, so it lands in the current working directory), leaving out the row index\n", | |
| "differences_long.to_csv('tef_differences_long.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Z-scores\n", | |
| "\n", | |
| "These relate to the indicators. They are read from the `Z-score` sheet of each institution's metrics workbook." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 165, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-165-a2cc9a145e05>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0mz_tmp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msd_tmp\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mget_zscores\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mz\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mz_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0msd\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msd\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msd_tmp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-165-a2cc9a145e05>\u001b[0m in \u001b[0;36mget_zscores\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0minstitution\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetPRN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mz\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcleanCols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparse_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m22\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mheader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mzft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mz\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0mrow_tag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"row\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mself_do_row\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_row\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 546\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0melem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 547\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mrow_tag\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 548\u001b[0m \u001b[0mself_do_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/lib/python3.5/xml/etree/ElementTree.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1295\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1297\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_events\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mevent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parser\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/lib/python3.5/xml/etree/ElementTree.py\u001b[0m in \u001b[0;36mread_events\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1263\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1264\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1265\u001b[0;31m \u001b[0mevent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1266\u001b[0m \u001b[0;31m# Avoid retaining references to past events\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_index\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def _read_zscore_table(xl, sn, skiprows, prn, error_type):\n", | |
| "    '''Read one FT/PT table from sheet `sn` of the already opened workbook `xl`.\n", | |
| "\n", | |
| "    xl: a pd.ExcelFile, so the workbook is not parsed from disk again for each table.\n", | |
| "    sn: name of the sheet to read.\n", | |
| "    skiprows: number of leading sheet rows to skip before the two header rows.\n", | |
| "    prn: value returned by getPRN, written to the PRN column of every row.\n", | |
| "    error_type: label written to the 'Error type' column ('z' or 'sd').\n", | |
| "\n", | |
| "    Returns a dataframe indexed by Topic: parsed rows 1-6 (0-based) tagged Type 'FT',\n", | |
| "    followed by parsed rows 8-13 tagged Type 'PT', with all-empty columns dropped.\n", | |
| "    '''\n", | |
| "    #sheetname/parse_cols are the argument names of the pandas release this notebook was run with;\n", | |
| "    #later pandas releases renamed them to sheet_name/usecols\n", | |
| "    raw=cleanCols(pd.read_excel(xl, sheetname=sn, skiprows=skiprows, parse_cols=22, header=[0,1]))\n", | |
| "    raw['PRN']=prn\n", | |
| "\n", | |
| "    #Take explicit copies so that adding the Type column never writes to a slice of raw\n", | |
| "    ft=raw[1:7].copy()\n", | |
| "    ft['Type']='FT'\n", | |
| "    pt=raw[8:14].copy()\n", | |
| "    pt['Type']='PT'\n", | |
| "\n", | |
| "    table=pd.concat([ft,pt]).dropna(how='all',axis=1)\n", | |
| "    table['Error type']=error_type\n", | |
| "    table.index.names=['Topic']\n", | |
| "    return table\n", | |
| "\n", | |
| "def get_zscores(fn):\n", | |
| "    '''Load the z and sd tables from the 'Z-score' sheet of the workbook at path `fn`.\n", | |
| "\n", | |
| "    Prints `fn` as a progress indicator and returns a tuple (z, sd) of dataframes\n", | |
| "    indexed by Topic, each carrying PRN, Type ('FT'/'PT') and 'Error type' columns.\n", | |
| "    '''\n", | |
| "    sn='Z-score'\n", | |
| "    print(fn)\n", | |
| "    _institution,prn=getPRN(fn,sn)\n", | |
| "\n", | |
| "    #Open the workbook once and read both tables from it, rather than parsing the file once per table\n", | |
| "    with pd.ExcelFile(fn) as xl:\n", | |
| "        #8 and 30 are the sheet rows skipped before each table's header rows\n", | |
| "        #NOTE(review): presumably fixed by the TEF workbook layout - confirm against the spreadsheet\n", | |
| "        z=_read_zscore_table(xl, sn, 8, prn, 'z')\n", | |
| "        sd=_read_zscore_table(xl, sn, 30, prn, 'sd')\n", | |
| "\n", | |
| "    return z,sd\n", | |
| "\n", | |
| "#fn='tef/TEST_TEF_Metrics_workbook_exemplar.xlsx'\n", | |
| "#z,sd= get_zscores(fn)\n", | |
| "\n", | |
| "z_parts=[]\n", | |
| "sd_parts=[]\n", | |
| "\n", | |
| "#Skip files starting with '~'; sort the listing so that the row order is the same on every run\n", | |
| "for filename in sorted(f for f in os.listdir(directory) if not f.startswith('~')):\n", | |
| "    z_tmp,sd_tmp= get_zscores(fn='{}/{}'.format(directory,filename))\n", | |
| "    z_parts.append(z_tmp)\n", | |
| "    sd_parts.append(sd_tmp)\n", | |
| "\n", | |
| "#Concatenate once at the end; pd.concat rejects an empty list, so keep the empty dataframe in that case\n", | |
| "z=pd.concat(z_parts) if z_parts else pd.DataFrame()\n", | |
| "sd=pd.concat(sd_parts) if sd_parts else pd.DataFrame()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 174, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th>...</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disabled</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Type</th>\n", | |
| " <th>Error type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First degree</th>\n", | |
| " <th>Other\\nUG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>7.10342</td>\n", | |
| " <td>5.3252</td>\n", | |
| " <td>2.03351</td>\n", | |
| " <td>4.94598</td>\n", | |
| " <td>6.77154</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>6.85589</td>\n", | |
| " <td>1.80743</td>\n", | |
| " <td>2.75972</td>\n", | |
| " <td>5.39015</td>\n", | |
| " <td>...</td>\n", | |
| " <td>3.45701</td>\n", | |
| " <td>6.30257</td>\n", | |
| " <td>3.06317</td>\n", | |
| " <td>6.87317</td>\n", | |
| " <td>6.85111</td>\n", | |
| " <td>3.56633</td>\n", | |
| " <td>-0.569411</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>7.6247</td>\n", | |
| " <td>6.93072</td>\n", | |
| " <td>3.48961</td>\n", | |
| " <td>2.83967</td>\n", | |
| " <td>8.04842</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>7.43428</td>\n", | |
| " <td>1.639</td>\n", | |
| " <td>0.989925</td>\n", | |
| " <td>6.05305</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.73797</td>\n", | |
| " <td>7.11754</td>\n", | |
| " <td>3.19442</td>\n", | |
| " <td>7.34698</td>\n", | |
| " <td>6.53844</td>\n", | |
| " <td>2.50602</td>\n", | |
| " <td>1.34462</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>5.27047</td>\n", | |
| " <td>4.67032</td>\n", | |
| " <td>2.57395</td>\n", | |
| " <td>1.91782</td>\n", | |
| " <td>5.25888</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>5.13993</td>\n", | |
| " <td>1.19991</td>\n", | |
| " <td>1.05724</td>\n", | |
| " <td>3.68713</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.94066</td>\n", | |
| " <td>4.53997</td>\n", | |
| " <td>2.8402</td>\n", | |
| " <td>4.50317</td>\n", | |
| " <td>4.20403</td>\n", | |
| " <td>2.22035</td>\n", | |
| " <td>1.19656</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>5.74893</td>\n", | |
| " <td>3.6081</td>\n", | |
| " <td>3.10899</td>\n", | |
| " <td>3.35249</td>\n", | |
| " <td>5.70975</td>\n", | |
| " <td>N</td>\n", | |
| " <td>5.63506</td>\n", | |
| " <td>2.20931</td>\n", | |
| " <td>0.824919</td>\n", | |
| " <td>4.90464</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.97203</td>\n", | |
| " <td>5.51903</td>\n", | |
| " <td>5.08287</td>\n", | |
| " <td>3.19744</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>1.84464</td>\n", | |
| " <td>0.546067</td>\n", | |
| " <td>2.44282</td>\n", | |
| " <td>0.0442631</td>\n", | |
| " <td>1.91896</td>\n", | |
| " <td>R</td>\n", | |
| " <td>1.84295</td>\n", | |
| " <td>0.267295</td>\n", | |
| " <td>0.222057</td>\n", | |
| " <td>1.63644</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.69252</td>\n", | |
| " <td>1.4045</td>\n", | |
| " <td>2.04653</td>\n", | |
| " <td>0.436756</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <td>7.86696</td>\n", | |
| " <td>5.49485</td>\n", | |
| " <td>4.41337</td>\n", | |
| " <td>3.52352</td>\n", | |
| " <td>7.89039</td>\n", | |
| " <td>R</td>\n", | |
| " <td>7.59202</td>\n", | |
| " <td>1.84611</td>\n", | |
| " <td>1.07268</td>\n", | |
| " <td>6.43625</td>\n", | |
| " <td>...</td>\n", | |
| " <td>2.38469</td>\n", | |
| " <td>7.50914</td>\n", | |
| " <td>4.99549</td>\n", | |
| " <td>6.1014</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>1.67569</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>N</td>\n", | |
| " <td>1.67569</td>\n", | |
| " <td>0</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>-0.266142</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Highly skilled employment or further study</th>\n", | |
| " <td>4.05138</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>3.70309</td>\n", | |
| " <td>SUP</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>R</td>\n", | |
| " <td>N</td>\n", | |
| " <td>...</td>\n", | |
| " <td>N</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>DP</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>PT</td>\n", | |
| " <td>z</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>12 rows × 22 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years Years \\\n", | |
| " Unnamed: 1_level_1 1 \n", | |
| "Topic \n", | |
| "The teaching on my course 7.10342 5.3252 \n", | |
| "Assessment and feedback 7.6247 6.93072 \n", | |
| "Academic support 5.27047 4.67032 \n", | |
| "Non-continuation 5.74893 3.6081 \n", | |
| "Employment or further study 1.84464 0.546067 \n", | |
| "Highly skilled employment or further study 7.86696 5.49485 \n", | |
| "The teaching on my course SUP SUP \n", | |
| "Assessment and feedback SUP SUP \n", | |
| "Academic support SUP SUP \n", | |
| "Non-continuation 1.67569 N \n", | |
| "Employment or further study -0.266142 N \n", | |
| "Highly skilled employment or further study 4.05138 N \n", | |
| "\n", | |
| " Level of study \\\n", | |
| " 2 3 First degree \n", | |
| "Topic \n", | |
| "The teaching on my course 2.03351 4.94598 6.77154 \n", | |
| "Assessment and feedback 3.48961 2.83967 8.04842 \n", | |
| "Academic support 2.57395 1.91782 5.25888 \n", | |
| "Non-continuation 3.10899 3.35249 5.70975 \n", | |
| "Employment or further study 2.44282 0.0442631 1.91896 \n", | |
| "Highly skilled employment or further study 4.41337 3.52352 7.89039 \n", | |
| "The teaching on my course SUP N N \n", | |
| "Assessment and feedback SUP N N \n", | |
| "Academic support SUP N N \n", | |
| "Non-continuation N N 1.67569 \n", | |
| "Employment or further study DP DP DP \n", | |
| "Highly skilled employment or further study DP DP 3.70309 \n", | |
| "\n", | |
| " Age \\\n", | |
| " Other\\nUG Young Mature \n", | |
| "Topic \n", | |
| "The teaching on my course NaN 6.85589 1.80743 \n", | |
| "Assessment and feedback NaN 7.43428 1.639 \n", | |
| "Academic support NaN 5.13993 1.19991 \n", | |
| "Non-continuation N 5.63506 2.20931 \n", | |
| "Employment or further study R 1.84295 0.267295 \n", | |
| "Highly skilled employment or further study R 7.59202 1.84611 \n", | |
| "The teaching on my course SUP N SUP \n", | |
| "Assessment and feedback SUP N SUP \n", | |
| "Academic support SUP N SUP \n", | |
| "Non-continuation 0 N DP \n", | |
| "Employment or further study SUP N DP \n", | |
| "Highly skilled employment or further study SUP N DP \n", | |
| "\n", | |
| " Disadvantaged ... \\\n", | |
| " Yes No ... \n", | |
| "Topic ... \n", | |
| "The teaching on my course 2.75972 5.39015 ... \n", | |
| "Assessment and feedback 0.989925 6.05305 ... \n", | |
| "Academic support 1.05724 3.68713 ... \n", | |
| "Non-continuation 0.824919 4.90464 ... \n", | |
| "Employment or further study 0.222057 1.63644 ... \n", | |
| "Highly skilled employment or further study 1.07268 6.43625 ... \n", | |
| "The teaching on my course N N ... \n", | |
| "Assessment and feedback N N ... \n", | |
| "Academic support N N ... \n", | |
| "Non-continuation NaN N ... \n", | |
| "Employment or further study R N ... \n", | |
| "Highly skilled employment or further study R N ... \n", | |
| "\n", | |
| " Disabled Sex \\\n", | |
| " Yes No Male \n", | |
| "Topic \n", | |
| "The teaching on my course 3.45701 6.30257 3.06317 \n", | |
| "Assessment and feedback 2.73797 7.11754 3.19442 \n", | |
| "Academic support 2.94066 4.53997 2.8402 \n", | |
| "Non-continuation 1.97203 5.51903 5.08287 \n", | |
| "Employment or further study 1.69252 1.4045 2.04653 \n", | |
| "Highly skilled employment or further study 2.38469 7.50914 4.99549 \n", | |
| "The teaching on my course N SUP SUP \n", | |
| "Assessment and feedback N SUP SUP \n", | |
| "Academic support N SUP SUP \n", | |
| "Non-continuation N DP N \n", | |
| "Employment or further study N DP DP \n", | |
| "Highly skilled employment or further study N DP DP \n", | |
| "\n", | |
| " Domicile \\\n", | |
| " Female UK Other EU \n", | |
| "Topic \n", | |
| "The teaching on my course 6.87317 6.85111 3.56633 \n", | |
| "Assessment and feedback 7.34698 6.53844 2.50602 \n", | |
| "Academic support 4.50317 4.20403 2.22035 \n", | |
| "Non-continuation 3.19744 0 0 \n", | |
| "Employment or further study 0.436756 0 0 \n", | |
| "Highly skilled employment or further study 6.1014 0 0 \n", | |
| "The teaching on my course SUP SUP R \n", | |
| "Assessment and feedback SUP SUP R \n", | |
| "Academic support SUP SUP R \n", | |
| "Non-continuation DP 0 0 \n", | |
| "Employment or further study DP 0 0 \n", | |
| "Highly skilled employment or further study DP 0 0 \n", | |
| "\n", | |
| " PRN Type Error type \n", | |
| " Non EU \n", | |
| "Topic \n", | |
| "The teaching on my course -0.569411 10007792 FT z \n", | |
| "Assessment and feedback 1.34462 10007792 FT z \n", | |
| "Academic support 1.19656 10007792 FT z \n", | |
| "Non-continuation 0 10007792 FT z \n", | |
| "Employment or further study 0 10007792 FT z \n", | |
| "Highly skilled employment or further study 0 10007792 FT z \n", | |
| "The teaching on my course N 10007792 PT z \n", | |
| "Assessment and feedback N 10007792 PT z \n", | |
| "Academic support N 10007792 PT z \n", | |
| "Non-continuation 0 10007792 PT z \n", | |
| "Employment or further study 0 10007792 PT z \n", | |
| "Highly skilled employment or further study 0 10007792 PT z \n", | |
| "\n", | |
| "[12 rows x 22 columns]" | |
| ] | |
| }, | |
| "execution_count": 174, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first 20 rows of the combined z-score table\n", | |
| "z.head(20)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 841, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>All years</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Years</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Level of study</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Age</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Disadvantaged</th>\n", | |
| " <th>...</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Sex</th>\n", | |
| " <th colspan=\"2\" halign=\"left\">Welsh medium</th>\n", | |
| " <th colspan=\"3\" halign=\"left\">Domicile</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Type</th>\n", | |
| " <th>Error type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th></th>\n", | |
| " <th>Unnamed: 1_level_1</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>First Degree</th>\n", | |
| " <th>Other UG</th>\n", | |
| " <th>Young</th>\n", | |
| " <th>Mature</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>...</th>\n", | |
| " <th>Male</th>\n", | |
| " <th>Female</th>\n", | |
| " <th>Yes</th>\n", | |
| " <th>No</th>\n", | |
| " <th>UK</th>\n", | |
| " <th>Other EU</th>\n", | |
| " <th>Non EU</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Topic</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>The teaching on my course</th>\n", | |
| " <td>1.21886</td>\n", | |
| " <td>4.44397</td>\n", | |
| " <td>0.100994</td>\n", | |
| " <td>6.0307</td>\n", | |
| " <td>3.47268</td>\n", | |
| " <td>1.08085</td>\n", | |
| " <td>1.74223</td>\n", | |
| " <td>3.14273</td>\n", | |
| " <td>3.84986</td>\n", | |
| " <td>0.287595</td>\n", | |
| " <td>...</td>\n", | |
| " <td>4.33997</td>\n", | |
| " <td>1.712574</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.906851</td>\n", | |
| " <td>7.05848</td>\n", | |
| " <td>0.907585</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>sd</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Assessment and feedback</th>\n", | |
| " <td>0.620918</td>\n", | |
| " <td>0.505516</td>\n", | |
| " <td>1.22337</td>\n", | |
| " <td>1.21236</td>\n", | |
| " <td>0.0956798</td>\n", | |
| " <td>0.754485</td>\n", | |
| " <td>0.522697</td>\n", | |
| " <td>1.45514</td>\n", | |
| " <td>0.0966519</td>\n", | |
| " <td>0.849326</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.347848</td>\n", | |
| " <td>1.653479</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.175016</td>\n", | |
| " <td>1.16463</td>\n", | |
| " <td>0.810035</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>sd</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Academic support</th>\n", | |
| " <td>0.415835</td>\n", | |
| " <td>0.926926</td>\n", | |
| " <td>0.040144</td>\n", | |
| " <td>1.17222</td>\n", | |
| " <td>1.605</td>\n", | |
| " <td>0.671054</td>\n", | |
| " <td>1.70013</td>\n", | |
| " <td>0.940759</td>\n", | |
| " <td>2.50489</td>\n", | |
| " <td>0.570067</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.61696</td>\n", | |
| " <td>1.058166</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.360268</td>\n", | |
| " <td>2.53371</td>\n", | |
| " <td>0.0795091</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>sd</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Non-continuation</th>\n", | |
| " <td>0.325999</td>\n", | |
| " <td>0.3527</td>\n", | |
| " <td>0.90899</td>\n", | |
| " <td>0.330694</td>\n", | |
| " <td>1.65284</td>\n", | |
| " <td>1.21958</td>\n", | |
| " <td>1.01048</td>\n", | |
| " <td>0.251151</td>\n", | |
| " <td>1.60483</td>\n", | |
| " <td>0.491675</td>\n", | |
| " <td>...</td>\n", | |
| " <td>1.17956</td>\n", | |
| " <td>0.547368</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>sd</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>Employment or further study</th>\n", | |
| " <td>0.642857</td>\n", | |
| " <td>0.900592</td>\n", | |
| " <td>0.0382883</td>\n", | |
| " <td>0.57619</td>\n", | |
| " <td>0.151839</td>\n", | |
| " <td>0.408096</td>\n", | |
| " <td>0.293578</td>\n", | |
| " <td>0.911861</td>\n", | |
| " <td>1.25866</td>\n", | |
| " <td>0.0658683</td>\n", | |
| " <td>...</td>\n", | |
| " <td>0.413673</td>\n", | |
| " <td>0.759783</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>FT</td>\n", | |
| " <td>sd</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>5 rows × 24 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " All years Years \\\n", | |
| " Unnamed: 1_level_1 1 2 \n", | |
| "Topic \n", | |
| "The teaching on my course 1.21886 4.44397 0.100994 \n", | |
| "Assessment and feedback 0.620918 0.505516 1.22337 \n", | |
| "Academic support 0.415835 0.926926 0.040144 \n", | |
| "Non-continuation 0.325999 0.3527 0.90899 \n", | |
| "Employment or further study 0.642857 0.900592 0.0382883 \n", | |
| "\n", | |
| " Level of study Age \\\n", | |
| " 3 First Degree Other UG Young \n", | |
| "Topic \n", | |
| "The teaching on my course 6.0307 3.47268 1.08085 1.74223 \n", | |
| "Assessment and feedback 1.21236 0.0956798 0.754485 0.522697 \n", | |
| "Academic support 1.17222 1.605 0.671054 1.70013 \n", | |
| "Non-continuation 0.330694 1.65284 1.21958 1.01048 \n", | |
| "Employment or further study 0.57619 0.151839 0.408096 0.293578 \n", | |
| "\n", | |
| " Disadvantaged ... \\\n", | |
| " Mature Yes No ... \n", | |
| "Topic ... \n", | |
| "The teaching on my course 3.14273 3.84986 0.287595 ... \n", | |
| "Assessment and feedback 1.45514 0.0966519 0.849326 ... \n", | |
| "Academic support 0.940759 2.50489 0.570067 ... \n", | |
| "Non-continuation 0.251151 1.60483 0.491675 ... \n", | |
| "Employment or further study 0.911861 1.25866 0.0658683 ... \n", | |
| "\n", | |
| " Sex Welsh medium Domicile \\\n", | |
| " Male Female Yes No UK \n", | |
| "Topic \n", | |
| "The teaching on my course 4.33997 1.712574 0 0 0.906851 \n", | |
| "Assessment and feedback 0.347848 1.653479 0 0 0.175016 \n", | |
| "Academic support 1.61696 1.058166 0 0 0.360268 \n", | |
| "Non-continuation 1.17956 0.547368 0 0 NaN \n", | |
| "Employment or further study 0.413673 0.759783 0 0 NaN \n", | |
| "\n", | |
| " PRN Type Error type \n", | |
| " Other EU Non EU \n", | |
| "Topic \n", | |
| "The teaching on my course 7.05848 0.907585 dfdf FT sd \n", | |
| "Assessment and feedback 1.16463 0.810035 dfdf FT sd \n", | |
| "Academic support 2.53371 0.0795091 dfdf FT sd \n", | |
| "Non-continuation NaN NaN dfdf FT sd \n", | |
| "Employment or further study NaN NaN dfdf FT sd \n", | |
| "\n", | |
| "[5 rows x 24 columns]" | |
| ] | |
| }, | |
| "execution_count": 841, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first five rows of the combined sd table\n", | |
| "sd.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 855, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>sd value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>1.218860</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>0.620918</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>0.415835</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>0.325999</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>0.642857</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading sd value\n", | |
| "0 The teaching on my course dfdf All years Unnamed: 1_level_1 1.218860\n", | |
| "1 Assessment and feedback dfdf All years Unnamed: 1_level_1 0.620918\n", | |
| "2 Academic support dfdf All years Unnamed: 1_level_1 0.415835\n", | |
| "3 Non-continuation dfdf All years Unnamed: 1_level_1 0.325999\n", | |
| "4 Employment or further study dfdf All years Unnamed: 1_level_1 0.642857" | |
| ] | |
| }, | |
| "execution_count": 855, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Pass the wide sd table through longify (defined elsewhere in this notebook) and preview the result.\n", | |
| "#The second argument appears to name the value column of the long table - confirm against longify.\n", | |
| "sd_long=longify(sd,'sd value')\n", | |
| "sd_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 856, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>z value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>1.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>3.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>-2.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>2.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>8.4</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading z value\n", | |
| "0 The teaching on my course dfdf All years Unnamed: 1_level_1 1.2\n", | |
| "1 Assessment and feedback dfdf All years Unnamed: 1_level_1 3.6\n", | |
| "2 Academic support dfdf All years Unnamed: 1_level_1 -2.8\n", | |
| "3 Non-continuation dfdf All years Unnamed: 1_level_1 2.4\n", | |
| "4 Employment or further study dfdf All years Unnamed: 1_level_1 8.4" | |
| ] | |
| }, | |
| "execution_count": 856, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Pass the wide z-score table through longify (defined elsewhere in this notebook) and preview the result.\n", | |
| "#The second argument appears to name the value column of the long table - confirm against longify.\n", | |
| "z_long=longify(z,'z value')\n", | |
| "z_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 758, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>87.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>73.3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>81.99</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>5.3176</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>92.2</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading value\n", | |
| "0 The teaching on my course dfdf All years Unnamed: 1_level_1 87.2\n", | |
| "1 Assessment and feedback dfdf All years Unnamed: 1_level_1 73.3\n", | |
| "2 Academic support dfdf All years Unnamed: 1_level_1 81.99\n", | |
| "3 Non-continuation dfdf All years Unnamed: 1_level_1 5.3176\n", | |
| "4 Employment or further study dfdf All years Unnamed: 1_level_1 92.2" | |
| ] | |
| }, | |
| "execution_count": 758, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Preview the first five rows of indicators_long (built earlier in the notebook)\n", | |
| "indicators_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 795, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Topic</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Heading</th>\n", | |
| " <th>Subheading</th>\n", | |
| " <th>value</th>\n", | |
| " <th>sd value</th>\n", | |
| " <th>z value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>87.2</td>\n", | |
| " <td>1.21886</td>\n", | |
| " <td>1.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>73.3</td>\n", | |
| " <td>0.620918</td>\n", | |
| " <td>3.6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>81.99</td>\n", | |
| " <td>0.415835</td>\n", | |
| " <td>-2.8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>5.3176</td>\n", | |
| " <td>0.325999</td>\n", | |
| " <td>2.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>dfdf</td>\n", | |
| " <td>All years</td>\n", | |
| " <td>Unnamed: 1_level_1</td>\n", | |
| " <td>5.3176</td>\n", | |
| " <td>0.325999</td>\n", | |
| " <td>1.4</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Topic PRN Heading Subheading value \\\n", | |
| "0 The teaching on my course dfdf All years Unnamed: 1_level_1 87.2 \n", | |
| "1 Assessment and feedback dfdf All years Unnamed: 1_level_1 73.3 \n", | |
| "2 Academic support dfdf All years Unnamed: 1_level_1 81.99 \n", | |
| "3 Non-continuation dfdf All years Unnamed: 1_level_1 5.3176 \n", | |
| "4 Non-continuation dfdf All years Unnamed: 1_level_1 5.3176 \n", | |
| "\n", | |
| " sd value z value \n", | |
| "0 1.21886 1.2 \n", | |
| "1 0.620918 3.6 \n", | |
| "2 0.415835 -2.8 \n", | |
| "3 0.325999 2.4 \n", | |
| "4 0.325999 1.4 " | |
| ] | |
| }, | |
| "execution_count": 795, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ifull_long=indicators_long.merge( sd_long, on=['Topic','PRN','Heading','Subheading'])\n", | |
| "ifull_long=ifull_long.merge( z_long, on=['Topic','PRN','Heading','Subheading'])\n", | |
| "ifull_long.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 796, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "ifull_long.to_csv('tef_indicators_with_error_long.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Ethnicity" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def get_ethnicity(fn):\n", | |
| " sn='BME breakdown'\n", | |
| " print(fn)\n", | |
| " institution,prn=getPRN(fn,sn)\n", | |
| "\n", | |
| " e=cleanCols(pd.read_excel(fn, sheetname=sn, skiprows=9,parse_cols=5))\n", | |
| " e.columns=['Category','NULL','BME','Black','Asian','Other']\n", | |
| " e['PRN']=prn\n", | |
| " \n", | |
| " ft=e[:6].dropna(how='all',axis=1)\n", | |
| " ft['Type']='FT'\n", | |
| " \n", | |
| " pt=e[7:13].dropna(how='all',axis=1)#.dropna(how='all',axis=1))[:-1]\n", | |
| " pt['Type']='PT'\n", | |
| "\n", | |
| " return pd.concat([ft,pt])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 175, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "TEFYearTwo_AllMetrics/10007792_University of Exeter_Metrics.xlsx\n", | |
| "TEFYearTwo_AllMetrics/10007795_The University of Leeds_Metrics.xlsx\n" | |
| ] | |
| }, | |
| { | |
| "ename": "KeyboardInterrupt", | |
| "evalue": "", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-175-69c6fa652c51>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'~'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0methnicity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0methnicity\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mget_ethnicity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'{}/{}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0methnicity\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'tef_ethnicity.csv'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m<ipython-input-82-2790a51d3336>\u001b[0m in \u001b[0;36mget_ethnicity\u001b[0;34m(fn)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0minstitution\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgetPRN\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0me\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcleanCols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m9\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparse_cols\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Category'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'NULL'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'BME'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Black'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Asian'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Other'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PRN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36mread_excel\u001b[0;34m(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m return io._parse_excel(\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/pandas/io/excel.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, io, **kwds)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen_workbook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m raise ValueError('Must explicitly set engine if not passing in'\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/__init__.py\u001b[0m in \u001b[0;36mopen_workbook\u001b[0;34m(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0mformatting_info\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatting_info\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0mon_demand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon_demand\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m \u001b[0mragged_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mragged_rows\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 423\u001b[0m )\n\u001b[1;32m 424\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mbk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mopen_workbook_2007_xml\u001b[0;34m(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)\u001b[0m\n\u001b[1;32m 831\u001b[0m \u001b[0mx12sheet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX12Sheet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogfile\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbosity\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 832\u001b[0m \u001b[0mheading\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Sheet %r (sheetx=%d) from %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msheetx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m \u001b[0mx12sheet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_stream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzflo\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 834\u001b[0m \u001b[0;32mdel\u001b[0m \u001b[0mzflo\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 835\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/xlrd/xlsx.py\u001b[0m in \u001b[0;36mown_process_stream\u001b[0;34m(self, stream, heading)\u001b[0m\n\u001b[1;32m 550\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"dimension\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_dimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 552\u001b[0;31m \u001b[0;32melif\u001b[0m \u001b[0melem\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mU_SSML12\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"mergeCell\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 553\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_merge_cell\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfinish_off\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#fn='tef/TEF_Metrics_workbook_exemplar.xlsx'\n", | |
| "#get_ethnicity(fn)\n", | |
| "\n", | |
| "ethnicity=pd.DataFrame()\n", | |
| "\n", | |
| "for filename in [f for f in os.listdir(directory) if not f.startswith('~')]:\n", | |
| " ethnicity=pd.concat([ethnicity,get_ethnicity('{}/{}'.format(directory,filename))])\n", | |
| " \n", | |
| "ethnicity.to_csv('tef_ethnicity.csv',index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 631, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Category,BME,Black,Asian,Other,PRN,Type\r\n", | |
| "The teaching on my course,,,-,-,nan,FT\r\n", | |
| "Assessment and feedback ,++,++,,+,nan,FT\r\n", | |
| "Academic support,,-,,-,nan,FT\r\n", | |
| "Non-continuation,,++,-,-,nan,FT\r\n", | |
| "Employment or further study ,++,,+,+,nan,FT\r\n", | |
| "Highly skilled employment,++,++,,++,nan,FT\r\n", | |
| "The teaching on my course ,,+,- ,,nan,PT\r\n", | |
| "Assessment and feedback,,- ,-,,nan,PT\r\n", | |
| "Academic support ,,+,,,nan,PT\r\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!head tef_ethnicity.csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 176, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Asian</th>\n", | |
| " <th>BME</th>\n", | |
| " <th>Black</th>\n", | |
| " <th>Category</th>\n", | |
| " <th>Other</th>\n", | |
| " <th>PRN</th>\n", | |
| " <th>Type</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>The teaching on my course</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Assessment and feedback</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>+</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Academic support</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Non-continuation</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>Employment or further study</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>10007792</td>\n", | |
| " <td>FT</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Asian BME Black Category Other PRN Type\n", | |
| "0 NaN NaN NaN The teaching on my course NaN 10007792 FT\n", | |
| "1 NaN + NaN Assessment and feedback NaN 10007792 FT\n", | |
| "2 + + NaN Academic support NaN 10007792 FT\n", | |
| "3 NaN NaN NaN Non-continuation NaN 10007792 FT\n", | |
| "4 NaN NaN NaN Employment or further study NaN 10007792 FT" | |
| ] | |
| }, | |
| "execution_count": 176, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "ethnicity.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment