Skip to content

Instantly share code, notes, and snippets.

@pmagwene
Created September 23, 2014 23:25
Show Gist options
  • Save pmagwene/abef4c132f4997fc303a to your computer and use it in GitHub Desktop.
Save pmagwene/abef4c132f4997fc303a to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:4632c48d2d93ec86103808ed599b94a33dfdda25ab937192359f68ab92ee1ec0"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"## read the yeast-CCM.xls file\n",
"pheno = pd.read_excel('https://github.com/pmagwene/Bio723/raw/master/datasets/yeast-CCM.xls', 'Phenotypes')\n",
"geno = pd.read_excel('https://github.com/pmagwene/Bio723/raw/master/datasets/yeast-CCM.xls', 'Genotypes')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = pd.merge(pheno, geno, on='Strain')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# indexing with integers works\n",
"data.ix[[0,2,4]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Strain</th>\n",
" <th>Segregant</th>\n",
" <th>Pool</th>\n",
" <th>CM.a</th>\n",
" <th>CM.b</th>\n",
" <th>CM.c</th>\n",
" <th>cAMP</th>\n",
" <th>Cyr1.expr</th>\n",
" <th>Flo11.expr</th>\n",
" <th>Adhes.a</th>\n",
" <th>Adhes.b</th>\n",
" <th>Adhes.c</th>\n",
" <th>Cyr1.geno</th>\n",
" <th>Flo11.geno</th>\n",
" <th>Ecm5.geno</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> PMY1235</td>\n",
" <td> s1</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 194.38</td>\n",
" <td> 1438</td>\n",
" <td> 2035</td>\n",
" <td> 0.0987</td>\n",
" <td> 0.1035</td>\n",
" <td> 0.1087</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> PMY1237</td>\n",
" <td> s3</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 245.32</td>\n",
" <td> 1292</td>\n",
" <td> 143961</td>\n",
" <td> 0.0870</td>\n",
" <td> 0.2564</td>\n",
" <td> 0.2591</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> PMY1239</td>\n",
" <td> s5</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 168.99</td>\n",
" <td> 1204</td>\n",
" <td> 11281</td>\n",
" <td> 0.1522</td>\n",
" <td> 0.1735</td>\n",
" <td> 0.1511</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
" Strain Segregant Pool CM.a CM.b CM.c cAMP Cyr1.expr Flo11.expr \\\n",
"0 PMY1235 s1 S 1 1 1 194.38 1438 2035 \n",
"2 PMY1237 s3 S 1 1 1 245.32 1292 143961 \n",
"4 PMY1239 s5 S 1 1 1 168.99 1204 11281 \n",
"\n",
" Adhes.a Adhes.b Adhes.c Cyr1.geno Flo11.geno Ecm5.geno \n",
"0 0.0987 0.1035 0.1087 S S S \n",
"2 0.0870 0.2564 0.2591 S S S \n",
"4 0.1522 0.1735 0.1511 S S S "
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# but indexing with strain names doesn't\n",
"data.ix[['PMY1235','PMY1237','PMY1239']]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Strain</th>\n",
" <th>Segregant</th>\n",
" <th>Pool</th>\n",
" <th>CM.a</th>\n",
" <th>CM.b</th>\n",
" <th>CM.c</th>\n",
" <th>cAMP</th>\n",
" <th>Cyr1.expr</th>\n",
" <th>Flo11.expr</th>\n",
" <th>Adhes.a</th>\n",
" <th>Adhes.b</th>\n",
" <th>Adhes.c</th>\n",
" <th>Cyr1.geno</th>\n",
" <th>Flo11.geno</th>\n",
" <th>Ecm5.geno</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>PMY1235</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PMY1237</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PMY1239</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
" Strain Segregant Pool CM.a CM.b CM.c cAMP Cyr1.expr Flo11.expr \\\n",
"PMY1235 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"PMY1237 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"PMY1239 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
" Adhes.a Adhes.b Adhes.c Cyr1.geno Flo11.geno Ecm5.geno \n",
"PMY1235 NaN NaN NaN NaN NaN NaN \n",
"PMY1237 NaN NaN NaN NaN NaN NaN \n",
"PMY1239 NaN NaN NaN NaN NaN NaN "
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# the above call didn't work because we hadn't told\n",
"# pandas to treat Strain as an indexing column\n",
"# Here's how we do it...\n",
"data.set_index('Strain', inplace=True)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# now it works!\n",
"data.ix[['PMY1235','PMY1237','PMY1239']]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Segregant</th>\n",
" <th>Pool</th>\n",
" <th>CM.a</th>\n",
" <th>CM.b</th>\n",
" <th>CM.c</th>\n",
" <th>cAMP</th>\n",
" <th>Cyr1.expr</th>\n",
" <th>Flo11.expr</th>\n",
" <th>Adhes.a</th>\n",
" <th>Adhes.b</th>\n",
" <th>Adhes.c</th>\n",
" <th>Cyr1.geno</th>\n",
" <th>Flo11.geno</th>\n",
" <th>Ecm5.geno</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>PMY1235</th>\n",
" <td> s1</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 194.38</td>\n",
" <td> 1438</td>\n",
" <td> 2035</td>\n",
" <td> 0.0987</td>\n",
" <td> 0.1035</td>\n",
" <td> 0.1087</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PMY1237</th>\n",
" <td> s3</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 245.32</td>\n",
" <td> 1292</td>\n",
" <td> 143961</td>\n",
" <td> 0.0870</td>\n",
" <td> 0.2564</td>\n",
" <td> 0.2591</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PMY1239</th>\n",
" <td> s5</td>\n",
" <td> S</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 168.99</td>\n",
" <td> 1204</td>\n",
" <td> 11281</td>\n",
" <td> 0.1522</td>\n",
" <td> 0.1735</td>\n",
" <td> 0.1511</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" <td> S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
" Segregant Pool CM.a CM.b CM.c cAMP Cyr1.expr Flo11.expr \\\n",
"PMY1235 s1 S 1 1 1 194.38 1438 2035 \n",
"PMY1237 s3 S 1 1 1 245.32 1292 143961 \n",
"PMY1239 s5 S 1 1 1 168.99 1204 11281 \n",
"\n",
" Adhes.a Adhes.b Adhes.c Cyr1.geno Flo11.geno Ecm5.geno \n",
"PMY1235 0.0987 0.1035 0.1087 S S S \n",
"PMY1237 0.0870 0.2564 0.2591 S S S \n",
"PMY1239 0.1522 0.1735 0.1511 S S S "
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment