Last active
June 16, 2016 07:18
-
-
Save AKuederle/684a620bed1aac4444c30f238a3d975e to your computer and use it in GitHub Desktop.
A short example of how to deal with strange data types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sequence_ID, Sequence_Length, Hit_Count, Start, End, Strand | |
NM_172887.2,1-10753,1,10453,10459,+ | |
XM_006504928.1,1-10641,1,10364,10370,+ | |
XM_006504927.1,1-10650,1,10373,10379,+ | |
XM_006504926.1,1-10659,1,10382,10388,+ | |
NM_147219.2,1-8339,1,7632,7638,+ | |
XM_006533065.2,1-8166,1,7529,7535,+ | |
NM_009592.1,1-5759,3,2822,2828,+ | |
3867,3873,+ | |
3971,3977,+ | |
NM_009784.2,1-7415,4,520,526,+ | |
6391,6397,+ | |
6542,6548,+ | |
7302,7308,+ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"pd.options.mode.chained_assignment = None # default='warn'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sequence_ID</th>\n", | |
" <th>Sequence_Length</th>\n", | |
" <th>Hit_Count</th>\n", | |
" <th>Start</th>\n", | |
" <th>End</th>\n", | |
" <th>Strand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NM_172887.2</td>\n", | |
" <td>1-10753</td>\n", | |
" <td>1</td>\n", | |
" <td>10453.0</td>\n", | |
" <td>10459.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>XM_006504928.1</td>\n", | |
" <td>1-10641</td>\n", | |
" <td>1</td>\n", | |
" <td>10364.0</td>\n", | |
" <td>10370.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>XM_006504927.1</td>\n", | |
" <td>1-10650</td>\n", | |
" <td>1</td>\n", | |
" <td>10373.0</td>\n", | |
" <td>10379.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>XM_006504926.1</td>\n", | |
" <td>1-10659</td>\n", | |
" <td>1</td>\n", | |
" <td>10382.0</td>\n", | |
" <td>10388.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NM_147219.2</td>\n", | |
" <td>1-8339</td>\n", | |
" <td>1</td>\n", | |
" <td>7632.0</td>\n", | |
" <td>7638.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>XM_006533065.2</td>\n", | |
" <td>1-8166</td>\n", | |
" <td>1</td>\n", | |
" <td>7529.0</td>\n", | |
" <td>7535.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>2822.0</td>\n", | |
" <td>2828.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>3867</td>\n", | |
" <td>3873</td>\n", | |
" <td>+</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>3971</td>\n", | |
" <td>3977</td>\n", | |
" <td>+</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>520.0</td>\n", | |
" <td>526.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>6391</td>\n", | |
" <td>6397</td>\n", | |
" <td>+</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>6542</td>\n", | |
" <td>6548</td>\n", | |
" <td>+</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>7302</td>\n", | |
" <td>7308</td>\n", | |
" <td>+</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sequence_ID Sequence_Length Hit_Count Start End \\\n", | |
"0 NM_172887.2 1-10753 1 10453.0 10459.0 \n", | |
"1 XM_006504928.1 1-10641 1 10364.0 10370.0 \n", | |
"2 XM_006504927.1 1-10650 1 10373.0 10379.0 \n", | |
"3 XM_006504926.1 1-10659 1 10382.0 10388.0 \n", | |
"4 NM_147219.2 1-8339 1 7632.0 7638.0 \n", | |
"5 XM_006533065.2 1-8166 1 7529.0 7535.0 \n", | |
"6 NM_009592.1 1-5759 3 2822.0 2828.0 \n", | |
"7 3867 3873 + NaN NaN \n", | |
"8 3971 3977 + NaN NaN \n", | |
"9 NM_009784.2 1-7415 4 520.0 526.0 \n", | |
"10 6391 6397 + NaN NaN \n", | |
"11 6542 6548 + NaN NaN \n", | |
"12 7302 7308 + NaN NaN \n", | |
"\n", | |
" Strand \n", | |
"0 + \n", | |
"1 + \n", | |
"2 + \n", | |
"3 + \n", | |
"4 + \n", | |
"5 + \n", | |
"6 + \n", | |
"7 NaN \n", | |
"8 NaN \n", | |
"9 + \n", | |
"10 NaN \n", | |
"11 NaN \n", | |
"12 NaN " | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Load the raw hit table. Continuation rows carry only three fields,\n", | |
"# so pandas pads their remaining columns on the right with NaN.\n", | |
"df = pd.read_csv(filepath_or_buffer='./file.txt')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['Sequence_ID', ' Sequence_Length', ' Hit_Count', ' Start', ' End', ' Strand']" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Column labels exactly as parsed, including the blank that follows each comma.\n", | |
"header = df.columns.tolist()\n", | |
"header" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['Sequence_ID', 'Sequence_Length', 'Hit_Count', 'Start', 'End', 'Strand']" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Trim surrounding whitespace from every column label.\n", | |
"new_header = list(map(str.strip, header))\n", | |
"new_header" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sequence_ID</th>\n", | |
" <th>Sequence_Length</th>\n", | |
" <th>Hit_Count</th>\n", | |
" <th>Start</th>\n", | |
" <th>End</th>\n", | |
" <th>Strand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NM_172887.2</td>\n", | |
" <td>1-10753</td>\n", | |
" <td>1</td>\n", | |
" <td>10453.0</td>\n", | |
" <td>10459.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>XM_006504928.1</td>\n", | |
" <td>1-10641</td>\n", | |
" <td>1</td>\n", | |
" <td>10364.0</td>\n", | |
" <td>10370.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>XM_006504927.1</td>\n", | |
" <td>1-10650</td>\n", | |
" <td>1</td>\n", | |
" <td>10373.0</td>\n", | |
" <td>10379.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>XM_006504926.1</td>\n", | |
" <td>1-10659</td>\n", | |
" <td>1</td>\n", | |
" <td>10382.0</td>\n", | |
" <td>10388.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NM_147219.2</td>\n", | |
" <td>1-8339</td>\n", | |
" <td>1</td>\n", | |
" <td>7632.0</td>\n", | |
" <td>7638.0</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sequence_ID Sequence_Length Hit_Count Start End Strand\n", | |
"0 NM_172887.2 1-10753 1 10453.0 10459.0 +\n", | |
"1 XM_006504928.1 1-10641 1 10364.0 10370.0 +\n", | |
"2 XM_006504927.1 1-10650 1 10373.0 10379.0 +\n", | |
"3 XM_006504926.1 1-10659 1 10382.0 10388.0 +\n", | |
"4 NM_147219.2 1-8339 1 7632.0 7638.0 +" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Swap in the cleaned labels, then preview the first rows.\n", | |
"df.columns = new_header\n", | |
"df.head(n=5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sequence_ID</th>\n", | |
" <th>Sequence_Length</th>\n", | |
" <th>Hit_Count</th>\n", | |
" <th>Start</th>\n", | |
" <th>End</th>\n", | |
" <th>Strand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NM_172887.2</td>\n", | |
" <td>1-10753</td>\n", | |
" <td>1</td>\n", | |
" <td>10453</td>\n", | |
" <td>10459</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>XM_006504928.1</td>\n", | |
" <td>1-10641</td>\n", | |
" <td>1</td>\n", | |
" <td>10364</td>\n", | |
" <td>10370</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>XM_006504927.1</td>\n", | |
" <td>1-10650</td>\n", | |
" <td>1</td>\n", | |
" <td>10373</td>\n", | |
" <td>10379</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>XM_006504926.1</td>\n", | |
" <td>1-10659</td>\n", | |
" <td>1</td>\n", | |
" <td>10382</td>\n", | |
" <td>10388</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NM_147219.2</td>\n", | |
" <td>1-8339</td>\n", | |
" <td>1</td>\n", | |
" <td>7632</td>\n", | |
" <td>7638</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>XM_006533065.2</td>\n", | |
" <td>1-8166</td>\n", | |
" <td>1</td>\n", | |
" <td>7529</td>\n", | |
" <td>7535</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>2822</td>\n", | |
" <td>2828</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>3867</td>\n", | |
" <td>3873</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>3971</td>\n", | |
" <td>3977</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>520</td>\n", | |
" <td>526</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6391</td>\n", | |
" <td>6397</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>6542</td>\n", | |
" <td>6548</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>7302</td>\n", | |
" <td>7308</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
"0 NM_172887.2 1-10753 1 10453 \n", | |
"1 XM_006504928.1 1-10641 1 10364 \n", | |
"2 XM_006504927.1 1-10650 1 10373 \n", | |
"3 XM_006504926.1 1-10659 1 10382 \n", | |
"4 NM_147219.2 1-8339 1 7632 \n", | |
"5 XM_006533065.2 1-8166 1 7529 \n", | |
"6 NM_009592.1 1-5759 3 2822 \n", | |
"7 NaN NaN NaN 3867 \n", | |
"8 NaN NaN NaN 3971 \n", | |
"9 NM_009784.2 1-7415 4 520 \n", | |
"10 NaN NaN NaN 6391 \n", | |
"11 NaN NaN NaN 6542 \n", | |
"12 NaN NaN NaN 7302 \n", | |
"\n", | |
" End Strand \n", | |
"0 10459 + \n", | |
"1 10370 + \n", | |
"2 10379 + \n", | |
"3 10388 + \n", | |
"4 7638 + \n", | |
"5 7535 + \n", | |
"6 2828 + \n", | |
"7 3873 + \n", | |
"8 3977 + \n", | |
"9 526 + \n", | |
"10 6397 + \n", | |
"11 6548 + \n", | |
"12 7308 + " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Rows with no Strand are continuation hits: they carry only Start, End and\n", | |
"# Strand, which read_csv placed under the first three columns.\n", | |
"nan_mask = df['Strand'].isnull()\n", | |
"shifted = ['Sequence_ID', 'Sequence_Length', 'Hit_Count']\n", | |
"# Write through .loc so the values land on df itself; chained\n", | |
"# `df.col[mask] = ...` assigns to a temporary under pandas copy-on-write.\n", | |
"# The misplaced coordinates were parsed as text, so convert them to numbers.\n", | |
"df.loc[nan_mask, 'Start'] = pd.to_numeric(df.loc[nan_mask, 'Sequence_ID'])\n", | |
"df.loc[nan_mask, 'End'] = pd.to_numeric(df.loc[nan_mask, 'Sequence_Length'])\n", | |
"df.loc[nan_mask, 'Strand'] = df.loc[nan_mask, 'Hit_Count']\n", | |
"df.loc[nan_mask, shifted] = np.nan\n", | |
"# Every row now has coordinates, so they can be stored as integers.\n", | |
"df[['Start', 'End']] = df[['Start', 'End']].astype(int)\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sequence_ID</th>\n", | |
" <th>Sequence_Length</th>\n", | |
" <th>Hit_Count</th>\n", | |
" <th>Start</th>\n", | |
" <th>End</th>\n", | |
" <th>Strand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NM_172887.2</td>\n", | |
" <td>1-10753</td>\n", | |
" <td>1</td>\n", | |
" <td>10453</td>\n", | |
" <td>10459</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>XM_006504928.1</td>\n", | |
" <td>1-10641</td>\n", | |
" <td>1</td>\n", | |
" <td>10364</td>\n", | |
" <td>10370</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>XM_006504927.1</td>\n", | |
" <td>1-10650</td>\n", | |
" <td>1</td>\n", | |
" <td>10373</td>\n", | |
" <td>10379</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>XM_006504926.1</td>\n", | |
" <td>1-10659</td>\n", | |
" <td>1</td>\n", | |
" <td>10382</td>\n", | |
" <td>10388</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NM_147219.2</td>\n", | |
" <td>1-8339</td>\n", | |
" <td>1</td>\n", | |
" <td>7632</td>\n", | |
" <td>7638</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>XM_006533065.2</td>\n", | |
" <td>1-8166</td>\n", | |
" <td>1</td>\n", | |
" <td>7529</td>\n", | |
" <td>7535</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>2822</td>\n", | |
" <td>2828</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>3867</td>\n", | |
" <td>3873</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>3971</td>\n", | |
" <td>3977</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>520</td>\n", | |
" <td>526</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>6391</td>\n", | |
" <td>6397</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>6542</td>\n", | |
" <td>6548</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td></td>\n", | |
" <td>7302</td>\n", | |
" <td>7308</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
"0 NM_172887.2 1-10753 1 10453 \n", | |
"1 XM_006504928.1 1-10641 1 10364 \n", | |
"2 XM_006504927.1 1-10650 1 10373 \n", | |
"3 XM_006504926.1 1-10659 1 10382 \n", | |
"4 NM_147219.2 1-8339 1 7632 \n", | |
"5 XM_006533065.2 1-8166 1 7529 \n", | |
"6 NM_009592.1 1-5759 3 2822 \n", | |
"7 3867 \n", | |
"8 3971 \n", | |
"9 NM_009784.2 1-7415 4 520 \n", | |
"10 6391 \n", | |
"11 6542 \n", | |
"12 7302 \n", | |
"\n", | |
" End Strand \n", | |
"0 10459 + \n", | |
"1 10370 + \n", | |
"2 10379 + \n", | |
"3 10388 + \n", | |
"4 7638 + \n", | |
"5 7535 + \n", | |
"6 2828 + \n", | |
"7 3873 + \n", | |
"8 3977 + \n", | |
"9 526 + \n", | |
"10 6397 + \n", | |
"11 6548 + \n", | |
"12 7308 + " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Display-only view with missing cells shown as blanks; the result is not\n", | |
"# assigned, so df keeps its NaN values.\n", | |
"df.fillna(value=\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Sequence_ID</th>\n", | |
" <th>Sequence_Length</th>\n", | |
" <th>Hit_Count</th>\n", | |
" <th>Start</th>\n", | |
" <th>End</th>\n", | |
" <th>Strand</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NM_172887.2</td>\n", | |
" <td>1-10753</td>\n", | |
" <td>1</td>\n", | |
" <td>10453</td>\n", | |
" <td>10459</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>XM_006504928.1</td>\n", | |
" <td>1-10641</td>\n", | |
" <td>1</td>\n", | |
" <td>10364</td>\n", | |
" <td>10370</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>XM_006504927.1</td>\n", | |
" <td>1-10650</td>\n", | |
" <td>1</td>\n", | |
" <td>10373</td>\n", | |
" <td>10379</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>XM_006504926.1</td>\n", | |
" <td>1-10659</td>\n", | |
" <td>1</td>\n", | |
" <td>10382</td>\n", | |
" <td>10388</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NM_147219.2</td>\n", | |
" <td>1-8339</td>\n", | |
" <td>1</td>\n", | |
" <td>7632</td>\n", | |
" <td>7638</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>XM_006533065.2</td>\n", | |
" <td>1-8166</td>\n", | |
" <td>1</td>\n", | |
" <td>7529</td>\n", | |
" <td>7535</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>2822</td>\n", | |
" <td>2828</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>3867</td>\n", | |
" <td>3873</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>NM_009592.1</td>\n", | |
" <td>1-5759</td>\n", | |
" <td>3</td>\n", | |
" <td>3971</td>\n", | |
" <td>3977</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>520</td>\n", | |
" <td>526</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>6391</td>\n", | |
" <td>6397</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>6542</td>\n", | |
" <td>6548</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>NM_009784.2</td>\n", | |
" <td>1-7415</td>\n", | |
" <td>4</td>\n", | |
" <td>7302</td>\n", | |
" <td>7308</td>\n", | |
" <td>+</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
"0 NM_172887.2 1-10753 1 10453 \n", | |
"1 XM_006504928.1 1-10641 1 10364 \n", | |
"2 XM_006504927.1 1-10650 1 10373 \n", | |
"3 XM_006504926.1 1-10659 1 10382 \n", | |
"4 NM_147219.2 1-8339 1 7632 \n", | |
"5 XM_006533065.2 1-8166 1 7529 \n", | |
"6 NM_009592.1 1-5759 3 2822 \n", | |
"7 NM_009592.1 1-5759 3 3867 \n", | |
"8 NM_009592.1 1-5759 3 3971 \n", | |
"9 NM_009784.2 1-7415 4 520 \n", | |
"10 NM_009784.2 1-7415 4 6391 \n", | |
"11 NM_009784.2 1-7415 4 6542 \n", | |
"12 NM_009784.2 1-7415 4 7302 \n", | |
"\n", | |
" End Strand \n", | |
"0 10459 + \n", | |
"1 10370 + \n", | |
"2 10379 + \n", | |
"3 10388 + \n", | |
"4 7638 + \n", | |
"5 7535 + \n", | |
"6 2828 + \n", | |
"7 3873 + \n", | |
"8 3977 + \n", | |
"9 526 + \n", | |
"10 6397 + \n", | |
"11 6548 + \n", | |
"12 7308 + " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Continuation rows inherit the identifiers of the nearest record above\n", | |
"# them. DataFrame.ffill() replaces the deprecated fillna(method='ffill')\n", | |
"# spelling; the filled frame is displayed only, df itself is not reassigned.\n", | |
"df.ffill()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment