Last active
June 16, 2016 07:18
-
-
Save AKuederle/684a620bed1aac4444c30f238a3d975e to your computer and use it in GitHub Desktop.
A short example of how to deal with strange datatypes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Sequence_ID, Sequence_Length, Hit_Count, Start, End, Strand | |
| NM_172887.2,1-10753,1,10453,10459,+ | |
| XM_006504928.1,1-10641,1,10364,10370,+ | |
| XM_006504927.1,1-10650,1,10373,10379,+ | |
| XM_006504926.1,1-10659,1,10382,10388,+ | |
| NM_147219.2,1-8339,1,7632,7638,+ | |
| XM_006533065.2,1-8166,1,7529,7535,+ | |
| NM_009592.1,1-5759,3,2822,2828,+ | |
| 3867,3873,+ | |
| 3971,3977,+ | |
| NM_009784.2,1-7415,4,520,526,+ | |
| 6391,6397,+ | |
| 6542,6548,+ | |
| 7302,7308,+ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n", | |
| "pd.options.mode.chained_assignment = None # default='warn'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sequence_ID</th>\n", | |
| " <th>Sequence_Length</th>\n", | |
| " <th>Hit_Count</th>\n", | |
| " <th>Start</th>\n", | |
| " <th>End</th>\n", | |
| " <th>Strand</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NM_172887.2</td>\n", | |
| " <td>1-10753</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10453.0</td>\n", | |
| " <td>10459.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>XM_006504928.1</td>\n", | |
| " <td>1-10641</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10364.0</td>\n", | |
| " <td>10370.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>XM_006504927.1</td>\n", | |
| " <td>1-10650</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10373.0</td>\n", | |
| " <td>10379.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>XM_006504926.1</td>\n", | |
| " <td>1-10659</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10382.0</td>\n", | |
| " <td>10388.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NM_147219.2</td>\n", | |
| " <td>1-8339</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7632.0</td>\n", | |
| " <td>7638.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>XM_006533065.2</td>\n", | |
| " <td>1-8166</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7529.0</td>\n", | |
| " <td>7535.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2822.0</td>\n", | |
| " <td>2828.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>3867</td>\n", | |
| " <td>3873</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>3971</td>\n", | |
| " <td>3977</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>520.0</td>\n", | |
| " <td>526.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>6391</td>\n", | |
| " <td>6397</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>6542</td>\n", | |
| " <td>6548</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>7302</td>\n", | |
| " <td>7308</td>\n", | |
| " <td>+</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sequence_ID Sequence_Length Hit_Count Start End \\\n", | |
| "0 NM_172887.2 1-10753 1 10453.0 10459.0 \n", | |
| "1 XM_006504928.1 1-10641 1 10364.0 10370.0 \n", | |
| "2 XM_006504927.1 1-10650 1 10373.0 10379.0 \n", | |
| "3 XM_006504926.1 1-10659 1 10382.0 10388.0 \n", | |
| "4 NM_147219.2 1-8339 1 7632.0 7638.0 \n", | |
| "5 XM_006533065.2 1-8166 1 7529.0 7535.0 \n", | |
| "6 NM_009592.1 1-5759 3 2822.0 2828.0 \n", | |
| "7 3867 3873 + NaN NaN \n", | |
| "8 3971 3977 + NaN NaN \n", | |
| "9 NM_009784.2 1-7415 4 520.0 526.0 \n", | |
| "10 6391 6397 + NaN NaN \n", | |
| "11 6542 6548 + NaN NaN \n", | |
| "12 7302 7308 + NaN NaN \n", | |
| "\n", | |
| " Strand \n", | |
| "0 + \n", | |
| "1 + \n", | |
| "2 + \n", | |
| "3 + \n", | |
| "4 + \n", | |
| "5 + \n", | |
| "6 + \n", | |
| "7 NaN \n", | |
| "8 NaN \n", | |
| "9 + \n", | |
| "10 NaN \n", | |
| "11 NaN \n", | |
| "12 NaN " | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Read the comma-separated hit table from the working directory and display it.\n", | |
| "df = pd.read_csv(filepath_or_buffer='./file.txt')\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['Sequence_ID', ' Sequence_Length', ' Hit_Count', ' Start', ' End', ' Strand']" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Collect the column labels exactly as parsed into a plain Python list.\n", | |
| "header = df.columns.tolist()\n", | |
| "header" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['Sequence_ID', 'Sequence_Length', 'Hit_Count', 'Start', 'End', 'Strand']" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Trim surrounding whitespace from every column label.\n", | |
| "new_header = list(map(str.strip, header))\n", | |
| "new_header" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sequence_ID</th>\n", | |
| " <th>Sequence_Length</th>\n", | |
| " <th>Hit_Count</th>\n", | |
| " <th>Start</th>\n", | |
| " <th>End</th>\n", | |
| " <th>Strand</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NM_172887.2</td>\n", | |
| " <td>1-10753</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10453.0</td>\n", | |
| " <td>10459.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>XM_006504928.1</td>\n", | |
| " <td>1-10641</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10364.0</td>\n", | |
| " <td>10370.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>XM_006504927.1</td>\n", | |
| " <td>1-10650</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10373.0</td>\n", | |
| " <td>10379.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>XM_006504926.1</td>\n", | |
| " <td>1-10659</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10382.0</td>\n", | |
| " <td>10388.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NM_147219.2</td>\n", | |
| " <td>1-8339</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7632.0</td>\n", | |
| " <td>7638.0</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sequence_ID Sequence_Length Hit_Count Start End Strand\n", | |
| "0 NM_172887.2 1-10753 1 10453.0 10459.0 +\n", | |
| "1 XM_006504928.1 1-10641 1 10364.0 10370.0 +\n", | |
| "2 XM_006504927.1 1-10650 1 10373.0 10379.0 +\n", | |
| "3 XM_006504926.1 1-10659 1 10382.0 10388.0 +\n", | |
| "4 NM_147219.2 1-8339 1 7632.0 7638.0 +" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Replace the column labels with the whitespace-stripped ones, then preview the first rows.\n", | |
| "df.columns = new_header\n", | |
| "df.head(n=5)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sequence_ID</th>\n", | |
| " <th>Sequence_Length</th>\n", | |
| " <th>Hit_Count</th>\n", | |
| " <th>Start</th>\n", | |
| " <th>End</th>\n", | |
| " <th>Strand</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NM_172887.2</td>\n", | |
| " <td>1-10753</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10453</td>\n", | |
| " <td>10459</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>XM_006504928.1</td>\n", | |
| " <td>1-10641</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10364</td>\n", | |
| " <td>10370</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>XM_006504927.1</td>\n", | |
| " <td>1-10650</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10373</td>\n", | |
| " <td>10379</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>XM_006504926.1</td>\n", | |
| " <td>1-10659</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10382</td>\n", | |
| " <td>10388</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NM_147219.2</td>\n", | |
| " <td>1-8339</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7632</td>\n", | |
| " <td>7638</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>XM_006533065.2</td>\n", | |
| " <td>1-8166</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7529</td>\n", | |
| " <td>7535</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2822</td>\n", | |
| " <td>2828</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>3867</td>\n", | |
| " <td>3873</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>3971</td>\n", | |
| " <td>3977</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>520</td>\n", | |
| " <td>526</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>6391</td>\n", | |
| " <td>6397</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>6542</td>\n", | |
| " <td>6548</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>7302</td>\n", | |
| " <td>7308</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
| "0 NM_172887.2 1-10753 1 10453 \n", | |
| "1 XM_006504928.1 1-10641 1 10364 \n", | |
| "2 XM_006504927.1 1-10650 1 10373 \n", | |
| "3 XM_006504926.1 1-10659 1 10382 \n", | |
| "4 NM_147219.2 1-8339 1 7632 \n", | |
| "5 XM_006533065.2 1-8166 1 7529 \n", | |
| "6 NM_009592.1 1-5759 3 2822 \n", | |
| "7 NaN NaN NaN 3867 \n", | |
| "8 NaN NaN NaN 3971 \n", | |
| "9 NM_009784.2 1-7415 4 520 \n", | |
| "10 NaN NaN NaN 6391 \n", | |
| "11 NaN NaN NaN 6542 \n", | |
| "12 NaN NaN NaN 7302 \n", | |
| "\n", | |
| " End Strand \n", | |
| "0 10459 + \n", | |
| "1 10370 + \n", | |
| "2 10379 + \n", | |
| "3 10388 + \n", | |
| "4 7638 + \n", | |
| "5 7535 + \n", | |
| "6 2828 + \n", | |
| "7 3873 + \n", | |
| "8 3977 + \n", | |
| "9 526 + \n", | |
| "10 6397 + \n", | |
| "11 6548 + \n", | |
| "12 7308 + " | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Rows with no Strand value hold their data in the first three columns instead\n", | |
| "# of the last three, so flag them and move the values over.\n", | |
| "nan_mask = df.Strand.isnull()\n", | |
| "# Assign through .loc in one indexing step: chained indexing such as\n", | |
| "# df.Start[nan_mask] = ... may write to a temporary copy and leave df unchanged.\n", | |
| "df.loc[nan_mask, 'Start'] = df.loc[nan_mask, 'Sequence_ID']\n", | |
| "df.loc[nan_mask, 'End'] = df.loc[nan_mask, 'Sequence_Length']\n", | |
| "df.loc[nan_mask, 'Strand'] = df.loc[nan_mask, 'Hit_Count']\n", | |
| "# Mark the columns the values were moved out of as missing.\n", | |
| "df.loc[nan_mask, ['Sequence_ID', 'Sequence_Length', 'Hit_Count']] = np.nan\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sequence_ID</th>\n", | |
| " <th>Sequence_Length</th>\n", | |
| " <th>Hit_Count</th>\n", | |
| " <th>Start</th>\n", | |
| " <th>End</th>\n", | |
| " <th>Strand</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NM_172887.2</td>\n", | |
| " <td>1-10753</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10453</td>\n", | |
| " <td>10459</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>XM_006504928.1</td>\n", | |
| " <td>1-10641</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10364</td>\n", | |
| " <td>10370</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>XM_006504927.1</td>\n", | |
| " <td>1-10650</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10373</td>\n", | |
| " <td>10379</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>XM_006504926.1</td>\n", | |
| " <td>1-10659</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10382</td>\n", | |
| " <td>10388</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NM_147219.2</td>\n", | |
| " <td>1-8339</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7632</td>\n", | |
| " <td>7638</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>XM_006533065.2</td>\n", | |
| " <td>1-8166</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7529</td>\n", | |
| " <td>7535</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2822</td>\n", | |
| " <td>2828</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>3867</td>\n", | |
| " <td>3873</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>3971</td>\n", | |
| " <td>3977</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>520</td>\n", | |
| " <td>526</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>6391</td>\n", | |
| " <td>6397</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>6542</td>\n", | |
| " <td>6548</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td></td>\n", | |
| " <td>7302</td>\n", | |
| " <td>7308</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
| "0 NM_172887.2 1-10753 1 10453 \n", | |
| "1 XM_006504928.1 1-10641 1 10364 \n", | |
| "2 XM_006504927.1 1-10650 1 10373 \n", | |
| "3 XM_006504926.1 1-10659 1 10382 \n", | |
| "4 NM_147219.2 1-8339 1 7632 \n", | |
| "5 XM_006533065.2 1-8166 1 7529 \n", | |
| "6 NM_009592.1 1-5759 3 2822 \n", | |
| "7 3867 \n", | |
| "8 3971 \n", | |
| "9 NM_009784.2 1-7415 4 520 \n", | |
| "10 6391 \n", | |
| "11 6542 \n", | |
| "12 7302 \n", | |
| "\n", | |
| " End Strand \n", | |
| "0 10459 + \n", | |
| "1 10370 + \n", | |
| "2 10379 + \n", | |
| "3 10388 + \n", | |
| "4 7638 + \n", | |
| "5 7535 + \n", | |
| "6 2828 + \n", | |
| "7 3873 + \n", | |
| "8 3977 + \n", | |
| "9 526 + \n", | |
| "10 6397 + \n", | |
| "11 6548 + \n", | |
| "12 7308 + " | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Display missing entries as empty strings; the result is not assigned back to df.\n", | |
| "df.fillna(value=\"\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Sequence_ID</th>\n", | |
| " <th>Sequence_Length</th>\n", | |
| " <th>Hit_Count</th>\n", | |
| " <th>Start</th>\n", | |
| " <th>End</th>\n", | |
| " <th>Strand</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>NM_172887.2</td>\n", | |
| " <td>1-10753</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10453</td>\n", | |
| " <td>10459</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>XM_006504928.1</td>\n", | |
| " <td>1-10641</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10364</td>\n", | |
| " <td>10370</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>XM_006504927.1</td>\n", | |
| " <td>1-10650</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10373</td>\n", | |
| " <td>10379</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>XM_006504926.1</td>\n", | |
| " <td>1-10659</td>\n", | |
| " <td>1</td>\n", | |
| " <td>10382</td>\n", | |
| " <td>10388</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>NM_147219.2</td>\n", | |
| " <td>1-8339</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7632</td>\n", | |
| " <td>7638</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>XM_006533065.2</td>\n", | |
| " <td>1-8166</td>\n", | |
| " <td>1</td>\n", | |
| " <td>7529</td>\n", | |
| " <td>7535</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>2822</td>\n", | |
| " <td>2828</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>3867</td>\n", | |
| " <td>3873</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>NM_009592.1</td>\n", | |
| " <td>1-5759</td>\n", | |
| " <td>3</td>\n", | |
| " <td>3971</td>\n", | |
| " <td>3977</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>520</td>\n", | |
| " <td>526</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>6391</td>\n", | |
| " <td>6397</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>6542</td>\n", | |
| " <td>6548</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>NM_009784.2</td>\n", | |
| " <td>1-7415</td>\n", | |
| " <td>4</td>\n", | |
| " <td>7302</td>\n", | |
| " <td>7308</td>\n", | |
| " <td>+</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Sequence_ID Sequence_Length Hit_Count Start \\\n", | |
| "0 NM_172887.2 1-10753 1 10453 \n", | |
| "1 XM_006504928.1 1-10641 1 10364 \n", | |
| "2 XM_006504927.1 1-10650 1 10373 \n", | |
| "3 XM_006504926.1 1-10659 1 10382 \n", | |
| "4 NM_147219.2 1-8339 1 7632 \n", | |
| "5 XM_006533065.2 1-8166 1 7529 \n", | |
| "6 NM_009592.1 1-5759 3 2822 \n", | |
| "7 NM_009592.1 1-5759 3 3867 \n", | |
| "8 NM_009592.1 1-5759 3 3971 \n", | |
| "9 NM_009784.2 1-7415 4 520 \n", | |
| "10 NM_009784.2 1-7415 4 6391 \n", | |
| "11 NM_009784.2 1-7415 4 6542 \n", | |
| "12 NM_009784.2 1-7415 4 7302 \n", | |
| "\n", | |
| " End Strand \n", | |
| "0 10459 + \n", | |
| "1 10370 + \n", | |
| "2 10379 + \n", | |
| "3 10388 + \n", | |
| "4 7638 + \n", | |
| "5 7535 + \n", | |
| "6 2828 + \n", | |
| "7 3873 + \n", | |
| "8 3977 + \n", | |
| "9 526 + \n", | |
| "10 6397 + \n", | |
| "11 6548 + \n", | |
| "12 7308 + " | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Forward-fill: each missing entry takes the last non-missing value above it in its column.\n", | |
| "# DataFrame.ffill() is the direct equivalent of the deprecated fillna(method='ffill').\n", | |
| "df.ffill()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment