Created
February 8, 2019 19:42
-
-
Save MikeTrizna/79569361ada7cd57da7c2c2b30b28d00 to your computer and use it in GitHub Desktop.
Dropbox (Smithsonian)/idigbio_ferns/Untitled.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "unmatched = pd.read_csv('data/taxonomy_matching/no_matches.tsv', sep='\\t')\nunmatched.head()", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": " idigbio_genus idigbio_species idigbio_sciname idigbio_count\n0 NaN NaN abies balsamea f. hudsonia 1\n1 NaN NaN abies balsamea var. macrocarpa 1\n2 NaN NaN abies homolepis 'prostrata' 1\n3 NaN NaN abies nigra 1\n4 NaN NaN acacia acuifera 1", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>idigbio_genus</th>\n <th>idigbio_species</th>\n <th>idigbio_sciname</th>\n <th>idigbio_count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies balsamea f. hudsonia</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies balsamea var. macrocarpa</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies homolepis 'prostrata'</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies nigra</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>acacia acuifera</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def split_sciname(row):\n sciname_split = row['idigbio_sciname'].split(' ')\n if len(sciname_split) == 2:\n row['split_genus'] = sciname_split[0].capitalize()\n row['split_species'] = sciname_split[1]\n return row", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "unmatched = unmatched.apply(split_sciname, axis='columns')\nunmatched.head()", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": " idigbio_count idigbio_genus idigbio_sciname \\\n0 1 NaN abies balsamea f. hudsonia \n1 1 NaN abies balsamea var. macrocarpa \n2 1 NaN abies homolepis 'prostrata' \n3 1 NaN abies nigra \n4 1 NaN acacia acuifera \n\n idigbio_species split_genus split_species \n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN Abies nigra \n4 NaN Acacia acuifera ", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>idigbio_count</th>\n <th>idigbio_genus</th>\n <th>idigbio_sciname</th>\n <th>idigbio_species</th>\n <th>split_genus</th>\n <th>split_species</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies balsamea f. hudsonia</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies balsamea var. macrocarpa</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies homolepis 'prostrata'</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies nigra</td>\n <td>NaN</td>\n <td>Abies</td>\n <td>nigra</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1</td>\n <td>NaN</td>\n <td>acacia acuifera</td>\n <td>NaN</td>\n <td>Acacia</td>\n <td>acuifera</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.6.8", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Dropbox (Smithsonian)/idigbio_ferns/Untitled.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment