Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save MikeTrizna/79569361ada7cd57da7c2c2b30b28d00 to your computer and use it in GitHub Desktop.
Save MikeTrizna/79569361ada7cd57da7c2c2b30b28d00 to your computer and use it in GitHub Desktop.
Dropbox (Smithsonian)/idigbio_ferns/Untitled.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Read the tab-separated no_matches.tsv file and preview its first rows.\nunmatched = pd.read_csv(\n    'data/taxonomy_matching/no_matches.tsv',\n    delimiter='\\t',\n)\nunmatched.head()",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": " idigbio_genus idigbio_species idigbio_sciname idigbio_count\n0 NaN NaN abies balsamea f. hudsonia 1\n1 NaN NaN abies balsamea var. macrocarpa 1\n2 NaN NaN abies homolepis 'prostrata' 1\n3 NaN NaN abies nigra 1\n4 NaN NaN acacia acuifera 1",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>idigbio_genus</th>\n <th>idigbio_species</th>\n <th>idigbio_sciname</th>\n <th>idigbio_count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies balsamea f. hudsonia</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies balsamea var. macrocarpa</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies homolepis 'prostrata'</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>abies nigra</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>acacia acuifera</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def split_sciname(row):\n    \"\"\"Add genus and species fields parsed from a two-word scientific name.\n\n    Parameters\n    ----------\n    row : pandas.Series or dict\n        A record holding the name under the 'idigbio_sciname' key, e.g. one\n        DataFrame row when called through apply(axis='columns').\n\n    Returns\n    -------\n    Same type as row\n        A copy of row with 'split_genus' (capitalized first word) and\n        'split_species' (second word) added. Both are NaN unless the name\n        is a string of exactly two whitespace-separated words.\n    \"\"\"\n    # Work on a copy: DataFrame.apply does not support functions that\n    # mutate the object passed to them.\n    row = row.copy()\n    sciname = row['idigbio_sciname']\n    # split() without an argument ignores repeated, leading and trailing\n    # whitespace; a missing name (NaN) simply yields no words.\n    words = sciname.split() if isinstance(sciname, str) else []\n    is_binomial = len(words) == 2\n    # Always set both keys so every returned row carries the same labels\n    # in the same order, whichever branch is taken.\n    row['split_genus'] = words[0].capitalize() if is_binomial else float('nan')\n    row['split_species'] = words[1] if is_binomial else float('nan')\n    return row",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Run split_sciname on every row (axis=1 is the same as axis='columns').\nunmatched = unmatched.apply(split_sciname, axis=1)\nunmatched.head()",
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": " idigbio_count idigbio_genus idigbio_sciname \\\n0 1 NaN abies balsamea f. hudsonia \n1 1 NaN abies balsamea var. macrocarpa \n2 1 NaN abies homolepis 'prostrata' \n3 1 NaN abies nigra \n4 1 NaN acacia acuifera \n\n idigbio_species split_genus split_species \n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN Abies nigra \n4 NaN Acacia acuifera ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>idigbio_count</th>\n <th>idigbio_genus</th>\n <th>idigbio_sciname</th>\n <th>idigbio_species</th>\n <th>split_genus</th>\n <th>split_species</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies balsamea f. hudsonia</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies balsamea var. macrocarpa</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies homolepis 'prostrata'</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>NaN</td>\n <td>abies nigra</td>\n <td>NaN</td>\n <td>Abies</td>\n <td>nigra</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1</td>\n <td>NaN</td>\n <td>acacia acuifera</td>\n <td>NaN</td>\n <td>Acacia</td>\n <td>acuifera</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.8",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Dropbox (Smithsonian)/idigbio_ferns/Untitled.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment