Last active
July 4, 2019 12:50
-
-
Save hntrmrrs/b967725709252b8e91bdce0610863a38 to your computer and use it in GitHub Desktop.
HMRC Reporting Funds.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "HMRC Reporting Funds.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"private_outputs": true, | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/hntrmrrs/b967725709252b8e91bdce0610863a38/hmrc-reporting-funds.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "tTnC_IYM6x3u", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"# Imports, constants, etc." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CH347VMaZR7P", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from io import BytesIO\n", | |
"\n", | |
"import pandas as pd\n", | |
"import requests" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "HUuOPyslZGr0", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"#source_url = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/799390/List_of_reporting_funds_A-Z_2019.xlsm'\n", | |
"source_url = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/809973/List_of_reporting_funds_A-Z_2019.xlsm'\n", | |
"sheet_name = 'Web List'\n", | |
"\n", | |
"def download_sheet(source_url=source_url, sheet_name=sheet_name):\n", | |
" r = requests.get(source_url)\n", | |
" f = BytesIO(r.content)\n", | |
" return pd.read_excel(f, sheet_name=sheet_name, header=1, index_col=2)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "qvM0fpLF690P", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"# Download and post-process" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BO_mZXFA7EDW", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"downloaded_df = download_sheet()" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "yrPC4-XrZhgp", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Drop anything with no ISIN or CUSIP\n", | |
"df = downloaded_df.dropna(subset=('ISIN No', 'CUSIP No'), how='all')\n", | |
"# Fix columns\n", | |
"df.columns = [x.strip() for x in df.columns]\n", | |
"# Drop anything which is no longer a reporting fund\n", | |
"df = df[df['Ceased to be an RF on'].isnull()]\n", | |
"# Drop anything which doesn't have either an ISIN starting 'US' or a 9-digit\n", | |
"# CUSIP starting with a number\n", | |
"df = df[(df['ISIN No'].str.startswith('US', na=False))\n", | |
" | ((~df['CUSIP No'].isnull())\n", | |
" & (df['CUSIP No'].str.len() == 9)\n", | |
" & (df['CUSIP No'].str[0].str.isnumeric()))]\n", | |
"df" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "AN2i059F7oXl", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment