Created
April 8, 2024 03:55
-
-
Save avivajpeyi/d86dd6b1302c261291ed0aea135370e6 to your computer and use it in GitHub Desktop.
nz_retirement_webscraper.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyOXV6gewudRTtBO4U2WaXYi", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/avivajpeyi/d86dd6b1302c261291ed0aea135370e6/nz_retirement_webscraper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Download NZ retirement village data" | |
], | |
"metadata": { | |
"id": "nL88I63B1DLo" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "x4fpDo9r0OBC", | |
"outputId": "df686531-e716-4034-c073-850ed1921c36" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Cloning into 'nz_retirement_webscraper'...\n", | |
"remote: Enumerating objects: 324, done.\u001b[K\n", | |
"remote: Counting objects: 100% (324/324), done.\u001b[K\n", | |
"remote: Compressing objects: 100% (179/179), done.\u001b[K\n", | |
"remote: Total 324 (delta 90), reused 320 (delta 87), pack-reused 0\u001b[K\n", | |
"Receiving objects: 100% (324/324), 1.79 MiB | 11.02 MiB/s, done.\n", | |
"Resolving deltas: 100% (90/90), done.\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fetch the scraper project and run everything from inside its checkout.\n", | |
"! git clone https://github.com/avivajpeyi/nz_retirement_webscraper.git\n", | |
"%cd nz_retirement_webscraper\n", | |
"! pwd\n", | |
"# %pip installs into the environment of the running kernel.\n", | |
"%pip install -r requirements.txt\n", | |
"# -O overwrites data.json; -o would append on a re-run and leave invalid JSON behind.\n", | |
"! scrapy crawl retirement -O data.json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import json\n", | |
"\n", | |
"# Parse the crawler's output file into Python objects.\n", | |
"with open('data.json') as json_file:\n", | |
"    data = json.load(json_file)\n", | |
"\n", | |
"# The number of top-level entries is reported as the page count.\n", | |
"num_elements = len(data)\n", | |
"print(\"Number of pages in the JSON:\", num_elements)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "IwDtOJ2G0tC_", | |
"outputId": "60e87666-7ead-414c-f0dc-1c1fdbb89733" | |
}, | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Number of pages in the JSON: 485\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment