Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save armsp/094a03fd7084ac54597b9d50d2cd9bc0 to your computer and use it in GitHub Desktop.
Save armsp/094a03fd7084ac54597b9d50d2cd9bc0 to your computer and use it in GitHub Desktop.
Notebook that shows the issue with finding the cell that contains a given coordinate
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import geopandas as gpd"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"usa = gpd.read_file('D:/ai/Climate/satellite_nature/0_25deg/shapefile')\n",
"usa = usa[usa['iso'] == 'USA']"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Invalid geometries: 28\n"
]
}
],
"source": [
"invalid = usa.loc[~usa.geometry.is_valid] #Select rows which are not (~) valid\n",
"print(f\"Invalid geometries: {len(invalid)}\")"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# list of coordinates that cause issues\n",
"\n",
"lons = [-89.3985, -80.552238000000003]\n",
"lats = [39.7392, 37.161718]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'lon': lons, 'lat': lats})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Just to show that the coordinates are within USA"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<style>\n",
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed {\n",
" width: 100%;\n",
" display: flex;\n",
" }\n",
"\n",
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed details,\n",
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed details summary {\n",
" position: relative;\n",
" }\n",
"</style>\n",
"<div id=\"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\") {\n",
" outputDiv = document.getElementById(\"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\");\n",
" }\n",
"\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm/[email protected]?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" let deps = [\"vega-embed\"];\n",
" require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"layer\": [{\"data\": {\"url\": \"https://cdn.jsdelivr.net/npm/[email protected]/data/us-10m.json\", \"format\": {\"feature\": \"states\", \"type\": \"topojson\"}}, \"mark\": {\"type\": \"geoshape\", \"fill\": \"lightgray\", \"stroke\": \"white\"}, \"projection\": {\"type\": \"albersUsa\"}}, {\"data\": {\"name\": \"data-d1f1eed0de29e3cf3ac88c46d8170b48\"}, \"mark\": {\"type\": \"point\", \"color\": \"red\", \"shape\": \"cross\"}, \"encoding\": {\"latitude\": {\"field\": \"lat\", \"type\": \"quantitative\"}, \"longitude\": {\"field\": \"lon\", \"type\": \"quantitative\"}}}], \"height\": 800, \"width\": 1000, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-d1f1eed0de29e3cf3ac88c46d8170b48\": [{\"lon\": -89.3985, \"lat\": 39.7392}, {\"lon\": -80.552238, \"lat\": 37.161718}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.LayerChart(...)"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import altair as alt\n",
"alt.data_transformers.disable_max_rows()\n",
"from vega_datasets import data\n",
"\n",
"# Read in polygons from topojson\n",
"states = alt.topo_feature(data.us_10m.url, feature='states')\n",
"# US states background\n",
"background = alt.Chart(states).mark_geoshape(\n",
" fill='lightgray',\n",
" stroke='white'\n",
").properties(\n",
" width=1000,\n",
" height=800\n",
").project('albersUsa')\n",
"f = alt.Chart(df).mark_point(shape='cross', color='red').encode(longitude='lon', latitude='lat')\n",
"background + f"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=\"EPSG:4326\") # WGS84 # to use longitude and latitude you need to set the coordinate reference system (CRS) to WGS84"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lon</th>\n",
" <th>lat</th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-89.398500</td>\n",
" <td>39.739200</td>\n",
" <td>POINT (-89.3985 39.7392)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-80.552238</td>\n",
" <td>37.161718</td>\n",
" <td>POINT (-80.55224 37.16172)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lon lat geometry\n",
"0 -89.398500 39.739200 POINT (-89.3985 39.7392)\n",
"1 -80.552238 37.161718 POINT (-80.55224 37.16172)"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdf"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" iso fid_2 cell_id sbcll_d s__0_25 \\\n",
"258630 USA 289450.0 18091 3 2 \n",
"\n",
" geometry \n",
"258630 POLYGON ((-89.25 39.5, -89.5 39.5, -89.5 39.75... \n"
]
},
{
"ename": "GEOSException",
"evalue": "TopologyException: side location conflict at -80.552238000000003 37.161718. This can occur if the input geometry is invalid.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mGEOSException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[114], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m lon, lat \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(lons, lats):\n\u001b[1;32m----> 2\u001b[0m cell \u001b[38;5;241m=\u001b[39m usa[\u001b[43musa\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontains\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpoints_from_xy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlon\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mlat\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m]\t\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(cell)\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:2298\u001b[0m, in \u001b[0;36mGeoPandasBase.contains\u001b[1;34m(self, other, align)\u001b[0m\n\u001b[0;32m 2184\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, align\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 2185\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for\u001b[39;00m\n\u001b[0;32m 2186\u001b[0m \u001b[38;5;124;03m each aligned geometry that contains `other`.\u001b[39;00m\n\u001b[0;32m 2187\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2296\u001b[0m \u001b[38;5;124;03m GeoSeries.within\u001b[39;00m\n\u001b[0;32m 2297\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_binary_op\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontains\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malign\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:76\u001b[0m, in \u001b[0;36m_binary_op\u001b[1;34m(op, this, other, align, *args, **kwargs)\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_binary_op\u001b[39m(op, this, other, align, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 74\u001b[0m \u001b[38;5;66;03m# type: (str, GeoSeries, GeoSeries, args/kwargs) -> Series[bool/float]\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Binary operation on GeoSeries objects that returns a Series\"\"\"\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m data, index \u001b[38;5;241m=\u001b[39m \u001b[43m_delegate_binary_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mthis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malign\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Series(data, index\u001b[38;5;241m=\u001b[39mindex)\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:60\u001b[0m, in \u001b[0;36m_delegate_binary_method\u001b[1;34m(op, this, other, align, *args, **kwargs)\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;28mtype\u001b[39m(this), \u001b[38;5;28mtype\u001b[39m(other))\n\u001b[1;32m---> 60\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43ma_this\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data, this\u001b[38;5;241m.\u001b[39mindex\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\array.py:708\u001b[0m, in \u001b[0;36mGeometryArray.contains\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 707\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[1;32m--> 708\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_binary_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontains\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\array.py:699\u001b[0m, in \u001b[0;36mGeometryArray._binary_method\u001b[1;34m(op, left, right, **kwargs)\u001b[0m\n\u001b[0;32m 696\u001b[0m _crs_mismatch_warn(left, right, stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m7\u001b[39m)\n\u001b[0;32m 697\u001b[0m right \u001b[38;5;241m=\u001b[39m right\u001b[38;5;241m.\u001b[39m_data\n\u001b[1;32m--> 699\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mshapely\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\shapely\\decorators.py:77\u001b[0m, in \u001b[0;36mmultithreading_enabled.<locals>.wrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m arr \u001b[38;5;129;01min\u001b[39;00m array_args:\n\u001b[0;32m 76\u001b[0m arr\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m---> 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 78\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m arr, old_flag \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(array_args, old_flags):\n",
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\shapely\\predicates.py:526\u001b[0m, in \u001b[0;36mcontains\u001b[1;34m(a, b, **kwargs)\u001b[0m\n\u001b[0;32m 472\u001b[0m \u001b[38;5;129m@multithreading_enabled\u001b[39m\n\u001b[0;32m 473\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(a, b, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 474\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns True if geometry B is completely inside geometry A.\u001b[39;00m\n\u001b[0;32m 475\u001b[0m \n\u001b[0;32m 476\u001b[0m \u001b[38;5;124;03m A contains B if no points of B lie in the exterior of A and at least one\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 524\u001b[0m \u001b[38;5;124;03m False\u001b[39;00m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 526\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontains\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[1;31mGEOSException\u001b[0m: TopologyException: side location conflict at -80.552238000000003 37.161718. This can occur if the input geometry is invalid."
]
}
],
"source": [
"for lon, lat in zip(lons, lats):\n",
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n",
" print(cell)"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lon</th>\n",
" <th>lat</th>\n",
" <th>geometry</th>\n",
" <th>index_right</th>\n",
" <th>iso</th>\n",
" <th>fid_2</th>\n",
" <th>cell_id</th>\n",
" <th>sbcll_d</th>\n",
" <th>s__0_25</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-89.3985</td>\n",
" <td>39.7392</td>\n",
" <td>POINT (-89.3985 39.7392)</td>\n",
" <td>258630</td>\n",
" <td>USA</td>\n",
" <td>289450.0</td>\n",
" <td>18091</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lon lat geometry index_right iso fid_2 \\\n",
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n",
"\n",
" cell_id sbcll_d s__0_25 \n",
"0 18091 3 2 "
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# just as above we dont find the cell that contains the second set of coordinates\n",
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n",
"cell_sjoin"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using `make_valid` just supresses the error instead of finding the correct cell"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Remaining invalid geometries: 0\n"
]
}
],
"source": [
"from shapely.validation import make_valid\n",
"\n",
"usa.geometry = usa.geometry.apply(lambda geom: make_valid(geom) if not geom.is_valid else geom)\n",
"\n",
"# Check if all geometries are now valid\n",
"print(f\"Remaining invalid geometries: {len(usa[~usa.geometry.is_valid])}\")"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" iso fid_2 cell_id sbcll_d s__0_25 \\\n",
"258630 USA 289450.0 18091 3 2 \n",
"\n",
" geometry \n",
"258630 POLYGON ((-89.25 39.5, -89.5 39.5, -89.5 39.75... \n",
"Empty GeoDataFrame\n",
"Columns: [iso, fid_2, cell_id, sbcll_d, s__0_25, geometry]\n",
"Index: []\n"
]
}
],
"source": [
"for lon, lat in zip(lons, lats):\n",
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n",
" print(cell)"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lon</th>\n",
" <th>lat</th>\n",
" <th>geometry</th>\n",
" <th>index_right</th>\n",
" <th>iso</th>\n",
" <th>fid_2</th>\n",
" <th>cell_id</th>\n",
" <th>sbcll_d</th>\n",
" <th>s__0_25</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-89.3985</td>\n",
" <td>39.7392</td>\n",
" <td>POINT (-89.3985 39.7392)</td>\n",
" <td>258630</td>\n",
" <td>USA</td>\n",
" <td>289450.0</td>\n",
" <td>18091</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lon lat geometry index_right iso fid_2 \\\n",
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n",
"\n",
" cell_id sbcll_d s__0_25 \n",
"0 18091 3 2 "
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n",
"cell_sjoin"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using `buffer` does the same. I'd want to actually get the real cell id"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"usa.geometry = usa.geometry.apply(lambda geom: geom.buffer(0) if not geom.is_valid else geom)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Invalid geometries: 0\n"
]
}
],
"source": [
"invalid = usa.loc[~usa.geometry.is_valid] #Select rows which are not (~) valid\n",
"print(f\"Invalid geometries: {len(invalid)}\")"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" fid_2 cell_id sbcll_d s__0_25\n",
"258630 289450.0 18091 3 2\n",
"Empty DataFrame\n",
"Columns: [fid_2, cell_id, sbcll_d, s__0_25]\n",
"Index: []\n"
]
}
],
"source": [
"for lon, lat in zip(lons, lats):\n",
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n",
" print(cell[['fid_2', 'cell_id', 'sbcll_d', 's__0_25']])"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lon</th>\n",
" <th>lat</th>\n",
" <th>geometry</th>\n",
" <th>index_right</th>\n",
" <th>iso</th>\n",
" <th>fid_2</th>\n",
" <th>cell_id</th>\n",
" <th>sbcll_d</th>\n",
" <th>s__0_25</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-89.3985</td>\n",
" <td>39.7392</td>\n",
" <td>POINT (-89.3985 39.7392)</td>\n",
" <td>258630</td>\n",
" <td>USA</td>\n",
" <td>289450.0</td>\n",
" <td>18091</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lon lat geometry index_right iso fid_2 \\\n",
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n",
"\n",
" cell_id sbcll_d s__0_25 \n",
"0 18091 3 2 "
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n",
"cell_sjoin"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment