Last active
April 5, 2025 20:15
-
-
Save armsp/094a03fd7084ac54597b9d50d2cd9bc0 to your computer and use it in GitHub Desktop.
Notebook that shows the issue with finding the cell that contains a given coordinate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import geopandas as gpd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"usa = gpd.read_file('D:/ai/Climate/satellite_nature/0_25deg/shapefile')\n", | |
"usa = usa[usa['iso'] == 'USA']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Invalid geometries: 28\n" | |
] | |
} | |
], | |
"source": [ | |
"invalid = usa.loc[~usa.geometry.is_valid] #Select rows which are not (~) valid\n", | |
"print(f\"Invalid geometries: {len(invalid)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# list of coordinates that cause issues\n", | |
"\n", | |
"lons = [-89.3985, -80.552238000000003]\n", | |
"lats = [39.7392, 37.161718]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame({'lon': lons, 'lat': lats})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Just to show that the coordinates are within USA" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
"<style>\n", | |
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed {\n", | |
" width: 100%;\n", | |
" display: flex;\n", | |
" }\n", | |
"\n", | |
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed details,\n", | |
" #altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee.vega-embed details summary {\n", | |
" position: relative;\n", | |
" }\n", | |
"</style>\n", | |
"<div id=\"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\"></div>\n", | |
"<script type=\"text/javascript\">\n", | |
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n", | |
" (function(spec, embedOpt){\n", | |
" let outputDiv = document.currentScript.previousElementSibling;\n", | |
" if (outputDiv.id !== \"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\") {\n", | |
" outputDiv = document.getElementById(\"altair-viz-fcbd3b3a789846a08f0cf4f68bbd82ee\");\n", | |
" }\n", | |
"\n", | |
" const paths = {\n", | |
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n", | |
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n", | |
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm/[email protected]?noext\",\n", | |
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n", | |
" };\n", | |
"\n", | |
" function maybeLoadScript(lib, version) {\n", | |
" var key = `${lib.replace(\"-\", \"\")}_version`;\n", | |
" return (VEGA_DEBUG[key] == version) ?\n", | |
" Promise.resolve(paths[lib]) :\n", | |
" new Promise(function(resolve, reject) {\n", | |
" var s = document.createElement('script');\n", | |
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n", | |
" s.async = true;\n", | |
" s.onload = () => {\n", | |
" VEGA_DEBUG[key] = version;\n", | |
" return resolve(paths[lib]);\n", | |
" };\n", | |
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n", | |
" s.src = paths[lib];\n", | |
" });\n", | |
" }\n", | |
"\n", | |
" function showError(err) {\n", | |
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n", | |
" throw err;\n", | |
" }\n", | |
"\n", | |
" function displayChart(vegaEmbed) {\n", | |
" vegaEmbed(outputDiv, spec, embedOpt)\n", | |
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n", | |
" }\n", | |
"\n", | |
" if(typeof define === \"function\" && define.amd) {\n", | |
" requirejs.config({paths});\n", | |
" let deps = [\"vega-embed\"];\n", | |
" require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n", | |
" } else {\n", | |
" maybeLoadScript(\"vega\", \"5\")\n", | |
" .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n", | |
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n", | |
" .catch(showError)\n", | |
" .then(() => displayChart(vegaEmbed));\n", | |
" }\n", | |
" })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"layer\": [{\"data\": {\"url\": \"https://cdn.jsdelivr.net/npm/[email protected]/data/us-10m.json\", \"format\": {\"feature\": \"states\", \"type\": \"topojson\"}}, \"mark\": {\"type\": \"geoshape\", \"fill\": \"lightgray\", \"stroke\": \"white\"}, \"projection\": {\"type\": \"albersUsa\"}}, {\"data\": {\"name\": \"data-d1f1eed0de29e3cf3ac88c46d8170b48\"}, \"mark\": {\"type\": \"point\", \"color\": \"red\", \"shape\": \"cross\"}, \"encoding\": {\"latitude\": {\"field\": \"lat\", \"type\": \"quantitative\"}, \"longitude\": {\"field\": \"lon\", \"type\": \"quantitative\"}}}], \"height\": 800, \"width\": 1000, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-d1f1eed0de29e3cf3ac88c46d8170b48\": [{\"lon\": -89.3985, \"lat\": 39.7392}, {\"lon\": -80.552238, \"lat\": 37.161718}]}}, {\"mode\": \"vega-lite\"});\n", | |
"</script>" | |
], | |
"text/plain": [ | |
"alt.LayerChart(...)" | |
] | |
}, | |
"execution_count": 111, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import altair as alt\n", | |
"alt.data_transformers.disable_max_rows()\n", | |
"from vega_datasets import data\n", | |
"\n", | |
"# Read in polygons from topojson\n", | |
"states = alt.topo_feature(data.us_10m.url, feature='states')\n", | |
"# US states background\n", | |
"background = alt.Chart(states).mark_geoshape(\n", | |
" fill='lightgray',\n", | |
" stroke='white'\n", | |
").properties(\n", | |
" width=1000,\n", | |
" height=800\n", | |
").project('albersUsa')\n", | |
"f = alt.Chart(df).mark_point(shape='cross', color='red').encode(longitude='lon', latitude='lat')\n", | |
"background + f" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=\"EPSG:4326\") # WGS84 # to use longitude and latitude you need to set the coordinate reference system (CRS) to WGS84" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>lon</th>\n", | |
" <th>lat</th>\n", | |
" <th>geometry</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-89.398500</td>\n", | |
" <td>39.739200</td>\n", | |
" <td>POINT (-89.3985 39.7392)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-80.552238</td>\n", | |
" <td>37.161718</td>\n", | |
" <td>POINT (-80.55224 37.16172)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" lon lat geometry\n", | |
"0 -89.398500 39.739200 POINT (-89.3985 39.7392)\n", | |
"1 -80.552238 37.161718 POINT (-80.55224 37.16172)" | |
] | |
}, | |
"execution_count": 113, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gdf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" iso fid_2 cell_id sbcll_d s__0_25 \\\n", | |
"258630 USA 289450.0 18091 3 2 \n", | |
"\n", | |
" geometry \n", | |
"258630 POLYGON ((-89.25 39.5, -89.5 39.5, -89.5 39.75... \n" | |
] | |
}, | |
{ | |
"ename": "GEOSException", | |
"evalue": "TopologyException: side location conflict at -80.552238000000003 37.161718. This can occur if the input geometry is invalid.", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mGEOSException\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[1;32mIn[114], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m lon, lat \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(lons, lats):\n\u001b[1;32m----> 2\u001b[0m cell \u001b[38;5;241m=\u001b[39m usa[\u001b[43musa\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontains\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpoints_from_xy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlon\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mlat\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m]\t\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(cell)\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:2298\u001b[0m, in \u001b[0;36mGeoPandasBase.contains\u001b[1;34m(self, other, align)\u001b[0m\n\u001b[0;32m 2184\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, align\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 2185\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for\u001b[39;00m\n\u001b[0;32m 2186\u001b[0m \u001b[38;5;124;03m each aligned geometry that contains `other`.\u001b[39;00m\n\u001b[0;32m 2187\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2296\u001b[0m \u001b[38;5;124;03m GeoSeries.within\u001b[39;00m\n\u001b[0;32m 2297\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_binary_op\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontains\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malign\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:76\u001b[0m, in \u001b[0;36m_binary_op\u001b[1;34m(op, this, other, align, *args, **kwargs)\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_binary_op\u001b[39m(op, this, other, align, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 74\u001b[0m \u001b[38;5;66;03m# type: (str, GeoSeries, GeoSeries, args/kwargs) -> Series[bool/float]\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Binary operation on GeoSeries objects that returns a Series\"\"\"\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m data, index \u001b[38;5;241m=\u001b[39m \u001b[43m_delegate_binary_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mthis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malign\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Series(data, index\u001b[38;5;241m=\u001b[39mindex)\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\base.py:60\u001b[0m, in \u001b[0;36m_delegate_binary_method\u001b[1;34m(op, this, other, align, *args, **kwargs)\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;28mtype\u001b[39m(this), \u001b[38;5;28mtype\u001b[39m(other))\n\u001b[1;32m---> 60\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43ma_this\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 61\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m data, this\u001b[38;5;241m.\u001b[39mindex\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\array.py:708\u001b[0m, in \u001b[0;36mGeometryArray.contains\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 707\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[1;32m--> 708\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_binary_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontains\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\geopandas\\array.py:699\u001b[0m, in \u001b[0;36mGeometryArray._binary_method\u001b[1;34m(op, left, right, **kwargs)\u001b[0m\n\u001b[0;32m 696\u001b[0m _crs_mismatch_warn(left, right, stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m7\u001b[39m)\n\u001b[0;32m 697\u001b[0m right \u001b[38;5;241m=\u001b[39m right\u001b[38;5;241m.\u001b[39m_data\n\u001b[1;32m--> 699\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mshapely\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\shapely\\decorators.py:77\u001b[0m, in \u001b[0;36mmultithreading_enabled.<locals>.wrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m arr \u001b[38;5;129;01min\u001b[39;00m array_args:\n\u001b[0;32m 76\u001b[0m arr\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m---> 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 78\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m arr, old_flag \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(array_args, old_flags):\n", | |
"File \u001b[1;32mc:\\Users\\joe\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\shapely\\predicates.py:526\u001b[0m, in \u001b[0;36mcontains\u001b[1;34m(a, b, **kwargs)\u001b[0m\n\u001b[0;32m 472\u001b[0m \u001b[38;5;129m@multithreading_enabled\u001b[39m\n\u001b[0;32m 473\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcontains\u001b[39m(a, b, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 474\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns True if geometry B is completely inside geometry A.\u001b[39;00m\n\u001b[0;32m 475\u001b[0m \n\u001b[0;32m 476\u001b[0m \u001b[38;5;124;03m A contains B if no points of B lie in the exterior of A and at least one\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 524\u001b[0m \u001b[38;5;124;03m False\u001b[39;00m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 526\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontains\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"\u001b[1;31mGEOSException\u001b[0m: TopologyException: side location conflict at -80.552238000000003 37.161718. This can occur if the input geometry is invalid." | |
] | |
} | |
], | |
"source": [ | |
"for lon, lat in zip(lons, lats):\n", | |
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n", | |
" print(cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>lon</th>\n", | |
" <th>lat</th>\n", | |
" <th>geometry</th>\n", | |
" <th>index_right</th>\n", | |
" <th>iso</th>\n", | |
" <th>fid_2</th>\n", | |
" <th>cell_id</th>\n", | |
" <th>sbcll_d</th>\n", | |
" <th>s__0_25</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-89.3985</td>\n", | |
" <td>39.7392</td>\n", | |
" <td>POINT (-89.3985 39.7392)</td>\n", | |
" <td>258630</td>\n", | |
" <td>USA</td>\n", | |
" <td>289450.0</td>\n", | |
" <td>18091</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" lon lat geometry index_right iso fid_2 \\\n", | |
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n", | |
"\n", | |
" cell_id sbcll_d s__0_25 \n", | |
"0 18091 3 2 " | |
] | |
}, | |
"execution_count": 115, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# just as above we dont find the cell that contains the second set of coordinates\n", | |
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n", | |
"cell_sjoin" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using `make_valid` just supresses the error instead of finding the correct cell" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Remaining invalid geometries: 0\n" | |
] | |
} | |
], | |
"source": [ | |
"from shapely.validation import make_valid\n", | |
"\n", | |
"usa.geometry = usa.geometry.apply(lambda geom: make_valid(geom) if not geom.is_valid else geom)\n", | |
"\n", | |
"# Check if all geometries are now valid\n", | |
"print(f\"Remaining invalid geometries: {len(usa[~usa.geometry.is_valid])}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" iso fid_2 cell_id sbcll_d s__0_25 \\\n", | |
"258630 USA 289450.0 18091 3 2 \n", | |
"\n", | |
" geometry \n", | |
"258630 POLYGON ((-89.25 39.5, -89.5 39.5, -89.5 39.75... \n", | |
"Empty GeoDataFrame\n", | |
"Columns: [iso, fid_2, cell_id, sbcll_d, s__0_25, geometry]\n", | |
"Index: []\n" | |
] | |
} | |
], | |
"source": [ | |
"for lon, lat in zip(lons, lats):\n", | |
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n", | |
" print(cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 118, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>lon</th>\n", | |
" <th>lat</th>\n", | |
" <th>geometry</th>\n", | |
" <th>index_right</th>\n", | |
" <th>iso</th>\n", | |
" <th>fid_2</th>\n", | |
" <th>cell_id</th>\n", | |
" <th>sbcll_d</th>\n", | |
" <th>s__0_25</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-89.3985</td>\n", | |
" <td>39.7392</td>\n", | |
" <td>POINT (-89.3985 39.7392)</td>\n", | |
" <td>258630</td>\n", | |
" <td>USA</td>\n", | |
" <td>289450.0</td>\n", | |
" <td>18091</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" lon lat geometry index_right iso fid_2 \\\n", | |
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n", | |
"\n", | |
" cell_id sbcll_d s__0_25 \n", | |
"0 18091 3 2 " | |
] | |
}, | |
"execution_count": 118, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n", | |
"cell_sjoin" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using `buffer` does the same. I'd want to actually get the real cell id" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"usa.geometry = usa.geometry.apply(lambda geom: geom.buffer(0) if not geom.is_valid else geom)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Invalid geometries: 0\n" | |
] | |
} | |
], | |
"source": [ | |
"invalid = usa.loc[~usa.geometry.is_valid] #Select rows which are not (~) valid\n", | |
"print(f\"Invalid geometries: {len(invalid)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" fid_2 cell_id sbcll_d s__0_25\n", | |
"258630 289450.0 18091 3 2\n", | |
"Empty DataFrame\n", | |
"Columns: [fid_2, cell_id, sbcll_d, s__0_25]\n", | |
"Index: []\n" | |
] | |
} | |
], | |
"source": [ | |
"for lon, lat in zip(lons, lats):\n", | |
" cell = usa[usa.contains(gpd.points_from_xy([lon], [lat])[0])]\t\n", | |
" print(cell[['fid_2', 'cell_id', 'sbcll_d', 's__0_25']])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 122, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>lon</th>\n", | |
" <th>lat</th>\n", | |
" <th>geometry</th>\n", | |
" <th>index_right</th>\n", | |
" <th>iso</th>\n", | |
" <th>fid_2</th>\n", | |
" <th>cell_id</th>\n", | |
" <th>sbcll_d</th>\n", | |
" <th>s__0_25</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-89.3985</td>\n", | |
" <td>39.7392</td>\n", | |
" <td>POINT (-89.3985 39.7392)</td>\n", | |
" <td>258630</td>\n", | |
" <td>USA</td>\n", | |
" <td>289450.0</td>\n", | |
" <td>18091</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" lon lat geometry index_right iso fid_2 \\\n", | |
"0 -89.3985 39.7392 POINT (-89.3985 39.7392) 258630 USA 289450.0 \n", | |
"\n", | |
" cell_id sbcll_d s__0_25 \n", | |
"0 18091 3 2 " | |
] | |
}, | |
"execution_count": 122, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cell_sjoin = gpd.sjoin(gdf, usa, how='inner', predicate='within')\n", | |
"cell_sjoin" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment