Created
May 23, 2024 12:24
-
-
Save bubbobne/47f55ede17d95c82b3a298567fcba6e3 to your computer and use it in GitHub Desktop.
Copare sLOO of kriging with measured data and also nearest station
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "2a1620cd-344e-4168-ae12-2b64eabd6a66", | |
"metadata": {}, | |
"source": [ | |
"# LOO kriging" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "77087d35-4cef-4d37-8220-8892e15dd400", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import matplotlib.pyplot as plt\n", | |
"import matplotlib.style as style\n", | |
"from geoframepy.timeseries.io_csv import*\n", | |
"import matplotlib.pylab as pylab\n", | |
"import plotly as py\n", | |
"import plotly.graph_objs as go\n", | |
"os.chdir(\"/home/andreisd/Documents/project/GWS2022/NON_SCALE/data/\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "412eff72-1a5e-4cad-915c-1cf045c6f140", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# plot meteostations\n", | |
"\n", | |
"import geopandas as gpd\n", | |
"\n", | |
"staz=gpd.read_file(\"./meteo_data/Station2024.shp\")\n", | |
"staz['coords'] = staz['geometry'].apply(lambda x: x.representative_point().coords[:]) \n", | |
"staz['coords'] = [coords[0][:2] for coords in staz['coords']] # no z\n", | |
"fig=plt.figure(figsize=(20,10))\n", | |
"ax=fig.add_subplot(111)\n", | |
"staz.plot(ax=ax, label='meteostations', color='red', edgecolor='k', markersize=100)\n", | |
"ax.legend()\n", | |
"for idx, row in staz.iterrows():\n", | |
" plt.annotate(text=row['ID'], xy=row['coords'], horizontalalignment='right', color='k')\n", | |
"ax.legend()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "fb4362b2-ebfa-40c4-a406-a37c10948925", | |
"metadata": {}, | |
"source": [ | |
"## Temperature\n", | |
"\n", | |
"Plot from the leave one out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "824a65f8-76d6-4549-8664-9245658b6ac1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"start_date = \"2012-01-01 01:00\"\n", | |
"measureded =pandas_read_OMS_timeseries(\"meteo_data/temperature_cleaned.csv\")\n", | |
"estimated =pandas_read_OMS_timeseries(\"LOO_kriging/new_kriging_temperature_trend.csv\")\n", | |
"\n", | |
"estimated = estimated[estimated.index >= start_date]\n", | |
"measureded = measureded[measureded.index >= start_date]\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "05cdb3f6-5a78-47b8-98a9-7222bdd7c753", | |
"metadata": {}, | |
"source": [ | |
"### nearest station" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "644268fc-0bfa-4fbc-bd0f-6a16f4b7cc9c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"colnames = measureded.columns\n", | |
"\n", | |
"def get_nearest(row, df):\n", | |
" # Check if the geometry is valid and exists\n", | |
" point = row.geometry\n", | |
" if point is None or not point.is_valid:\n", | |
" return pd.Series([None, None])\n", | |
"\n", | |
" # Retrieve the nearest geometry using the spatial index\n", | |
" # Ensure the input to nearest is correctly formatted\n", | |
" try:\n", | |
" # Query the spatial index\n", | |
" temp = df.drop(row.name, axis=0)\n", | |
" lower_bound = row.z_dem - 300\n", | |
" upper_bound = row.z_dem + 300\n", | |
" mask = (temp['z_dem'] >= lower_bound) & (temp['z_dem'] <= upper_bound)\n", | |
" temp = temp[mask]\n", | |
" sindex = temp.sindex\n", | |
" nearest_idx = list(sindex.nearest(point, 1, return_distance = True))\n", | |
"\n", | |
" nearest_point = temp.iloc[nearest_idx[0][1][0]]\n", | |
" nearest_distance = nearest_idx[1][0]\n", | |
" return pd.Series([int(nearest_point['ID']), nearest_distance])\n", | |
" except Exception as e:\n", | |
" # Handle any exceptions that may arise and print them to understand the issue\n", | |
" print(f\"Error processing row: {e}\")\n", | |
" return pd.Series([None, None])\n", | |
" \n", | |
"staz[\"strID\"] = staz['ID'].astype(str)\n", | |
"valid_points_gdf = staz[staz['strID'].isin(colnames)]\n", | |
"sindex = valid_points_gdf.sindex\n", | |
"\n", | |
"\n", | |
"# Apply the function to the original dataframe\n", | |
"valid_points_gdf[['nearest_id', 'nearest_distance']] = valid_points_gdf.apply(lambda row: get_nearest(row, valid_points_gdf), axis=1)\n", | |
"\n", | |
"# plot station against closest centroid\n", | |
"fig=plt.figure(figsize=(25,40))\n", | |
"measureded =pandas_read_OMS_timeseries(\"meteo_data/temperature_cleaned.csv\")\n", | |
"\n", | |
"xv=np.linspace(-20,40,100)\n", | |
"\n", | |
"pos=1\n", | |
"colnames = measureded.columns\n", | |
"\n", | |
"for idx, row in valid_points_gdf.iterrows():\n", | |
" station_name =str(row[\"ID\"])\n", | |
" nearste_st = str(int(row[\"nearest_id\"]))\n", | |
" d = str(row[\"nearest_distance\"])\n", | |
" ax=fig.add_subplot(9,5,pos)\n", | |
" ax.scatter(measureded[station_name],measureded[nearste_st] , s=40, color='r', alpha=0.25, edgecolor='r')\n", | |
" ax.plot(xv,xv, c='k')\n", | |
" ax.set_title('station %s vs station %s at distance %s' % (station_name,nearste_st,d) )\n", | |
" ax.set_xlabel('Measured Station ')\n", | |
" ax.set_ylabel('Nearste ')\n", | |
" ax.axis('equal')\n", | |
" temp_corr = measureded[station_name].corr(measureded[nearste_st])\n", | |
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n", | |
" ax.set_xlim([-25,45])\n", | |
" ax.set_ylim([-25,45])\n", | |
" ax.grid()\n", | |
" pos=pos+1\n", | |
" \n", | |
"fig.tight_layout()\n", | |
"fig.savefig('./temp_station.png', dpi=300)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5a14e4bf-4c46-427e-8b57-3ddf82b72186", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for idx, row in valid_points_gdf.iterrows():\n", | |
" station_name =str(row[\"ID\"])\n", | |
" nearste_st = str(int(row[\"nearest_id\"]))\n", | |
" d = measureded[station_name] - measureded[nearste_st]\n", | |
" d = d.abs()\n", | |
" mask = d>15\n", | |
" d = d[mask]\n", | |
" print(row.ID)\n", | |
" print(d)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "143a9091-5ed1-495b-a008-b81eef86b127", | |
"metadata": {}, | |
"source": [ | |
"### LOO" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9605ef96-6c45-470c-b0ab-398596aa7b3c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# plot station against closest centroid\n", | |
"fig=plt.figure(figsize=(25,40))\n", | |
"\n", | |
"xv=np.linspace(-20,40,100)\n", | |
"\n", | |
"pos=1\n", | |
"\n", | |
"for station_name in colnames:\n", | |
" station_name =str(station_name)\n", | |
" ax=fig.add_subplot(9,5,pos)\n", | |
" ax.scatter(measureded[station_name],estimated[station_name] , s=40, color='r', alpha=0.25, edgecolor='r')\n", | |
" ax.plot(xv,xv, c='k')\n", | |
" ax.set_title('TEMP - station %s' % station_name)\n", | |
" ax.set_xlabel('Measured Station ')\n", | |
" ax.set_ylabel('Interpolated Centoid ')\n", | |
" ax.axis('equal')\n", | |
" temp_corr = measureded[station_name].corr(estimated[station_name])\n", | |
" ax.text(-15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n", | |
" ax.set_xlim([-25,45])\n", | |
" ax.set_ylim([-25,45])\n", | |
" ax.grid()\n", | |
" pos=pos+1\n", | |
" \n", | |
"fig.tight_layout()\n", | |
"fig.savefig('./temp_kriging.png', dpi=300)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b73f2c2f-7d6a-46e5-bc34-486055ef66f9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"t = measureded['473']\n", | |
"mask = t>22\n", | |
"t[mask]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1551bf9c-4533-4e13-b76f-0feb6961ceda", | |
"metadata": {}, | |
"source": [ | |
"## Precipitation \n", | |
"\n", | |
"LOO " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "30a6dc77-76d2-4da9-8e23-1f8894b6b1de", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"start_date = \"2012-01-01 01:00\"\n", | |
"measureded =pandas_read_OMS_timeseries(\"meteo_data/precipitation_cleaned.csv\")\n", | |
"estimated =pandas_read_OMS_timeseries(\"LOO_kriging/new_kriging_precipitation_trend_10closest.csv\")\n", | |
"\n", | |
"estimated = estimated[estimated.index >= start_date]\n", | |
"measureded = measureded[measureded.index >= start_date]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b93241a0-6d77-42b1-ad79-2aea17040925", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# plot station against closest centroid\n", | |
"fig=plt.figure(figsize=(25,40))\n", | |
"\n", | |
"xv=np.linspace(-20,40,100)\n", | |
"\n", | |
"pos=1\n", | |
"colnames = measureded.columns\n", | |
"\n", | |
"for station_name in colnames:\n", | |
" station_name =str(station_name)\n", | |
" ax=fig.add_subplot(9,5,pos)\n", | |
" ax.scatter(measureded[station_name],estimated[station_name] , s=40, color='b', alpha=0.25, edgecolor='b')\n", | |
" ax.plot(xv,xv, c='k')\n", | |
" ax.set_title('Precipitation - station %s' % station_name)\n", | |
" ax.set_xlabel('Measured Station ')\n", | |
" ax.set_ylabel('Interpolated Centoid ')\n", | |
" ax.axis('equal')\n", | |
" temp_corr = measureded[station_name].corr(estimated[station_name])\n", | |
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n", | |
" ax.set_xlim([0,30])\n", | |
" ax.set_ylim([0,40])\n", | |
" ax.grid()\n", | |
" pos=pos+1\n", | |
" \n", | |
"fig.tight_layout()\n", | |
"fig.savefig('./precip_kriging.png', dpi=300)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "2bcfe192-a900-41e3-b7dc-de8500a4f8be", | |
"metadata": {}, | |
"source": [ | |
"## Precipitation\n", | |
"\n", | |
"Nearest station" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "7a667aa8-0d5c-4b62-915c-78b449de2093", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"staz[\"strID\"] = staz['ID'].astype(str)\n", | |
"valid_points_gdf = staz[staz['strID'].isin(colnames)]\n", | |
"sindex = valid_points_gdf.sindex\n", | |
"\n", | |
"def get_nearest2(row, df):\n", | |
" # Check if the geometry is valid and exists\n", | |
" point = row.geometry\n", | |
" if point is None or not point.is_valid:\n", | |
" return pd.Series([None, None])\n", | |
"\n", | |
" # Retrieve the nearest geometry using the spatial index\n", | |
" # Ensure the input to nearest is correctly formatted\n", | |
" try:\n", | |
" # Query the spatial index\n", | |
" temp = df.drop(row.name, axis=0)\n", | |
" sindex = temp.sindex\n", | |
" nearest_idx = list(sindex.nearest(point, 1, return_distance = True))\n", | |
"\n", | |
" nearest_point = temp.iloc[nearest_idx[0][1][0]]\n", | |
" nearest_distance = nearest_idx[1][0]\n", | |
" return pd.Series([int(nearest_point['ID']), nearest_distance])\n", | |
" except Exception as e:\n", | |
" # Handle any exceptions that may arise and print them to understand the issue\n", | |
" print(f\"Error processing row: {e}\")\n", | |
" return pd.Series([None, None])\n", | |
"# Apply the function to the original dataframe\n", | |
"valid_points_gdf[['nearest_id', 'nearest_distance']] = valid_points_gdf.apply(lambda row: get_nearest2(row, valid_points_gdf), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "3be34e40-9866-49b2-9c9d-17e98e973f10", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"# plot station against closest centroid\n", | |
"fig=plt.figure(figsize=(25,40))\n", | |
"\n", | |
"xv=np.linspace(-20,40,100)\n", | |
"\n", | |
"pos=1\n", | |
"colnames = measureded.columns\n", | |
"\n", | |
"for idx, row in valid_points_gdf.iterrows():\n", | |
" station_name =str(row[\"ID\"])\n", | |
" nearste_st = str(int(row[\"nearest_id\"]))\n", | |
" d = str(row[\"nearest_distance\"])\n", | |
" ax=fig.add_subplot(9,5,pos)\n", | |
" ax.scatter(measureded[station_name],measureded[nearste_st] , s=40, color='b', alpha=0.25, edgecolor='b')\n", | |
" ax.plot(xv,xv, c='k')\n", | |
" ax.set_title('station %s vs station %s at distance %s' % (station_name,nearste_st,d) )\n", | |
" ax.set_xlabel('Measured Station ')\n", | |
" ax.set_ylabel('Nearste ')\n", | |
" ax.axis('equal')\n", | |
" temp_corr = measureded[station_name].corr(measureded[nearste_st])\n", | |
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n", | |
" ax.set_xlim([0,30])\n", | |
" ax.set_ylim([0,40])\n", | |
" ax.grid()\n", | |
" pos=pos+1\n", | |
" \n", | |
"fig.tight_layout()\n", | |
"fig.savefig('./precip_station.png', dpi=300)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9ee0573c-3922-4e31-96db-2fb6f9d50fd2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a65576e7-c3f8-4adc-ba2a-957bbce8b34b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.12" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment