Skip to content

Instantly share code, notes, and snippets.

@jshannon75
Created December 10, 2024 21:41
Show Gist options
  • Save jshannon75/7d29cb4bcde1114423e8e99d7399c416 to your computer and use it in GitHub Desktop.
Save jshannon75/7d29cb4bcde1114423e8e99d7399c416 to your computer and use it in GitHub Desktop.
---
title: "Loading ERA5 data in Python and processing with R"
format: html
editor: visual
editor_options:
  chunk_output_type: console
---
```{r setup}
library(Rarr)
library(tidyverse)
library(lubridate)
library(raster)
library(ncdf4)
library(tmap)
library(sf)
library(gstat)
library(qgisprocess)
```
This is the code that loads the ERA data from the online Zarr file. More info on data available on this page:
https://cloud.google.com/storage/docs/public-datasets/era5
And this one:
https://github.com/google-research/arco-era5/tree/main
#Downloading data in R
The code below gets an overview of the cloud-based single-level reanalysis data for ERA 5.
```{r}
# Root of the cloud-hosted ERA5 single-level reanalysis Zarr store
# (Google's Analysis-Ready, Cloud-Optimized ERA5 mirror on GCS).
s3_all<-"https://storage.googleapis.com/gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2"
# Print the store's arrays, dimensions, and chunking to the console.
zarr_overview(s3_all)
```
The origin date for the data is Jan 1, 1900. Let's get a list of all hours/dates in the dataset (the `time` variable) and then calculate the dates for each hour time slice.
```{r}
# Inspect the time array's structure if needed:
# zarr_overview(paste0(s3_all, "/time"))
s3_time <- read_zarr_array(paste0(s3_all, "/time"))

# ERA5 stores time as whole hours elapsed since this origin.
origin_date <- as.POSIXct("1900-01-01 00:00:00", tz = "UTC")

s3_time_df <- data.frame(time = s3_time) %>%
  mutate(
    date       = origin_date + (time * (60 * 60)),
    hour       = hour(date),
    month      = month(date),
    year       = year(date),
    date       = date(date),   # overwrite POSIXct with calendar date
    month_year = paste0(month, "/", year)
  ) %>%
  filter(date < today() - 90 & date > mdy("12/31/1939"))

# 350617-1092816 is the range of valid values for time, which roughly
# corresponds to the Jan 1940-Aug 2024 window. There's a lag in data
# availability, which is why the cutoff is 90 days before today.
date_list <- unique(s3_time_df$date)
# tail(date_list)
```
We can also download the universal lat/lon coordinates for the data grid and set up a blank raster within the boundaries of those points.
```{r}
# Universal latitude/longitude coordinates for every node of the ERA5 grid.
lat_s3 <- "https://storage.googleapis.com/gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2/latitude"
lat <- data.frame(lat = read_zarr_array(lat_s3))

lon_s3 <- "https://storage.googleapis.com/gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2/longitude"
# Longitudes are stored 0-360; shift to the -180..180 convention.
lon <- data.frame(lon_raw = read_zarr_array(lon_s3)) %>%
  mutate(lon = if_else(lon_raw > 180, lon_raw - 360, lon_raw))

# Point layer of all grid nodes, written out for reuse elsewhere.
points <- bind_cols(lat, lon) %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)
st_write(points, "data/latlon.gpkg")

# Blank global raster plus a hexagonal grid over the same extent.
extent <- extent(-180, 180, -90, 90)
r <- raster(extent, res = 0.5, crs = 4326)
grid <- st_as_sf(st_make_grid(r, cellsize = 0.4, square = FALSE)) %>%
  mutate(grid_id = paste0("G", str_pad(row_number(), 6, pad = "0")))
# Attempt to write to NetCDF
# # Get raster properties
# lon_r <- xFromCol(r_sel, 1:nrow(r)) # Longitude values
# lat_r <- yFromRow(r_sel, 1:ncol(r)) # Latitude values
#
# # Create dimensions
# lon_dim <- ncdim_def("lon", "degrees_east", vals = lon_r, longname = "Longitude")
# lat_dim <- ncdim_def("lat", "degrees_north", vals = lat_r, longname = "Latitude")
#
# # Define CRS metadata
# crs <- ncvar_def("crs", "", dim = list(), longname = "Coordinate Reference System",
# grid_mapping_name = "latitude_longitude",
# epsg_code = "EPSG:4326")
```
Now we can download data for a specific variable. The code below goes by each day within a range (24 hours), calculates a summary statistic (mean temp at 2 meters in this case), and then rasterizes those values. The last part of the code writes a GeoTIFF version of the data.
```{r}
# Zarr array for the 2 m air temperature variable (t2m), in Kelvin.
model_data_s3 <- "https://storage.googleapis.com/gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2/t2m"
# A single example date, handy for testing date_avg() interactively.
date_sel <- date_list[20000]
# Define the main data variable
# nc_file<-"data/era5_data/rasters/era5_data_t2m.nc"
#
# t2m_mean_var <- ncvar_def("T2m_mean", "degrees Kelvin", dim = list(lon_dim, lat_dim),
# longname = "Mean surface temperature")
#
# # Create NetCDF file
# nc <- nc_create(nc_file, list(lon_dim, lat_dim, crs, t2m_mean_var))
# Compute the daily mean of the selected variable for one date and
# interpolate it onto a 0.5-degree raster with SAGA thin-plate splines.
#
# Args:
#   date_sel: a single Date present in s3_time_df$date.
# Reads file-level globals: s3_time_df, model_data_s3, lon, lat.
# Side effects: writes rasters/<date>.tif via qgisprocess.
# Returns: the qgis_run_algorithm() result, invisibly.
date_avg <- function(date_sel) {
  hours_sel <- s3_time_df %>%
    filter(date == date_sel)

  # NOTE(review): `time` values (hours since 1900) are used directly as row
  # indices into the zarr array. This assumes slice i of the array holds
  # time value i — confirm against the store's time origin (possible
  # off-by-one with R's 1-based indexing).
  start_row <- min(hours_sel$time)
  end_row <- max(hours_sel$time)

  # Read ALL hourly slices for the day. Rarr's `index` takes a vector of
  # indices per dimension, so the previous c(start_row, end_row) selected
  # only the first and last hour — the "daily mean" was a 2-hour mean.
  var_data <- read_zarr_array(model_data_s3,
                              index = list(seq(start_row, end_row), NULL))

  # Average over the time dimension: one value per grid point.
  var_mean <- apply(var_data, c(2), mean)

  # lon$lon is already shifted to -180..180 above; the mutate below is a
  # harmless no-op kept so this function also works with raw 0-360 input.
  var_latlon <- data.frame(date = date_sel,
                           var = var_mean,
                           lon_raw = lon$lon,
                           lat = lat) %>%   # 1-col df; column resolves to "lat"
    mutate(lon = if_else(lon_raw > 180, lon_raw - 360, lon_raw)) %>%
    st_as_sf(coords = c("lon", "lat"), crs = 4326)

  # Run `qgis_show_help("saga:thinplatespline")` interactively for options;
  # it was previously called here on every date, spamming the console.
  file_out <- paste0("rasters/", date_sel, ".tif")
  output <- qgis_run_algorithm(
    "saga:thinplatespline",
    SHAPES = var_latlon,        # input point layer
    FIELD = "var",              # attribute to interpolate
    TARGET_USER_SIZE = 0.5,     # output cell size (degrees)
    TARGET_OUT_GRID = file_out
  )
  invisible(output)
}
# Select the days to process: July 1975, after the 7th.
# (Swap the filter to pick other windows, e.g. whole years.)
dates_sel <- s3_time_df %>%
  filter(month_year == "7/1975" & date > "1975-07-07") %>%
  distinct(date)

# Build one GeoTIFF per day.
map(dates_sel$date, date_avg)
```
Create an animated GIF map from the daily rasters
```{r}
library(rnaturalearth)
library(tmaptools)

# GeoTIFFs produced above for July 1975.
rasters <- list.files(path = "rasters", pattern = "1975", full.names = TRUE)
rasters <- rasters[grepl("tif", rasters)]

# Country outlines for map context.
world <- ne_countries(scale = "medium", returnclass = "sf")
# Render one daily GeoTIFF as a styled temperature map and save it as a PNG.
#
# Args:
#   file_sel: path to a single-band GeoTIFF named <date>.tif.
# Reads file-level global: world (country outlines).
# Side effects: writes rasters/image/map_<date>.png.
raster_map <- function(file_sel) {
  # file_sel <- "rasters/1975-07-25.tif"
  data1 <- raster(file_sel)

  # Derive the date label from the file name itself. The previous
  # substr(file_sel, 9, 18) hard-coded the "rasters/" prefix length and
  # broke for any other directory.
  data_date <- sub("\\.tif$", "", basename(file_sel))

  map <- tm_shape(data1) +
    tm_raster(style = "quantile", n = 7,
              palette = get_brewer_pal("OrRd", n = 7, plot = FALSE)) +
    tm_layout(legend.show = FALSE,
              main.title = data_date,
              main.title.size = 1) +
    tm_shape(world) +
    tm_borders(col = "black")

  tmap_save(map, paste0("rasters/image/map_", data_date, ".png"),
            width = 10, height = 5)
}
# Render every daily raster to a PNG frame.
map(rasters, raster_map)

# Stitch the frames into an animated GIF, two frames per second.
raster_maps <- list.files(path = "rasters/image", pattern = "png",
                          full.names = TRUE)

library(gifski)
gifski(raster_maps, gif_file = "jul75_temp.gif",
       delay = 0.5, width = 1000, height = 500)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment