Last active
December 11, 2023 17:25
-
-
Save ababaian/042c3486e293c6498bc6a3850b1ddcd2 to your computer and use it in GitHub Desktop.
srageo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Geospatial origins of SRA bioSamples"
output: pdf_document
---
# Dependencies | |
```{r} | |
library(ggplot2) | |
library(ggExtra) | |
library(gridExtra) | |
library(dplyr) | |
library(sf) | |
library(viridisLite) | |
library(scales) | |
library(rnaturalearth) | |
library(rnaturalearthdata) | |
library(gridExtra) | |
``` | |
# Download/Load Data (victorlin) | |
Geo data was scraped from bioSample XML by vlin.
Repo: https://github.com/serratus-bio/biosample-sql
The CSV file is hosted on S3.
```{r} | |
# Download dataset
# Fetches the gzipped geo CSV from S3 into the working directory.
# A local copy, if already present, is reused rather than fetched again.
library(utils)
geo_url <- 'https://serratus-public.s3.amazonaws.com/geo/vl_geo_210224.csv.gz'
geo_file <- './vl_geo_210224.csv.gz'

if (!file.exists(geo_file)) {
  tryCatch(
    # Use R's default download method instead of requiring an external
    # `wget` binary; mode = 'wb' keeps the gzip bytes intact on Windows.
    download.file(url = geo_url, destfile = geo_file, mode = 'wb'),
    error = function(e) {
      # Do not leave a partial file behind that a later run would reuse.
      unlink(geo_file)
      stop(e)
    }
  )
}
``` | |
```{r} | |
# Load parsed lon-lat data (VL)
# NOTE(review): read.table() defaults treat both ' and " as quote characters
# and '#' as a comment marker; confirm the free-text column parses as intended.
coord.df <- read.table(file = 'vl_geo_210224.csv.gz',
                       sep = ',', header = TRUE)

# Overwrite the header with fixed names (the file is expected to have
# exactly these six columns, in this order).
colnames(coord.df) <- c('SRA', 'BioSample', 'date', 'lon', 'lat', 'geotext')

# Retain only lat/lon if it's strictly within worldmap coordinates.
# which() discards comparisons that evaluate to NA; indexing with the raw
# logical vector would instead insert all-NA rows for missing coordinates.
in_bounds <- coord.df$lat > -90 & coord.df$lat < 90 &
  coord.df$lon > -180 & coord.df$lon < 180
coord.df <- coord.df[which(in_bounds), ]
``` | |
# Histogram of lat/lon | |
```{r} | |
# VN SQL data
# Marginal histograms of the sample coordinates in coord.df.
#
# Bins are anchored to the axis limits (binwidth + boundary) so that the
# outermost bins end exactly on the limits. With `bins = 360` the edge bins
# straddle the xlim() range and are censored by the scale, which silently
# drops the counts nearest the limits. The bin count is unchanged:
# 360 x 1 degree for longitude and 360 x 0.5 degree for latitude.
lon.hist <- ggplot(coord.df, aes(lon)) +
  geom_histogram(binwidth = 1, boundary = -180) +
  xlim(c(-180, 180)) +
  theme_bw()

# Latitude is flipped so its axis runs vertically, like a map's y-axis.
lat.hist <- ggplot(coord.df, aes(lat)) +
  geom_histogram(binwidth = 0.5, boundary = -90) +
  xlim(c(-90, 90)) + coord_flip() +
  theme_bw()

# Stack the two panels: latitude on top, longitude below.
grid.arrange(lat.hist, lon.hist)
``` | |
# Europe Heatmap | |
```{r} | |
# Plot Europe -- DARK
# Density of sample coordinates over a dark country basemap, with the view
# limited to lon -15..25 and lat 30..60.
world <- ne_countries(scale = "medium", returnclass = "sf")

# Dark panel with faint dashed major grid lines
euro_panel <- theme(
  panel.background = element_rect(fill = 'gray10'),
  panel.grid.major = element_line(color = 'gray20',
                                  linetype = 'dashed',
                                  size = 0.2)
)

# Hexagonal bin counts of the coordinates in coord.df
euro_hexes <- geom_hex(data = coord.df,
                       mapping = aes(x = lon, y = lat),
                       bins = 1800)

# Viridis "plasma" palette applied to log2-transformed counts
euro_fill <- scale_fill_continuous(type = "viridis", option = "plasma", trans = "log2")

# Layer order matters: country polygons first, hexes drawn on top
euro <- ggplot(data = world) +
  geom_sf(fill = 'black', color = 'gray5') +
  euro_hexes +
  coord_sf(xlim = c(-15, 25), ylim = c(30, 60)) +
  euro_fill +
  euro_panel
euro
``` | |
# Worldmap heatmap | |
```{r} | |
# Plot Worldmap -- DARK
# Density of sample coordinates over a dark country basemap, full extent.
world <- ne_countries(scale = "medium", returnclass = "sf")

# Basemap: black country polygons with near-black borders
earth_base <- ggplot(data = world) +
  geom_sf(fill = 'black', color = 'gray5')

# Hexagonal bin counts of the coordinates in coord.df, drawn over the basemap
earth_binned <- earth_base +
  geom_hex(data = coord.df,
           mapping = aes(x = lon, y = lat),
           bins = 720)

# Viridis "plasma" palette on log2-transformed counts, dark panel styling
earth <- earth_binned +
  scale_fill_continuous(type = "viridis", option = "plasma", trans = "log2") +
  theme(
    panel.background = element_rect(fill = 'gray10'),
    panel.grid.major = element_line(color = 'gray20',
                                    linetype = 'dashed',
                                    size = 0.2)
  )
earth
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment