OBIS species richness for OSPAR
Loading occurrences from parquet and indexing to H3
The biodiversity indicators will be calculated for hexagonal grid cells. Load occurrences from the full parquet export on S3, and index on the H3 hexagonal grids of resolution 3 and 4.
library(tibble)
library(arrow)
library(dplyr)
library(sf)
library(glue)
library(h3)
<- S3FileSystem$create(
space anonymous = TRUE,
scheme = "https",
endpoint_override = "ams3.digitaloceanspaces.com"
)
<- open_dataset(space$path("obis-datasets/exports/obis_20220710.parquet")) %>%
df select(decimalLongitude, decimalLatitude, minimumDepthInMeters, maximumDepthInMeters, species) %>%
filter(!is.na(species)) %>%
map_batches(function(batch) {
%>%
batch as.data.frame() %>%
mutate_at(vars(minimumDepthInMeters, maximumDepthInMeters), as.numeric) %>%
rowwise() %>%
mutate(depth = mean(na.omit(c(minimumDepthInMeters, maximumDepthInMeters)))) %>%
select(-minimumDepthInMeters, -maximumDepthInMeters) %>%
group_by(decimalLongitude, decimalLatitude, depth, species) %>%
summarize(records = n()) %>%
ungroup() %>%
st_as_sf(coords = c("decimalLongitude", "decimalLatitude")) %>%
mutate(cell3 = geo_to_h3(., res = 3)) %>%
mutate(cell4 = geo_to_h3(., res = 4)) %>%
mutate(p3 = substr(cell3, 5, 6)) %>%
mutate(p4 = substr(cell4, 5, 6)) %>%
st_drop_geometry()
})
<- unique(df$p3)
partitions3 <- unique(df$p4) partitions4
Write to disk
Write the datasets to disk as partitioned parquet files and clean up the data frame:
<- tempdir()
temp_dir <- file.path(temp_dir, "cell3")
dir3 <- file.path(temp_dir, "cell4")
dir4 unlink(dir3, recursive = TRUE)
unlink(dir4, recursive = TRUE)
%>% select(-cell4, -p4) %>% write_dataset(path = dir3, partitioning = "p3", format = "parquet")
df %>% select(-cell3, -p3) %>% write_dataset(path = dir4, partitioning = "p4", format = "parquet")
df
remove(df)
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 1357687 72.6 3196684 170.8 NA 3196684 170.8
## Vcells 63234745 482.5 509571652 3887.8 102400 617309275 4709.7
Calculating indicators
The following method calculates the number of records, species richness, Simpson index, Shannon index, Hurlbert index, and Hill numbers for each cell:
library(gsl)
<- function(df, cell_col = "cell", esn = 50) {
calc <- df %>% rename(cell = !!sym(cell_col))
df <- df %>%
t1 group_by(cell, species) %>%
summarize(ni = sum(records))
<- t1 %>%
t2 group_by(cell) %>%
mutate(n = sum(ni))
<- t2 %>%
t3 group_by(cell, species) %>%
mutate(
hi = -(ni/n*log(ni/n)), si = (ni/n)^2, qi = ni/n,
esi = case_when(
-ni >= esn ~ 1-exp(lngamma(n-ni+1)+lngamma(n-esn+1)-lngamma(n-ni-esn+1)-lngamma(n+1)),
n>= esn ~ 1
n
)
)<- t3 %>%
t4 group_by(cell) %>%
summarize(n = sum(ni), sp = n(), shannon = sum(hi), simpson = sum(si), maxp = max(qi), es = sum(esi))
<- t4 %>%
result mutate(hill_1 = exp(shannon), hill_2 = 1/simpson, hill_inf = 1/maxp)
return(result)
}
Then read each partition, perform the calculations, and transform to a spatial grid:
<- purrr::map(partitions3, function(partition_id) {
results3 <- open_dataset(dir3) %>%
ss filter(p3 == partition_id) %>%
collect()
<- calc(ss, cell_col = "cell3")
res gc()
res%>%
}) bind_rows() %>%
mutate(h3_to_geo_boundary_sf(cell)) %>%
st_as_sf() %>%
st_wrap_dateline()
<- purrr::map(partitions4, function(partition_id) {
results4 <- open_dataset(dir4) %>%
ss filter(p4 == partition_id) %>%
collect()
<- calc(ss, cell_col = "cell4")
res gc()
res%>%
}) bind_rows() %>%
mutate(h3_to_geo_boundary_sf(cell)) %>%
st_as_sf() %>%
st_wrap_dateline()
Use the same procedure to calculate results for shallow and deep occurrences only:
<- purrr::map(partitions3, function(partition_id) {
results3_shallow <- open_dataset(dir3) %>%
ss filter(p3 == partition_id & depth < 100) %>%
collect()
<- calc(ss, cell_col = "cell3")
res gc()
res%>%
}) bind_rows() %>%
mutate(h3_to_geo_boundary_sf(cell)) %>%
st_as_sf() %>%
st_wrap_dateline()
<- purrr::map(partitions3, function(partition_id) {
results3_deep <- open_dataset(dir3) %>%
ss filter(p3 == partition_id & depth >= 100) %>%
collect()
<- calc(ss, cell_col = "cell3")
res gc()
res%>%
}) bind_rows() %>%
mutate(h3_to_geo_boundary_sf(cell)) %>%
st_as_sf() %>%
st_wrap_dateline()
Maps
library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(ggplot2)
library(orthoview)
library(scales)
<- ne_countries(scale = "medium", returnclass = "sf")
world <- st_bbox(c(xmin = -45, xmax = 55, ymin = 10, ymax = 90)) %>%
boundary_atlantic st_as_sfc() %>%
st_segmentize(1) %>%
st_set_crs(4326)
<- read_sf("NACES_MPA_Shp/NACES_MPA.shp")
boundary_naces <- read_sf("OSPAR_Subregions/OSPAR_subregions_20160418_3857.shp") %>% st_buffer(1000) %>% st_union() %>% st_transform(4326)
boundary_ospar
<- -10
lon <- 40
lat
<- results3 %>% orthoview(lon, lat)
ortho3 <- results4 %>% orthoview(lon, lat)
ortho4 <- results3_shallow %>% orthoview(lon, lat)
ortho3_shallow <- results3_deep %>% orthoview(lon, lat)
ortho3_deep <- world %>% orthoview(lon, lat)
world_ortho
<- function(df, variable, name, boundary, trans = "identity", ...) {
make_plot ggplot() +
geom_sf(data = df, aes_string(fill = variable), lwd = 0) +
scale_fill_viridis(option = "inferno", na.value = "white", trans = trans, labels = comma, name = name) +
geom_sf(data = world_ortho, fill = "#ffffff", color = "#000000", size = 0.1) +
geom_sf(data = boundary, color = "#ff4000", fill = NA, size = 1.3) +
theme_minimal() +
guides(fill = guide_colourbar(barwidth = 10)) +
theme(panel.grid.major = element_blank(), legend.position = "bottom") +
coord_sf(crs = create_ortho_string(lon, lat), ...)
}
make_plot(ortho3, "n", "Records", boundary_atlantic, "log10")
make_plot(ortho3, "sp", "Species richness", boundary_atlantic, "log10")
make_plot(ortho3, "n", "Records", boundary_ospar, "log10")
make_plot(ortho3, "sp", "Species richness", boundary_ospar, "log10")
make_plot(ortho4, "n", "Records", boundary_naces, "log10")
make_plot(ortho4, "sp", "Species richness", boundary_naces, "log10")
NACES MPA in detail:
<- boundary_naces %>% st_buffer(10) %>% st_transform(create_ortho_string(lon, lat)) %>% st_bbox()
bb make_plot(ortho3, "sp", "Species richness", boundary_naces, "log10", xlim = c(bb$xmin, bb$xmax), ylim = c(bb$ymin, bb$ymax))
make_plot(ortho4, "sp", "Species richness", boundary_naces, "log10", xlim = c(bb$xmin, bb$xmax), ylim = c(bb$ymin, bb$ymax))
Shallow occurrences
make_plot(ortho3_shallow, "n", "Records", boundary_ospar, "log10")
make_plot(ortho3_shallow, "sp", "Species richness", boundary_ospar, "log10")
Deep occurrences
make_plot(ortho3_deep, "n", "Records", boundary_ospar, "log10")
make_plot(ortho3_deep, "sp", "Species richness", boundary_ospar, "log10")
Species lists
<- function(boundary, filename, min_depth = NULL, max_depth = NULL) {
create_species_list <- polyfill(boundary, res = 4)
cells <- open_dataset(dir4) %>%
df filter(cell4 %in% cells)
if (!is.null(min_depth)) {
<- df %>% filter(depth >= min_depth)
df
}if (!is.null(max_depth)) {
<- df %>% filter(depth < max_depth)
df
}<- df %>% distinct(species) %>% collect()
species write.table(species, filename, row.names = FALSE, col.names = FALSE, quote = FALSE)
}
All species North Atlantic:
create_species_list(boundary_atlantic, "output/species_atlantic.txt")
All species OSPAR:
create_species_list(boundary_ospar, "output/species_ospar.txt")
All species NACES MPA:
create_species_list(boundary_naces, "output/species_naces.txt")
Shallow species North Atlantic:
create_species_list(boundary_atlantic, "output/species_atlantic_shallow.txt", max_depth = 100)
Shallow species OSPAR:
create_species_list(boundary_ospar, "output/species_ospar_shallow.txt", max_depth = 100)
Shallow species NACES MPA:
create_species_list(boundary_naces, "output/species_naces_shallow.txt", max_depth = 100)
Deep species North Atlantic:
create_species_list(boundary_atlantic, "output/species_atlantic_deep.txt", min_depth = 100)
Deep species OSPAR:
create_species_list(boundary_ospar, "output/species_ospar_deep.txt", min_depth = 100)
Deep species NACES MPA:
create_species_list(boundary_naces, "output/species_naces_deep.txt", min_depth = 100)