Diversity indicators using OBIS data

This notebook shows how to calculate diversity indicators layers based on OBIS data. The indicators are calculated for a discrete global grid.

Read the occurrence data

Parquet exports of all OBIS presence records are available from https://obis.org/manual/access/. Read the coordinates and species columns, and summarize by location:

library(arrow)
library(dplyr)

occ <- open_dataset("~/Desktop/temp/obis_20211017.parquet") %>%
  select(decimalLongitude, decimalLatitude, species) %>%
  group_by(decimalLongitude, decimalLatitude, species) %>%
  collect() %>%
  summarize(records = n())

Create a discrete global grid

Create an ISEA discrete global grid of resolution 9 using the dggridR package:

library(dggridR)

dggs <- dgconstruct(projection = "ISEA", topology = "HEXAGON", res = 9)

Here’s on overview of all possible resolutions:

inf <- dginfo(dggs)
##    res            cells                area_km      spacing_km          cls_km
## 1    0               12 51006562.1724088713527 7053.6524314108 8199.5003701020
## 2    1               32 17002187.3908029571176 4072.4281300451 4678.9698717297
## 3    2               92  5667395.7969343187287 2351.2174771369 2691.2520709129
## 4    3              272  1889131.9323114396539 1357.4760433484 1551.8675487723
## 5    4              812   629710.6441038132180  783.7391590456  895.6018416484
## 6    5             2432   209903.5480346044060  452.4920144495  517.0049969031
## 7    6             7292    69967.8493448681256  261.2463863485  298.4793231872
## 8    7            21872    23322.6164482893764  150.8306714832  172.3244908961
## 9    8            65612     7774.2054827631255   87.0821287828   99.4910857272
## 10   9           196832     2591.4018275877088   50.2768904944   57.4411078487
## 11  10           590492      863.8006091959029   29.0273762609   33.1636203580
## 12  11          1771472      287.9335363986343   16.7589634981   19.1470215381
## 13  12          5314412       95.9778454662114    9.6757920870   11.0545373459
## 14  13         15943232       31.9926151554038    5.5863211660    6.3823399790
## 15  14         47829692       10.6642050518013    3.2252640290    3.6848456792
## 16  15        143489072        3.5547350172671    1.8621070553    2.1274466399
## 17  16        430467212        1.1849116724224    1.0750880097    1.2282818893
## 18  17       1291401632        0.3949705574741    0.6207023518    0.7091488792
## 19  18       3874204892        0.1316568524914    0.3583626699    0.4094272963
## 20  19      11622614672        0.0438856174971    0.2069007839    0.2363829597
## 21  20      34867844012        0.0146285391657    0.1194542233    0.1364757654
## 22  21     104603532032        0.0048761797219    0.0689669280    0.0787943199
## 23  22     313810596092        0.0016253932406    0.0398180744    0.0454919218
## 24  23     941431788272        0.0005417977469    0.0229889760    0.0262647733
## 25  24    2824295364812        0.0001805992490    0.0132726915    0.0151639739
## 26  25    8472886094432        0.0000601997497    0.0076629920    0.0087549244
## 27  26   25418658283292        0.0000200665832    0.0044242305    0.0050546580
## 28  27   76255974849872        0.0000066888611    0.0025543307    0.0029183081
## 29  28  228767924549612        0.0000022296204    0.0014747435    0.0016848860
## 30  29  686303773648832        0.0000007432068    0.0008514436    0.0009727694
## 31  30 2058911320946492        0.0000002477356    0.0004915812    0.0005616287

Then assign cell numbers to the occurrence data:

occ$cell <- dgtransform(dggs, occ$decimalLatitude, occ$decimalLongitude)

Calculate metrics

The following function calculates the number of records, species richness, Simpson index, Shannon index, Hurlbert index (n = 50), and Hill numbers for each cell.

library(gsl)

calc <- function(df, esn = 50) {
  t1 <- df %>%
    group_by(cell, species) %>%
    summarize(ni = sum(records))
  t2 <- t1 %>%
    group_by(cell) %>%
    mutate(n = sum(ni))
  t3 <- t2 %>%
    group_by(cell, species) %>%
    mutate(
      hi = -(ni/n*log(ni/n)),
      si = (ni/n)^2,
      qi = ni/n,
      esi = case_when(
        n-ni >= esn ~ 1-exp(lngamma(n-ni+1)+lngamma(n-esn+1)-lngamma(n-ni-esn+1)-lngamma(n+1)),
        n >= esn ~ 1
      )
    )
  t4 <- t3 %>%
    group_by(cell) %>%
    summarize(
      n = sum(ni),
      sp = n(),
      shannon = sum(hi),
      simpson = sum(si),
      maxp = max(qi),
      es = sum(esi)
    )
  result <- t4 %>%
    mutate(
      hill_1 = exp(shannon),
      hill_2 = 1/simpson,
      hill_inf = 1/maxp
    )
  return(result)
}

Perform the calculation on species level data:

metrics <- occ %>%
  #filter(!is.na(species)) %>%
  calc(50)

metrics
## # A tibble: 125,954 x 10
##     cell      n    sp shannon simpson  maxp    es hill_1 hill_2 hill_inf
##  * <dbl>  <int> <int>   <dbl>   <dbl> <dbl> <dbl>  <dbl>  <dbl>    <dbl>
##  1     1 323324  1413    4.29   0.101 0.308  30.3  73.0    9.93     3.25
##  2     2    917    50    2.30   0.262 0.497  15.9  10.0    3.82     2.01
##  3     3    605    39    2.11   0.303 0.539  14.8   8.26   3.30     1.86
##  4     4    210    35    2.24   0.278 0.514  16.9   9.35   3.59     1.94
##  5     5    432    79    2.72   0.253 0.498  21.5  15.2    3.95     2.01
##  6     6     10     7    1.83   0.18  0.3    NA     6.26   5.56     3.33
##  7     7    128    24    1.93   0.323 0.555  14.2   6.91   3.09     1.80
##  8     8     68    16    2.21   0.172 0.353  14.3   9.15   5.81     2.83
##  9     9    298    64    2.95   0.119 0.235  20.7  19.1    8.43     4.26
## 10    10    881    91    2.53   0.241 0.451  17.6  12.6    4.15     2.22
## # … with 125,944 more rows

Add cell geometries to the metrics table:

library(sf)

grid <- dgearthgrid(dggs, frame = FALSE, wrapcells = FALSE)
grid_sf <- grid %>%
  st_as_sf() %>%
  st_wrap_dateline(options = c("WRAPDATELINE=YES", "DATELINEOFFSET=230"))
grid_sf$cell <- names(grid)

metrics <- merge(metrics, grid_sf, by.x = "cell", by.y = "cell") %>%
    filter(st_intersects(geometry, st_as_sfc("SRID=4326;POLYGON((-180 85,180 85,180 -85,-180 -85,-180 85))"), sparse = FALSE))

Let’s check the results by creating a Shannon index map:

library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(ggplot2)

world <- ne_countries(scale = "medium", returnclass = "sf")

ggplot() +
    geom_sf(data = metrics, aes_string(fill = "shannon", geometry = "geometry"), lwd = 0) +
    scale_fill_viridis(option = "inferno", na.value = "white", name = "Shannon index") +
    geom_sf(data = world, fill = "#dddddd", color = NA) +
    theme(panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), panel.background = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()) + xlab("") + ylab("") +
    coord_sf()

Output

Shapefile

Write the metrics table to a shapefile:

files <- dir("shapefile", full.names = TRUE)
file.remove(files)
st_write(metrics, "shapefile/indicators.shp", layer = "indicators")
files <- dir("shapefile", full.names = TRUE)
zip(zipfile = "shapefile/metrics.zip", files = files)
file.remove(files)

Map image

This create a high resolution PNG map image.

metrics <- st_sf(metrics, sf_column_name = "geometry")
st_crs(metrics) <- 4326

ggplot() +
  geom_sf(data = metrics, aes_string(fill = "n", color = "n", geometry = "geometry"), lwd = 0.04) +
  scale_color_viridis(option = "inferno", na.value = "white", name = "Number of records", trans = "log10") +
  scale_fill_viridis(option = "inferno", na.value = "white", name = "Number of records", trans = "log10") +
  geom_sf(data = world, fill = "#dddddd", color = "#666666", lwd = 0.1) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(), 
    panel.background = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "bottom"
  ) +
  xlab("") + ylab("") +
  coord_sf(crs = "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" )

ggsave("map/map.png", width = 12, height = 6, dpi = 600)