Data provider country statistics

New OBIS datasets since 2021

Config

start_year <- 2021
end_year <- 2024

Dependencies

library(DBI)
library(dplyr)
library(glue)
library(countrycode)
library(ggplot2)

Load data

readRenviron("env.txt")

con <- dbConnect(RPostgres::Postgres(), dbname = Sys.getenv("OBIS_DB"), host = Sys.getenv("OBIS_HOST"), port = Sys.getenv("OBIS_PORT"), user = Sys.getenv("OBIS_USER"), password = Sys.getenv("OBIS_PASSWORD"), bigint = "integer")

res <- dbSendQuery(con, glue("
    select
        datasets.id,
        datasets.url,
        (datasets.statistics->>'Occurrence')::int as records,
        extract('year' from datasets.created) as year,
        persons.organization,
        persons.country as person_country,
        ih.country as inst_country,
        persons.oceanexpert_institution_id
    from datasets
    left join metadata.lnk_dataset_person ldp on ldp.dataset_id = datasets.id
    left join metadata.persons on ldp.person_id = persons.id
    left join metadata.institution_hierarchy ih on ih.id = persons.oceanexpert_institution_id
    order by (datasets.statistics->>'Occurrence')::int desc nulls last, datasets.id
"))
datasets <- dbFetch(res) %>%
  mutate(inst_country = stringr::str_replace(inst_country, "Réunion", "France")) %>%
  mutate(inst_country = stringr::str_replace(inst_country, "Wallis Futuna Islands", "Wallis and Futuna Islands")) %>%
  distinct()

Cleanup country names

country_codes <- countrycode::codelist %>% select(name = un.name.en, iso2c) %>%
  filter(!is.na(iso2c))

datasets <- datasets %>%
  left_join(country_codes, by = c("person_country" = "iso2c")) %>%
  rowwise() %>%
  mutate(country = ifelse(!is.na(inst_country), inst_country, name)) %>%
  mutate(across(where(is.character), ~na_if(., ""))) %>%
  filter(!is.na(country))

head(datasets)
## # A tibble: 6 × 10
## # Rowwise: 
##   id                url   records  year organization person_country inst_country
##   <chr>             <chr>   <int> <dbl> <chr>        <chr>          <chr>       
## 1 46389886-a09c-4c… http… 8331913  2023 Flanders Ma… BE             Belgium     
## 2 efa02fe9-6b5b-41… http… 4583637  2020 CSIRO Natio… AU             Australia   
## 3 efa02fe9-6b5b-41… http… 4583637  2020 CSIRO Ocean… AU             Australia   
## 4 80479e14-2730-43… http… 3134870  2020 National Oc… <NA>           United Stat…
## 5 80479e14-2730-43… http… 3134870  2020 U.S. Geolog… <NA>           United Stat…
## 6 8b0d5fdd-6a3f-48… http… 3071809  2020 School of E… AU             Australia   
## # ℹ 3 more variables: oceanexpert_institution_id <int>, name <chr>,
## #   country <chr>

Add region info

countries <- m49::m49_full %>% mutate(country_or_area = stringr::str_replace(country_or_area, "Turkey", "Türkiye")) %>% select(country_or_area, ldc, lldc, sids, developed, developing, region_name) %>%
  mutate(classification = factor(ifelse(sids, "SIDS", ifelse(region_name == "Africa", "Africa", "other")), levels = c("SIDS", "Africa", "other")))

datasets <- datasets %>%
  left_join(countries, by = c("country" = "country_or_area"))

Statistics

recent_datasets <- datasets %>%
  filter(year >= start_year & year <= end_year)

recent_stats <- recent_datasets %>%
  distinct(id, records, year, country, sids, region_name, classification) %>%
  filter(!is.na(country)) %>%
  group_by(country, year, sids, region_name, classification) %>%
  summarize(datasets = n(), records = sum(records)) %>%
  arrange(desc(records))

head(recent_stats)
## # A tibble: 6 × 7
## # Groups:   country, year, sids, region_name [6]
##   country                 year sids  region_name classification datasets records
##   <chr>                  <dbl> <lgl> <chr>       <fct>             <int>   <int>
## 1 Belgium                 2023 FALSE Europe      other                26  1.33e7
## 2 United States of Amer…  2021 FALSE Americas    other               162  6.75e6
## 3 Canada                  2021 FALSE Americas    other                45  6.58e6
## 4 Belgium                 2022 FALSE Europe      other                60  5.18e6
## 5 Denmark                 2023 FALSE Europe      other                 9  4.58e6
## 6 Netherlands             2022 FALSE Europe      other                16  4.03e6
write.csv(recent_stats, file = "output/stats.csv", row.names = FALSE)
recent_stats %>%
  relocate(country, year, records) %>% rmarkdown::paged_table(options = list(rows.print = 100))

Visualize

stats_all_years <- recent_stats %>%
  group_by(country) %>%
  summarize(records = sum(records))

recent_stats <- recent_stats %>%
  mutate(country = factor(country, levels = stats_all_years$country[order(stats_all_years$records)]))

ggplot(data = recent_stats) +
  geom_bar(aes(x = country, y = records, fill = classification), stat = "identity") +
  theme_minimal() +
  coord_flip() +
  scale_fill_manual(values = c("#e9933e", "#f3c654", "#cccccc")) +
  ggtitle("Biodiversity records contributed to OBIS by country") +
  scale_y_continuous(labels = scales::label_comma()) +
  facet_wrap(~year)

ggsave("output/graph_untransformed.png", width = 12, height = 7, dpi = 300, scale = 1.2, bg = "white")

ggplot(data = recent_stats) +
  geom_bar(aes(x = country, y = records, fill = classification), stat = "identity") +
  theme_minimal() +
  coord_flip() +
  scale_fill_manual(values = c("#e9933e", "#f3c654", "#cccccc")) +
  ggtitle("Biodiversity records contributed to OBIS by country") +
  scale_y_continuous(labels = scales::label_comma(), trans = "log10") +
  facet_wrap(~year)

ggsave("output/graph.png", width = 12, height = 7, dpi = 300, scale = 1.2, bg = "white")

Statistics

Records and datasets per year:

recent_datasets %>%
  distinct(id, year, records) %>%
  group_by(year) %>%
  summarize(records = sum(records, na.rm = TRUE), datasets = n())
## # A tibble: 4 × 3
##    year  records datasets
##   <dbl>    <int>    <int>
## 1  2021 17326573      649
## 2  2022 14502286      391
## 3  2023 27529490      454
## 4  2024  8320863      325

Countries per year and classification:

recent_stats %>%
  group_by(year, classification) %>%
  summarize(records = sum(records, na.rm = TRUE), datasets = n(), countries = length(unique(country)))
## # A tibble: 12 × 5
## # Groups:   year [4]
##     year classification  records datasets countries
##    <dbl> <fct>             <int>    <int>     <int>
##  1  2021 SIDS              68960        4         4
##  2  2021 Africa             1982        3         3
##  3  2021 other          22060309       36        36
##  4  2022 SIDS            1054697       10        10
##  5  2022 Africa           252115        5         5
##  6  2022 other          19869472       42        42
##  7  2023 SIDS              87680        5         5
##  8  2023 Africa            24209        5         5
##  9  2023 other          35588728       32        32
## 10  2024 SIDS             411051        5         5
## 11  2024 Africa            11528        4         4
## 12  2024 other           9735449       38        38

All OBIS datasets so far

stats <- datasets %>%
  distinct(id, records, year, country, sids, region_name, classification) %>%
  filter(!is.na(country)) %>%
  group_by(country, year, sids, region_name, classification) %>%
  summarize(datasets = n(), records = sum(records)) %>%
  arrange(desc(records))

stats %>%
  group_by(country, classification) %>%
  summarize(min_year = min(year)) %>%
  group_by(classification, min_year) %>%
  summarize(countries = n()) %>%
  arrange(classification, !is.na(min_year), min_year) %>%
  group_by(classification) %>%
  mutate(cumulative_countries = cumsum(countries))
## # A tibble: 15 × 4
## # Groups:   classification [3]
##    classification min_year countries cumulative_countries
##    <fct>             <dbl>     <int>                <int>
##  1 SIDS                 NA        13                   13
##  2 SIDS               2019         1                   14
##  3 SIDS               2020         3                   17
##  4 SIDS               2021         1                   18
##  5 SIDS               2022         3                   21
##  6 Africa               NA        14                   14
##  7 Africa             2021         1                   15
##  8 Africa             2022         1                   16
##  9 Africa             2025         1                   17
## 10 other                NA        53                   53
## 11 other              2019         1                   54
## 12 other              2020         2                   56
## 13 other              2022         3                   59
## 14 other              2023         1                   60
## 15 other              2024         1                   61

IOC member states

ioc_members <- c("Afghanistan", "Albania", "Algeria", "Angola", "Argentina", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bangladesh", "Barbados", "Belgium", "Belize", "Benin", "Brazil", "Bulgaria", "Cabo Verde", "Cameroon", "Canada", "Chile", "China", "Colombia", "Comoros", "Congo", "Cook Islands", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czechia", "Democratic People's Republic of Korea", "Democratic Republic of the Congo", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Erytrea", "Estonia", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Iceland", "India", "Indonesia", "Iran (Islamic Republic of)", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Kuwait", "Lebanon", "Libya", "Madagascar", "Malaysia", "Maldives", "Malta", "Mauritania", "Mauritius", "Mexico", "Monaco", "Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Netherlands", "New Zealand", "Nicaragua", "Nigeria", "Niue", "Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Republic of Korea", "Romania", "Russian Federation", "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Ukraine", "United Arab Emirates", "United Kingdom of Great Britain and Northern Ireland", "United Republic of Tanzania", "United States of America", "Uruguay", "Vanuatu", "Venezuela (Bolivarian Republic of)", "Viet Nam", "Yemen")
sort(unique(stats$country))
##  [1] "Algeria"                                             
##  [2] "Angola"                                              
##  [3] "Argentina"                                           
##  [4] "Australia"                                           
##  [5] "Austria"                                             
##  [6] "Bangladesh"                                          
##  [7] "Belgium"                                             
##  [8] "Benin"                                               
##  [9] "Brazil"                                              
## [10] "Bulgaria"                                            
## [11] "Cameroon"                                            
## [12] "Canada"                                              
## [13] "Cayman Islands"                                      
## [14] "Chile"                                               
## [15] "China"                                               
## [16] "Colombia"                                            
## [17] "Costa Rica"                                          
## [18] "Croatia"                                             
## [19] "Cuba"                                                
## [20] "Cyprus"                                              
## [21] "Czechia"                                             
## [22] "Denmark"                                             
## [23] "Dominican Republic"                                  
## [24] "Ecuador"                                             
## [25] "Egypt"                                               
## [26] "Estonia"                                             
## [27] "Fiji"                                                
## [28] "Finland"                                             
## [29] "France"                                              
## [30] "Germany"                                             
## [31] "Greece"                                              
## [32] "Greenland"                                           
## [33] "Guinea"                                              
## [34] "Iceland"                                             
## [35] "India"                                               
## [36] "Indonesia"                                           
## [37] "Iran (Islamic Republic of)"                          
## [38] "Ireland"                                             
## [39] "Israel"                                              
## [40] "Italy"                                               
## [41] "Jamaica"                                             
## [42] "Japan"                                               
## [43] "Kenya"                                               
## [44] "Kiribati"                                            
## [45] "Lebanon"                                             
## [46] "Lithuania"                                           
## [47] "Malaysia"                                            
## [48] "Malta"                                               
## [49] "Marshall Islands"                                    
## [50] "Mauritania"                                          
## [51] "Mexico"                                              
## [52] "Micronesia (Federated States of)"                    
## [53] "Montenegro"                                          
## [54] "Morocco"                                             
## [55] "Mozambique"                                          
## [56] "Namibia"                                             
## [57] "Nauru"                                               
## [58] "Netherlands"                                         
## [59] "New Caledonia"                                       
## [60] "New Zealand"                                         
## [61] "Nigeria"                                             
## [62] "Niue"                                                
## [63] "Norway"                                              
## [64] "Palau"                                               
## [65] "Papua New Guinea"                                    
## [66] "Peru"                                                
## [67] "Philippines"                                         
## [68] "Poland"                                              
## [69] "Portugal"                                            
## [70] "Puerto Rico"                                         
## [71] "Qatar"                                               
## [72] "Republic of Korea"                                   
## [73] "Russian Federation"                                  
## [74] "Samoa"                                               
## [75] "Senegal"                                             
## [76] "Serbia"                                              
## [77] "Singapore"                                           
## [78] "Slovenia"                                            
## [79] "Solomon Islands"                                     
## [80] "South Africa"                                        
## [81] "Spain"                                               
## [82] "Sweden"                                              
## [83] "Switzerland"                                         
## [84] "Thailand"                                            
## [85] "Togo"                                                
## [86] "Tonga"                                               
## [87] "Trinidad and Tobago"                                 
## [88] "Tunisia"                                             
## [89] "Türkiye"                                             
## [90] "Tuvalu"                                              
## [91] "Ukraine"                                             
## [92] "United Kingdom of Great Britain and Northern Ireland"
## [93] "United Republic of Tanzania"                         
## [94] "United States of America"                            
## [95] "United States Virgin Islands"                        
## [96] "Uruguay"                                             
## [97] "Vanuatu"                                             
## [98] "Venezuela (Bolivarian Republic of)"                  
## [99] "Wallis and Futuna Islands"
setdiff(ioc_members, unique(stats$country))
##  [1] "Afghanistan"                          
##  [2] "Albania"                              
##  [3] "Azerbaijan"                           
##  [4] "Bahamas"                              
##  [5] "Barbados"                             
##  [6] "Belize"                               
##  [7] "Cabo Verde"                           
##  [8] "Comoros"                              
##  [9] "Congo"                                
## [10] "Cook Islands"                         
## [11] "Cote d'Ivoire"                        
## [12] "Democratic People's Republic of Korea"
## [13] "Democratic Republic of the Congo"     
## [14] "Djibouti"                             
## [15] "Dominica"                             
## [16] "El Salvador"                          
## [17] "Equatorial Guinea"                    
## [18] "Erytrea"                              
## [19] "Ethiopia"                             
## [20] "Gabon"                                
## [21] "Gambia"                               
## [22] "Georgia"                              
## [23] "Ghana"                                
## [24] "Grenada"                              
## [25] "Guatemala"                            
## [26] "Guinea-Bissau"                        
## [27] "Guyana"                               
## [28] "Haiti"                                
## [29] "Honduras"                             
## [30] "Iraq"                                 
## [31] "Jordan"                               
## [32] "Kazakhstan"                           
## [33] "Kuwait"                               
## [34] "Libya"                                
## [35] "Madagascar"                           
## [36] "Maldives"                             
## [37] "Mauritius"                            
## [38] "Monaco"                               
## [39] "Myanmar"                              
## [40] "Nicaragua"                            
## [41] "Oman"                                 
## [42] "Pakistan"                             
## [43] "Panama"                               
## [44] "Romania"                              
## [45] "Saint Kitts and Nevis"                
## [46] "Saint Lucia"                          
## [47] "Saint Vincent and the Grenadines"     
## [48] "Saudi Arabia"                         
## [49] "Seychelles"                           
## [50] "Sierra Leone"                         
## [51] "Somalia"                              
## [52] "Sri Lanka"                            
## [53] "Sudan"                                
## [54] "Suriname"                             
## [55] "Syrian Arab Republic"                 
## [56] "Timor-Leste"                          
## [57] "Turkey"                               
## [58] "Turkmenistan"                         
## [59] "United Arab Emirates"                 
## [60] "Viet Nam"                             
## [61] "Yemen"
setdiff(unique(stats$country), ioc_members)
##  [1] "Puerto Rico"                      "United States Virgin Islands"    
##  [3] "New Caledonia"                    "Marshall Islands"                
##  [5] "Micronesia (Federated States of)" "Türkiye"                         
##  [7] "Greenland"                        "Lithuania"                       
##  [9] "Wallis and Futuna Islands"        "Cayman Islands"

BioEco GeoNode monitoring programmes

layers <- jsonlite::fromJSON("https://geonode.goosocean.org/api/layers/")$objects
stopifnot(nrow(layers) < 1000)
regions <- unlist(layers$regions)

Fix region names:

unique(regions[which(!regions %in% countries$country_or_area)])
##  [1] "Global"                          "Mediterranean Sea"              
##  [3] "Alaska"                          "California"                     
##  [5] "Baltic Sea"                      "Queensland"                     
##  [7] "Southern Ocean"                  "Turkey"                         
##  [9] "Newfoundland and Labrador"       "Faeroe Islands"                 
## [11] "United Kingdom"                  "Americas"                       
## [13] "Scotland"                        "Europe"                         
## [15] "North America"                   "Pacific"                        
## [17] "Baker Island"                    "Hawaii"                         
## [19] "Northwestern Hawaiian Islands"   "Howland Island"                 
## [21] "Jarvis Islands"                  "Johnston Atoll"                 
## [23] "Kingman Reef"                    "Micronesia, Federated States of"
## [25] "Midway Islands"                  "Palmyra Atoll"                  
## [27] "Wake Island"                     "South America"                  
## [29] "Caribbean"                       "Asia"                           
## [31] "Middle East"                     "Cape Verde"
regions <- plyr::revalue(regions, c(
  "United Kingdom" = "United Kingdom of Great Britain and Northern Ireland",
  "Scotland" = "United Kingdom of Great Britain and Northern Ireland",
  "Hawaii" = "United States of America",
  "Alaska" = "United States of America",
  "Queensland" = "Australia",
  "Queensland" = "Australia",
  "Newfoundland and Labrador" = "Canada",
  "California" = "United States of America",
  "Turkey" = "Türkiye",
  "Micronesia, Federated States of" = "Micronesia (Federated States of)",
  "Cape Verde" = "Cabo Verde",
  "Faeroe Islands" = "Faroe Islands",
  "Baker Island" = "United States of America",
  "Northwestern Hawaiian Islands" = "United States of America",
  "Howland Island" = "United States of America",
  "Jarvis Islands" = "United States of America",
  "Johnston Atoll" = "United States of America",
  "Kingman Reef" = "United States of America",
  "Micronesia, Federated States of" = "Micronesia (Federated States of)",
  "Midway Islands" = "United States of America",
  "Palmyra Atoll" = "United States of America",
  "Wake Island" = "United States of America",
  "Cape Verde" = "Cabo Verde"
))
bioeco_countries <- countries %>%
  filter(country_or_area %in% regions)

bioeco_countries %>%
  group_by(classification) %>%
  summarize(n())
## # A tibble: 4 × 2
##   classification `n()`
##   <fct>          <int>
## 1 SIDS              14
## 2 Africa             7
## 3 other             50
## 4 <NA>               1