Data provider country statistics
New OBIS datasets since 2021
Load data
readRenviron("env.txt")
con <- dbConnect(RPostgres::Postgres(), dbname = Sys.getenv("OBIS_DB"), host = Sys.getenv("OBIS_HOST"), port = Sys.getenv("OBIS_PORT"), user = Sys.getenv("OBIS_USER"), password = Sys.getenv("OBIS_PASSWORD"), bigint = "integer")
res <- dbSendQuery(con, glue("
select
datasets.id,
datasets.url,
(datasets.statistics->>'Occurrence')::int as records,
extract('year' from datasets.created) as year,
persons.organization,
persons.country as person_country,
ih.country as inst_country,
persons.oceanexpert_institution_id
from datasets
left join metadata.lnk_dataset_person ldp on ldp.dataset_id = datasets.id
left join metadata.persons on ldp.person_id = persons.id
left join metadata.institution_hierarchy ih on ih.id = persons.oceanexpert_institution_id
order by (datasets.statistics->>'Occurrence')::int desc nulls last, datasets.id
"))
datasets <- dbFetch(res) %>%
mutate(inst_country = stringr::str_replace(inst_country, "Réunion", "France")) %>%
mutate(inst_country = stringr::str_replace(inst_country, "Wallis Futuna Islands", "Wallis and Futuna Islands")) %>%
distinct()Cleanup country names
country_codes <- countrycode::codelist %>% select(name = un.name.en, iso2c) %>%
filter(!is.na(iso2c))
datasets <- datasets %>%
left_join(country_codes, by = c("person_country" = "iso2c")) %>%
rowwise() %>%
mutate(country = ifelse(!is.na(inst_country), inst_country, name)) %>%
mutate(across(where(is.character), ~na_if(., ""))) %>%
filter(!is.na(country))
head(datasets)## # A tibble: 6 × 10
## # Rowwise:
## id url records year organization person_country inst_country
## <chr> <chr> <int> <dbl> <chr> <chr> <chr>
## 1 46389886-a09c-4c… http… 8331913 2023 Flanders Ma… BE Belgium
## 2 efa02fe9-6b5b-41… http… 4583637 2020 CSIRO Natio… AU Australia
## 3 efa02fe9-6b5b-41… http… 4583637 2020 CSIRO Ocean… AU Australia
## 4 80479e14-2730-43… http… 3134870 2020 National Oc… <NA> United Stat…
## 5 80479e14-2730-43… http… 3134870 2020 U.S. Geolog… <NA> United Stat…
## 6 8b0d5fdd-6a3f-48… http… 3071809 2020 School of E… AU Australia
## # ℹ 3 more variables: oceanexpert_institution_id <int>, name <chr>,
## # country <chr>
Add region info
countries <- m49::m49_full %>% mutate(country_or_area = stringr::str_replace(country_or_area, "Turkey", "Türkiye")) %>% select(country_or_area, ldc, lldc, sids, developed, developing, region_name) %>%
mutate(classification = factor(ifelse(sids, "SIDS", ifelse(region_name == "Africa", "Africa", "other")), levels = c("SIDS", "Africa", "other")))
datasets <- datasets %>%
left_join(countries, by = c("country" = "country_or_area"))Statistics
recent_datasets <- datasets %>%
filter(year >= start_year & year <= end_year)
recent_stats <- recent_datasets %>%
distinct(id, records, year, country, sids, region_name, classification) %>%
filter(!is.na(country)) %>%
group_by(country, year, sids, region_name, classification) %>%
summarize(datasets = n(), records = sum(records)) %>%
arrange(desc(records))
head(recent_stats)## # A tibble: 6 × 7
## # Groups: country, year, sids, region_name [6]
## country year sids region_name classification datasets records
## <chr> <dbl> <lgl> <chr> <fct> <int> <int>
## 1 Belgium 2023 FALSE Europe other 26 1.33e7
## 2 United States of Amer… 2021 FALSE Americas other 162 6.75e6
## 3 Canada 2021 FALSE Americas other 45 6.58e6
## 4 Belgium 2022 FALSE Europe other 60 5.18e6
## 5 Denmark 2023 FALSE Europe other 9 4.58e6
## 6 Netherlands 2022 FALSE Europe other 16 4.03e6
write.csv(recent_stats, file = "output/stats.csv", row.names = FALSE)
recent_stats %>%
relocate(country, year, records) %>% rmarkdown::paged_table(options = list(rows.print = 100))Visualize
stats_all_years <- recent_stats %>%
group_by(country) %>%
summarize(records = sum(records))
recent_stats <- recent_stats %>%
mutate(country = factor(country, levels = stats_all_years$country[order(stats_all_years$records)]))
ggplot(data = recent_stats) +
geom_bar(aes(x = country, y = records, fill = classification), stat = "identity") +
theme_minimal() +
coord_flip() +
scale_fill_manual(values = c("#e9933e", "#f3c654", "#cccccc")) +
ggtitle("Biodiversity records contributed to OBIS by country") +
scale_y_continuous(labels = scales::label_comma()) +
facet_wrap(~year)ggsave("output/graph_untransformed.png", width = 12, height = 7, dpi = 300, scale = 1.2, bg = "white")
ggplot(data = recent_stats) +
geom_bar(aes(x = country, y = records, fill = classification), stat = "identity") +
theme_minimal() +
coord_flip() +
scale_fill_manual(values = c("#e9933e", "#f3c654", "#cccccc")) +
ggtitle("Biodiversity records contributed to OBIS by country") +
scale_y_continuous(labels = scales::label_comma(), trans = "log10") +
facet_wrap(~year)Statistics
Records and datasets per year:
recent_datasets %>%
distinct(id, year, records) %>%
group_by(year) %>%
summarize(records = sum(records, na.rm = TRUE), datasets = n())## # A tibble: 4 × 3
## year records datasets
## <dbl> <int> <int>
## 1 2021 17326573 649
## 2 2022 14502286 391
## 3 2023 27529490 454
## 4 2024 8320863 325
Countries per year and classification:
recent_stats %>%
group_by(year, classification) %>%
summarize(records = sum(records, na.rm = TRUE), datasets = n(), countries = length(unique(country)))## # A tibble: 12 × 5
## # Groups: year [4]
## year classification records datasets countries
## <dbl> <fct> <int> <int> <int>
## 1 2021 SIDS 68960 4 4
## 2 2021 Africa 1982 3 3
## 3 2021 other 22060309 36 36
## 4 2022 SIDS 1054697 10 10
## 5 2022 Africa 252115 5 5
## 6 2022 other 19869472 42 42
## 7 2023 SIDS 87680 5 5
## 8 2023 Africa 24209 5 5
## 9 2023 other 35588728 32 32
## 10 2024 SIDS 411051 5 5
## 11 2024 Africa 11528 4 4
## 12 2024 other 9735449 38 38
All OBIS datasets so far
stats <- datasets %>%
distinct(id, records, year, country, sids, region_name, classification) %>%
filter(!is.na(country)) %>%
group_by(country, year, sids, region_name, classification) %>%
summarize(datasets = n(), records = sum(records)) %>%
arrange(desc(records))
stats %>%
group_by(country, classification) %>%
summarize(min_year = min(year)) %>%
group_by(classification, min_year) %>%
summarize(countries = n()) %>%
arrange(classification, !is.na(min_year), min_year) %>%
group_by(classification) %>%
mutate(cumulative_countries = cumsum(countries))## # A tibble: 15 × 4
## # Groups: classification [3]
## classification min_year countries cumulative_countries
## <fct> <dbl> <int> <int>
## 1 SIDS NA 13 13
## 2 SIDS 2019 1 14
## 3 SIDS 2020 3 17
## 4 SIDS 2021 1 18
## 5 SIDS 2022 3 21
## 6 Africa NA 14 14
## 7 Africa 2021 1 15
## 8 Africa 2022 1 16
## 9 Africa 2025 1 17
## 10 other NA 53 53
## 11 other 2019 1 54
## 12 other 2020 2 56
## 13 other 2022 3 59
## 14 other 2023 1 60
## 15 other 2024 1 61
IOC member states
ioc_members <- c("Afghanistan", "Albania", "Algeria", "Angola", "Argentina", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bangladesh", "Barbados", "Belgium", "Belize", "Benin", "Brazil", "Bulgaria", "Cabo Verde", "Cameroon", "Canada", "Chile", "China", "Colombia", "Comoros", "Congo", "Cook Islands", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", "Cyprus", "Czechia", "Democratic People's Republic of Korea", "Democratic Republic of the Congo", "Denmark", "Djibouti", "Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Erytrea", "Estonia", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Iceland", "India", "Indonesia", "Iran (Islamic Republic of)", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Kuwait", "Lebanon", "Libya", "Madagascar", "Malaysia", "Maldives", "Malta", "Mauritania", "Mauritius", "Mexico", "Monaco", "Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Netherlands", "New Zealand", "Nicaragua", "Nigeria", "Niue", "Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Republic of Korea", "Romania", "Russian Federation", "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovenia", "Solomon Islands", "Somalia", "South Africa", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syrian Arab Republic", "Thailand", "Timor-Leste", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu", "Ukraine", "United Arab Emirates", "United Kingdom of Great Britain and Northern Ireland", "United Republic of Tanzania", "United States of America", "Uruguay", "Vanuatu", "Venezuela (Bolivarian Republic of)", "Viet Nam", "Yemen")## [1] "Algeria"
## [2] "Angola"
## [3] "Argentina"
## [4] "Australia"
## [5] "Austria"
## [6] "Bangladesh"
## [7] "Belgium"
## [8] "Benin"
## [9] "Brazil"
## [10] "Bulgaria"
## [11] "Cameroon"
## [12] "Canada"
## [13] "Cayman Islands"
## [14] "Chile"
## [15] "China"
## [16] "Colombia"
## [17] "Costa Rica"
## [18] "Croatia"
## [19] "Cuba"
## [20] "Cyprus"
## [21] "Czechia"
## [22] "Denmark"
## [23] "Dominican Republic"
## [24] "Ecuador"
## [25] "Egypt"
## [26] "Estonia"
## [27] "Fiji"
## [28] "Finland"
## [29] "France"
## [30] "Germany"
## [31] "Greece"
## [32] "Greenland"
## [33] "Guinea"
## [34] "Iceland"
## [35] "India"
## [36] "Indonesia"
## [37] "Iran (Islamic Republic of)"
## [38] "Ireland"
## [39] "Israel"
## [40] "Italy"
## [41] "Jamaica"
## [42] "Japan"
## [43] "Kenya"
## [44] "Kiribati"
## [45] "Lebanon"
## [46] "Lithuania"
## [47] "Malaysia"
## [48] "Malta"
## [49] "Marshall Islands"
## [50] "Mauritania"
## [51] "Mexico"
## [52] "Micronesia (Federated States of)"
## [53] "Montenegro"
## [54] "Morocco"
## [55] "Mozambique"
## [56] "Namibia"
## [57] "Nauru"
## [58] "Netherlands"
## [59] "New Caledonia"
## [60] "New Zealand"
## [61] "Nigeria"
## [62] "Niue"
## [63] "Norway"
## [64] "Palau"
## [65] "Papua New Guinea"
## [66] "Peru"
## [67] "Philippines"
## [68] "Poland"
## [69] "Portugal"
## [70] "Puerto Rico"
## [71] "Qatar"
## [72] "Republic of Korea"
## [73] "Russian Federation"
## [74] "Samoa"
## [75] "Senegal"
## [76] "Serbia"
## [77] "Singapore"
## [78] "Slovenia"
## [79] "Solomon Islands"
## [80] "South Africa"
## [81] "Spain"
## [82] "Sweden"
## [83] "Switzerland"
## [84] "Thailand"
## [85] "Togo"
## [86] "Tonga"
## [87] "Trinidad and Tobago"
## [88] "Tunisia"
## [89] "Türkiye"
## [90] "Tuvalu"
## [91] "Ukraine"
## [92] "United Kingdom of Great Britain and Northern Ireland"
## [93] "United Republic of Tanzania"
## [94] "United States of America"
## [95] "United States Virgin Islands"
## [96] "Uruguay"
## [97] "Vanuatu"
## [98] "Venezuela (Bolivarian Republic of)"
## [99] "Wallis and Futuna Islands"
## [1] "Afghanistan"
## [2] "Albania"
## [3] "Azerbaijan"
## [4] "Bahamas"
## [5] "Barbados"
## [6] "Belize"
## [7] "Cabo Verde"
## [8] "Comoros"
## [9] "Congo"
## [10] "Cook Islands"
## [11] "Cote d'Ivoire"
## [12] "Democratic People's Republic of Korea"
## [13] "Democratic Republic of the Congo"
## [14] "Djibouti"
## [15] "Dominica"
## [16] "El Salvador"
## [17] "Equatorial Guinea"
## [18] "Erytrea"
## [19] "Ethiopia"
## [20] "Gabon"
## [21] "Gambia"
## [22] "Georgia"
## [23] "Ghana"
## [24] "Grenada"
## [25] "Guatemala"
## [26] "Guinea-Bissau"
## [27] "Guyana"
## [28] "Haiti"
## [29] "Honduras"
## [30] "Iraq"
## [31] "Jordan"
## [32] "Kazakhstan"
## [33] "Kuwait"
## [34] "Libya"
## [35] "Madagascar"
## [36] "Maldives"
## [37] "Mauritius"
## [38] "Monaco"
## [39] "Myanmar"
## [40] "Nicaragua"
## [41] "Oman"
## [42] "Pakistan"
## [43] "Panama"
## [44] "Romania"
## [45] "Saint Kitts and Nevis"
## [46] "Saint Lucia"
## [47] "Saint Vincent and the Grenadines"
## [48] "Saudi Arabia"
## [49] "Seychelles"
## [50] "Sierra Leone"
## [51] "Somalia"
## [52] "Sri Lanka"
## [53] "Sudan"
## [54] "Suriname"
## [55] "Syrian Arab Republic"
## [56] "Timor-Leste"
## [57] "Turkey"
## [58] "Turkmenistan"
## [59] "United Arab Emirates"
## [60] "Viet Nam"
## [61] "Yemen"
## [1] "Puerto Rico" "United States Virgin Islands"
## [3] "New Caledonia" "Marshall Islands"
## [5] "Micronesia (Federated States of)" "Türkiye"
## [7] "Greenland" "Lithuania"
## [9] "Wallis and Futuna Islands" "Cayman Islands"
BioEco GeoNode monitoring programmes
layers <- jsonlite::fromJSON("https://geonode.goosocean.org/api/layers/")$objects
stopifnot(nrow(layers) < 1000)
regions <- unlist(layers$regions)Fix region names:
## [1] "Global" "Mediterranean Sea"
## [3] "Alaska" "California"
## [5] "Baltic Sea" "Queensland"
## [7] "Southern Ocean" "Turkey"
## [9] "Newfoundland and Labrador" "Faeroe Islands"
## [11] "United Kingdom" "Americas"
## [13] "Scotland" "Europe"
## [15] "North America" "Pacific"
## [17] "Baker Island" "Hawaii"
## [19] "Northwestern Hawaiian Islands" "Howland Island"
## [21] "Jarvis Islands" "Johnston Atoll"
## [23] "Kingman Reef" "Micronesia, Federated States of"
## [25] "Midway Islands" "Palmyra Atoll"
## [27] "Wake Island" "South America"
## [29] "Caribbean" "Asia"
## [31] "Middle East" "Cape Verde"
regions <- plyr::revalue(regions, c(
"United Kingdom" = "United Kingdom of Great Britain and Northern Ireland",
"Scotland" = "United Kingdom of Great Britain and Northern Ireland",
"Hawaii" = "United States of America",
"Alaska" = "United States of America",
"Queensland" = "Australia",
"Queensland" = "Australia",
"Newfoundland and Labrador" = "Canada",
"California" = "United States of America",
"Turkey" = "Türkiye",
"Micronesia, Federated States of" = "Micronesia (Federated States of)",
"Cape Verde" = "Cabo Verde",
"Faeroe Islands" = "Faroe Islands",
"Baker Island" = "United States of America",
"Northwestern Hawaiian Islands" = "United States of America",
"Howland Island" = "United States of America",
"Jarvis Islands" = "United States of America",
"Johnston Atoll" = "United States of America",
"Kingman Reef" = "United States of America",
"Micronesia, Federated States of" = "Micronesia (Federated States of)",
"Midway Islands" = "United States of America",
"Palmyra Atoll" = "United States of America",
"Wake Island" = "United States of America",
"Cape Verde" = "Cabo Verde"
))bioeco_countries <- countries %>%
filter(country_or_area %in% regions)
bioeco_countries %>%
group_by(classification) %>%
summarize(n())## # A tibble: 4 × 2
## classification `n()`
## <fct> <int>
## 1 SIDS 14
## 2 Africa 7
## 3 other 50
## 4 <NA> 1