Bundling Francis 1993 to a DwC Archive
This is an R Markdown Notebook for converting the species checklist found in the following reference to DarwinCore format for upload into OBIS as part of UNESCO’s eDNA Expeditions project:
Setup
Load the necessary libraries and set up variables. Package loading messages are suppressed.
library(magrittr) # To use %<>% pipes
suppressMessages(library(janitor)) # To clean input data
suppressMessages(library(dplyr)) # To clean input data
library(stringr) # To clean input data
suppressMessages(library(rgnparser)) # To clean species names
suppressMessages(library(taxize)) # To get WoRMS IDs
library(worrms) # To get WoRMS IDs
library(digest) # To generate hashes
suppressMessages(library(obistools)) # To generate centroid lat/long and uncertainty
suppressMessages(library(sf)) # To generate wkt polygon
suppressMessages(library(EML)) # To create eml.xml file
library(xml2) # To create the meta.xml file
suppressMessages(library(zip)) # To zip DwC file
Input Parameters and Paths
Parsing PDF table to CSV
The data for this reference is formatted as an image-based table inside a PDF, spread across multiple pages. First, we use pdf_to_table to OCR each page and parse its table out to a CSV file.
# Run the pdf_to_table Python script (inside a conda environment) over the
# pages of the source PDF that hold the checklist table.

# Name of the conda environment used to run the script
condaenv <- "mwhs-data-mobilization"

# Path to the Python script
script <- file.path(path_to_project_root, "scripts_data", "pdf_to_tables", "pdf_to_table.py")

# Input PDF file path
input_pdf <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "raw", original_pdf)

# Output directory for OCR/table files
output_dir <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "processed")

# Define page numbers and table areas (see documentation).
# Each entry pairs one table area (-a) with the page (-p) it belongs to.
page_args <- c(
  "-a 185.463,24.165,618.111,305.999 -p 22",
  "-a 140.621,45.911,630.341,330.619 -p 23",
  "-a 140.043,27.077,627.428,311.239 -p 24",
  "-a 141.199,44.179,617.059,323.689 -p 25",
  "-a 141.492,23.437,618.92,306.422 -p 26",
  "-a 144.572,47.8,630.781,326.387 -p 27",
  "-a 143.838,16.112,630.135,305.547 -p 28",
  "-a 145.604,57.359,630.952,336.802 -p 29",
  "-a 146.317,19.567,625.863,302.856 -p 30",
  "-a 145.745,52.492,640.165,334.598 -p 31",
  "-a 150.617,23.285,649.924,309.193 -p 32",
  "-a 148.863,51.199,621.199,329.984 -p 33"
)

# Define run parameters (see documentation)
run_parameters <- "-s -nh"

# Combine page arguments into a single string of command-line flags
page_args_combined <- paste(page_args, collapse = " ")

# Quote the environment name and every path so that directories containing
# spaces or shell metacharacters reach the script as single arguments.
quote_type <- if (.Platform$OS.type == "windows") "cmd" else "sh"
command <- paste(
  "conda run -n", shQuote(condaenv, type = quote_type),
  "python", shQuote(script, type = quote_type),
  "-i", shQuote(input_pdf, type = quote_type),
  run_parameters,
  page_args_combined,
  "-o", shQuote(output_dir, type = quote_type)
)

# intern = TRUE captures the script's console output so it is shown in the notebook
system(command, intern = TRUE)
## [1] ""
## [2] "Script Execution Summary"
## [3] "Date and Time: 2023-09-15 03:36:22"
## [4] "------------------------------"
## [5] ""
## [6] "PDF input: ../../../datasets/lord_howe_island_group/Francis_1993/raw/v47n2-136-170.pdf"
## [7] "Perform Table Parsing: TRUE"
## [8] "Selected Areas:"
## [9] " Area 1: [185.463, 24.165, 618.111, 305.999]"
## [10] " Area 2: [140.621, 45.911, 630.341, 330.619]"
## [11] " Area 3: [140.043, 27.077, 627.428, 311.239]"
## [12] " Area 4: [141.199, 44.179, 617.059, 323.689]"
## [13] " Area 5: [141.492, 23.437, 618.92, 306.422]"
## [14] " Area 6: [144.572, 47.8, 630.781, 326.387]"
## [15] " Area 7: [143.838, 16.112, 630.135, 305.547]"
## [16] " Area 8: [145.604, 57.359, 630.952, 336.802]"
## [17] " Area 9: [146.317, 19.567, 625.863, 302.856]"
## [18] " Area 10: [145.745, 52.492, 640.165, 334.598]"
## [19] " Area 11: [150.617, 23.285, 649.924, 309.193]"
## [20] " Area 12: [148.863, 51.199, 621.199, 329.984]"
## [21] "Pages: 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33"
## [22] "Concatenate: False"
## [23] "Concatenate across headers: True"
## [24] "Stream Extraction: True"
## [25] "Lattice Extraction: False"
## [26] ""
## [27] "Parsing Tables"
## [28] "------------------------------"
## [29] ""
## [30] ""
## [31] "Saving to CSV"
## [32] "CSV file(s):"
## [33] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_1.csv"
## [34] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_2.csv"
## [35] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_3.csv"
## [36] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_4.csv"
## [37] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_5.csv"
## [38] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_6.csv"
## [39] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_7.csv"
## [40] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_8.csv"
## [41] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_9.csv"
## [42] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_10.csv"
## [43] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_11.csv"
## [44] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_12.csv"
## [45] "------------------------------"
## [46] ""
## [47] ""
## [48] "Run Details: ../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_parameters.txt"
## [49] "Finished"
## [50] ""
Read source data
Now we’ll read in the CSV tables output by the previous step.
# Read every per-page CSV written by the PDF parsing step and stack them into
# one two-column data frame: the taxon text and the Lord Howe Island column.

# Directory holding the parsed table CSVs
processed_dir <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "processed")

# The dot is escaped and the pattern anchored so that only files named exactly
# "v47n2-136-170_tables_parsed_<number>.csv" are picked up.
file_list <- list.files(
  path = processed_dir,
  pattern = "^v47n2-136-170_tables_parsed_\\d+\\.csv$",
  full.names = TRUE
)
if (length(file_list) == 0) {
  stop("No parsed table CSV files found in ", processed_dir, call. = FALSE)
}

df_list <- list()
for (file in file_list) {
  df <- read.csv(file, header = FALSE, stringsAsFactors = FALSE)

  # Blank out first-row cells that start with "Unnamed"
  df[1, ] <- lapply(df[1, ], function(x) ifelse(grepl("^Unnamed", x), "", x))

  # Keep the first and last columns whose first-row cell is not NA.
  # Blank ("") cells deliberately still count as present here.
  non_na_cols <- which(!is.na(df[1, ]))
  if (length(non_na_cols) == 0) {
    stop("First row of ", file, " contains only NA values", call. = FALSE)
  }
  first_col <- min(non_na_cols)
  last_col <- max(non_na_cols)

  df <- data.frame(
    First_Column = df[, first_col],
    Last_Column = df[, last_col],
    stringsAsFactors = FALSE
  )
  df_list[[file]] <- df
}

# Stack the per-file tables and drop the file-name-derived row names
input_data <- do.call(rbind, df_list)
rownames(input_data) <- NULL
colnames(input_data) <- c("taxa", "lord_howe_island")

# to preview pretty table
knitr::kable(head(input_data))
taxa | lord_howe_island |
---|---|
Odontaspididae | |
Odontaspis ferox (Risso, | |
Lamnidae | |
Carcharodon carcharias (Linnaeus, | |
Triakidae | |
Mustelus lenticulatus Phillipps, |
Preprocessing
Here we tidy the data up, since OCR and table parsing errors are common, and keep only the list of species, since this is a checklist.
Tidy Data
# Tidy the stacked table and reduce it to the vector of taxon strings that
# have an entry in the Lord Howe Island column.
input_data %<>%
  remove_empty(c("rows", "cols")) %>% # Remove empty rows and columns
  clean_names()

# Keep only rows with something in the last column (i.e. those recorded for
# Lord Howe Island). The is.na() check matters: comparing NA with "" yields NA,
# and indexing with NA would insert all-NA rows into the result.
# NOTE(review): heading rows (classes, orders, families) presumably have no
# value in this column and are dropped by this filter as well - verify.
has_record <- !is.na(input_data$lord_howe_island) & input_data$lord_howe_island != ""
input_data <- input_data[has_record, ]

# Take the first column only (the taxon text)
cleaned_data <- input_data[, 1]

# to preview pretty table
knitr::kable(head(cleaned_data))
x |
---|
Carcharhinus amblyrhynchos (Bleeker, |
Carcharhinus galapagensis (Snodgrass & Heller, |
Galeocerdo cuvier (peron & Le Sueur, |
Dasyatis thetidis Waite, 1899 |
Taeniura meyeni (Muller & Henle, |
Albula neoguinaica Valenciennes, |
Get WoRMS IDs
Auto matching
First we will try to do this automatically by cleaning the species names using gnparser and then using the taxize library to query the WoRMS database.
# Run the cleaned taxon strings through gnparser so each entry is split into
# its parsed components (used below via $parsed and $canonical$full)
parsed_names <- rgnparser::gn_parse(cleaned_data)
# Look up the WoRMS ID for one parsed name.
#
# `element` is a single entry of the gnparser result; only its
# `canonical$full` name is used. The name is first searched with
# accepted = TRUE (fuzzy matching on). If that search reports "not found",
# it is repeated without the accepted-only restriction. Returns whatever
# get_wormsid() returned for the first search that is not "not found",
# or NA when both searches report "not found".
get_worms_id_from_element <- function(element) {
  canonical_name <- element$canonical$full

  accepted_hit <- get_wormsid(canonical_name, searchtype = "scientific", fuzzy = TRUE, messages = FALSE, accepted = TRUE)
  if (attr(accepted_hit, "match") != "not found") {
    return(accepted_hit)
  }

  # Fall back to a search that is not restricted to accepted names
  any_hit <- get_wormsid(canonical_name, searchtype = "scientific", messages = FALSE, fuzzy = TRUE)
  if (attr(any_hit, "match") == "not found") {
    NA
  } else {
    any_hit
  }
}
# Look up every parsed name; entries gnparser could not parse get NA
# without querying WoRMS
worms_ids <- lapply(parsed_names, function(element) {
  if (!element$parsed) {
    return(NA)
  }
  get_worms_id_from_element(element)
})
##
## id target
## 1 126224 Hippocampus
## 5 1525460 Hippocampus
## 6 275182 Hippocampus abdominalis
## 10 275183 Hippocampus alatus
## 11 275184 Hippocampus algiricus
## 12 275185 Hippocampus angustus
## 18 275186 Hippocampus barbouri
## 19 275187 Hippocampus bargibanti
## 23 212238 Hippocampus borboniensis
## 25 275189 Hippocampus breviceps
## 30 212233 Hippocampus camelopardalis
## 31 212234 Hippocampus capensis
## 32 886550 Hippocampus casscsio
## 34 275190 Hippocampus colemani
## 35 275191 Hippocampus comes
## 36 275192 Hippocampus coronatus
## 37 388711 Hippocampus curvicuspis
## 40 398432 Hippocampus debelius
## 41 275193 Hippocampus denise
## 44 159445 Hippocampus erectus
## 49 275194 Hippocampus fisheri
## 50 212230 Hippocampus fuscus
## 51 212230 Hippocampus fuscus
## 55 275195 Hippocampus grandiceps
## 56 154776 Hippocampus guttulatus
## 57 248042 Hippocampus guttulatus microstephanus
## 59 1376208 Hippocampus haema
## 60 275196 Hippocampus hendriki
## 64 127380 Hippocampus hippocampus
## 67 212239 Hippocampus histrix
## 71 275197 Hippocampus ingens
## 72 1288522 Hippocampus japapigu
## 74 275198 Hippocampus jayakari
## 75 275199 Hippocampus jugumus
## 78 212236 Hippocampus kelloggi
## 80 212237 Hippocampus kuda
## 84 275200 Hippocampus lichtensteinii
## 90 275201 Hippocampus minotaur
## 91 275202 Hippocampus mohnikei
## 96 275203 Hippocampus montebelloensis
## 97 275204 Hippocampus multispinus
## 98 1437127 Hippocampus nalu
## 106 712534 Hippocampus paradoxus
## 107 275205 Hippocampus patagonicus
## 109 306811 Hippocampus planifrons
## 112 398433 Hippocampus pontohi
## 116 388712 Hippocampus pusillus
## 117 275207 Hippocampus queenslandicus
## 121 159446 Hippocampus reidi
## 124 398434 Hippocampus satomiae
## 125 275208 Hippocampus semispinosus
## 127 275209 Hippocampus sindonis
## 128 275210 Hippocampus spinosissimus
## 131 275211 Hippocampus subelongatus
## 132 306822 Hippocampus suezensis
## 137 212232 Hippocampus trimaculatus
## 140 474956 Hippocampus tyro
## 143 398436 Hippocampus waleananus
## 144 212235 Hippocampus whitei
## 145 275212 Hippocampus zebra
## 146 275213 Hippocampus zosterae
## authority
## 1 Rafinesque, 1810
## 5 Rafinesque, 1810
## 6 Lesson, 1827
## 10 Kuiter, 2001
## 11 Kaup, 1856
## 12 Günther, 1870
## 18 Jordan & Richardson, 1908
## 19 Whitley, 1970
## 23 Duméril, 1870
## 25 Peters, 1869
## 30 Bianconi, 1854
## 31 Boulenger, 1900
## 32 Zhang, Qin, Wang & Lin, 2016
## 34 Kuiter, 2003
## 35 Cantor, 1849
## 36 Temminck & Schlegel, 1850
## 37 Fricke, 2004
## 40 Gomon & Kuiter, 2009
## 41 Lourie & Randall, 2003
## 44 Perry, 1810
## 49 Jordan & Evermann, 1903
## 50 Rüppell, 1838
## 51 Rüppell, 1838
## 55 Kuiter, 2001
## 56 Cuvier, 1829
## 57 Slastenenko, 1937
## 59 Han, Kim, Kai & Senou, 2017
## 60 Kuiter, 2001
## 64 (Linnaeus, 1758)
## 67 Kaup, 1856
## 71 Girard, 1858
## 72 Short, Smith, Motomura, Harasti & Hamilton, 2018
## 74 Boulenger, 1900
## 75 Kuiter, 2001
## 78 Jordan & Snyder, 1901
## 80 Bleeker, 1852
## 84 Kaup, 1856
## 90 Gomon, 1997
## 91 Bleeker, 1853
## 96 Kuiter, 2001
## 97 Kuiter, 2001
## 98 Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 106 Foster & Gomon, 2010
## 107 Piacentino & Luzzatto, 2004
## 109 Peters, 1877
## 112 Lourie & Kuiter, 2008
## 116 Fricke, 2004
## 117 Horne, 2001
## 121 Ginsburg, 1933
## 124 Lourie & Kuiter, 2008
## 125 Kuiter, 2001
## 127 Jordan & Snyder, 1901
## 128 Weber, 1913
## 131 Castelnau, 1873
## 132 Duncker, 1940
## 137 Leach, 1814
## 140 Randall & Lourie, 2009
## 143 Gomon & Kuiter, 2009
## 144 Bleeker, 1855
## 145 Whitley, 1964
## 146 Jordan & Gilbert, 1882
## status
## 1 accepted
## 5 accepted
## 6 accepted
## 10 accepted
## 11 accepted
## 12 accepted
## 18 accepted
## 19 accepted
## 23 accepted
## 25 accepted
## 30 accepted
## 31 accepted
## 32 accepted
## 34 accepted
## 35 accepted
## 36 accepted
## 37 accepted
## 40 accepted
## 41 accepted
## 44 accepted
## 49 accepted
## 50 accepted
## 51 accepted
## 55 accepted
## 56 accepted
## 57 accepted
## 59 accepted
## 60 accepted
## 64 accepted
## 67 accepted
## 71 accepted
## 72 accepted
## 74 accepted
## 75 accepted
## 78 accepted
## 80 accepted
## 84 accepted
## 90 accepted
## 91 accepted
## 96 accepted
## 97 accepted
## 98 accepted
## 106 accepted
## 107 accepted
## 109 accepted
## 112 accepted
## 116 accepted
## 117 accepted
## 121 accepted
## 124 accepted
## 125 accepted
## 127 accepted
## 128 accepted
## 131 accepted
## 132 accepted
## 137 accepted
## 140 accepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
##
## More than one WORMS ID found for taxon 'Hippocampus'!
##
## Enter rownumber of taxon (other inputs will return 'NA'):
## id target
## 1 126224 Hippocampus
## 5 1525460 Hippocampus
## 6 275182 Hippocampus abdominalis
## 7 306774 Hippocampus agnesae
## 8 306775 Hippocampus aimei
## 9 716772 Hippocampus aimei
## 10 275183 Hippocampus alatus
## 11 275184 Hippocampus algiricus
## 12 275185 Hippocampus angustus
## 13 306776 Hippocampus antiquorum
## 14 306777 Hippocampus antiquus
## 15 400954 Hippocampus arnei
## 16 716773 Hippocampus arnei
## 17 306778 Hippocampus aterrimus
## 18 275186 Hippocampus barbouri
## 19 275187 Hippocampus bargibanti
## 20 713214 Hippocampus bicuspis
## 21 275188 Hippocampus biocellatus
## 22 306779 Hippocampus bleekeri
## 23 212238 Hippocampus borboniensis
## 24 306780 Hippocampus brachyrhynchus
## 25 275189 Hippocampus breviceps
## 26 306781 Hippocampus brevirostris
## 27 306782 Hippocampus brunneus
## 28 400945 Hippocampus cameleopardalis
## 29 400946 Hippocampus cameleopardalus
## 30 212233 Hippocampus camelopardalis
## 31 212234 Hippocampus capensis
## 32 886550 Hippocampus casscsio
## 33 306783 Hippocampus chinensis
## 34 275190 Hippocampus colemani
## 35 275191 Hippocampus comes
## 36 275192 Hippocampus coronatus
## 37 388711 Hippocampus curvicuspis
## 38 306784 Hippocampus dahli
## 39 306785 Hippocampus deanei
## 40 398432 Hippocampus debelius
## 41 275193 Hippocampus denise
## 42 306786 Hippocampus ecuadorensis
## 43 306787 Hippocampus elongatus
## 44 159445 Hippocampus erectus
## 45 306788 Hippocampus erinaceus
## 46 154815 Hippocampus europaeus
## 47 306789 Hippocampus fascicularis
## 48 713215 Hippocampus filamentosus
## 49 275194 Hippocampus fisheri
## 50 212230 Hippocampus fuscus
## 51 212230 Hippocampus fuscus
## 52 306790 Hippocampus graciliformis
## 53 306791 Hippocampus gracilis
## 54 306792 Hippocampus gracilissimus
## 55 275195 Hippocampus grandiceps
## 56 154776 Hippocampus guttulatus
## 57 248042 Hippocampus guttulatus microstephanus
## 58 323136 Hippocampus guttulatus multiannularis
## 59 1376208 Hippocampus haema
## 60 275196 Hippocampus hendriki
## 61 154458 Hippocampus heptagonus
## 62 306793 Hippocampus hildebrandi
## 63 306794 Hippocampus hilonis
## 64 127380 Hippocampus hippocampus
## 65 322937 Hippocampus hippocampus microcoronatus
## 66 322938 Hippocampus hippocampus microstephanus
## 67 212239 Hippocampus histrix
## 68 306795 Hippocampus horai
## 69 306796 Hippocampus hudsonius
## 70 400949 Hippocampus hystrix
## 71 275197 Hippocampus ingens
## 72 1288522 Hippocampus japapigu
## 73 306797 Hippocampus japonicus
## 74 275198 Hippocampus jayakari
## 75 275199 Hippocampus jugumus
## 76 306798 Hippocampus kampylotrachelos
## 77 306799 Hippocampus kaupii
## 78 212236 Hippocampus kelloggi
## 79 306800 Hippocampus kincaidi
## 80 212237 Hippocampus kuda
## 81 323205 Hippocampus kuda multiannularis
## 82 306801 Hippocampus laevicaudatus
## 83 713217 Hippocampus lenis
## 84 275200 Hippocampus lichtensteinii
## 85 154777 Hippocampus longirostris
## 86 306802 Hippocampus manadensis
## 87 306803 Hippocampus mannulus
## 88 306804 Hippocampus marginalis
## 89 306805 Hippocampus melanospilos
## 90 275201 Hippocampus minotaur
## 91 275202 Hippocampus mohnikei
## 92 306806 Hippocampus moluccensis
## 93 400952 Hippocampus monckei
## 94 400951 Hippocampus monickei
## 95 400953 Hippocampus monikei
## 96 275203 Hippocampus montebelloensis
## 97 275204 Hippocampus multispinus
## 98 1437127 Hippocampus nalu
## 99 306807 Hippocampus natalensis
## 100 306808 Hippocampus novaehebudorum
## 101 400955 Hippocampus novaehollandae
## 102 306809 Hippocampus novaehollandiae
## 103 713212 Hippocampus obscurus
## 104 713213 Hippocampus obscurus
## 105 306810 Hippocampus obtusus
## 106 712534 Hippocampus paradoxus
## 107 275205 Hippocampus patagonicus
## 108 400947 Hippocampus pentagonus
## 109 306811 Hippocampus planifrons
## 110 306812 Hippocampus poeyi
## 111 306813 Hippocampus polytaenia
## 112 398433 Hippocampus pontohi
## 113 275206 Hippocampus procerus
## 114 306814 Hippocampus punctulatus
## 115 306815 Hippocampus punctulatus
## 116 388712 Hippocampus pusillus
## 117 275207 Hippocampus queenslandicus
## 118 306816 Hippocampus raji
## 119 127381 Hippocampus ramulosus
## 120 306817 Hippocampus regulus
## 121 159446 Hippocampus reidi
## 122 306818 Hippocampus rhynchomacer
## 123 306819 Hippocampus rosamondae
## 124 398434 Hippocampus satomiae
## 125 275208 Hippocampus semispinosus
## 126 398435 Hippocampus severnsi
## 127 275209 Hippocampus sindonis
## 128 275210 Hippocampus spinosissimus
## 129 306820 Hippocampus stylifer
## 130 306821 Hippocampus subcoronatus
## 131 275211 Hippocampus subelongatus
## 132 306822 Hippocampus suezensis
## 133 306823 Hippocampus taeniops
## 134 306824 Hippocampus taeniopterus
## 135 306825 Hippocampus takakurae
## 136 306826 Hippocampus tetragonous
## 137 212232 Hippocampus trimaculatus
## 138 306827 Hippocampus tristis
## 139 306828 Hippocampus tuberculatus
## 140 474956 Hippocampus tyro
## 141 306829 Hippocampus villosus
## 142 306830 Hippocampus vulgaris
## 143 398436 Hippocampus waleananus
## 144 212235 Hippocampus whitei
## 145 275212 Hippocampus zebra
## 146 275213 Hippocampus zosterae
## 2 843450 <NA>
## 3 843451 <NA>
## 4 843452 <NA>
## authority
## 1 Rafinesque, 1810
## 5 Rafinesque, 1810
## 6 Lesson, 1827
## 7 Fowler, 1907
## 8 Roule, 1916
## 9 Roule, 1916
## 10 Kuiter, 2001
## 11 Kaup, 1856
## 12 Günther, 1870
## 13 Leach, 1814
## 14 Risso, 1827
## 15 Roule, 1916
## 16 Roule, 1916
## 17 Jordan & Snyder, 1902
## 18 Jordan & Richardson, 1908
## 19 Whitley, 1970
## 20 Kaup, 1856
## 21 Kuiter, 2001
## 22 Fowler, 1907
## 23 Duméril, 1870
## 24 Duncker, 1914
## 25 Peters, 1869
## 26 Schinz, 1822
## 27 Bean, 1906
## 28 Bianconi, 1854
## 29 Bianconi, 1854
## 30 Bianconi, 1854
## 31 Boulenger, 1900
## 32 Zhang, Qin, Wang & Lin, 2016
## 33 Basilewsky, 1855
## 34 Kuiter, 2003
## 35 Cantor, 1849
## 36 Temminck & Schlegel, 1850
## 37 Fricke, 2004
## 38 Ogilby, 1908
## 39 Duméril, 1861
## 40 Gomon & Kuiter, 2009
## 41 Lourie & Randall, 2003
## 42 Fowler, 1922
## 43 Castelnau, 1873
## 44 Perry, 1810
## 45 Günther, 1870
## 46 Ginsburg, 1933
## 47 Kaup, 1856
## 48 Duméril, 1870
## 49 Jordan & Evermann, 1903
## 50 Rüppell, 1838
## 51 Rüppell, 1838
## 52 McCulloch, 1911
## 53 Gill, 1862
## 54 Temminck & Schlegel, 1850
## 55 Kuiter, 2001
## 56 Cuvier, 1829
## 57 Slastenenko, 1937
## 58 Ginsburg, 1937
## 59 Han, Kim, Kai & Senou, 2017
## 60 Kuiter, 2001
## 61 Rafinesque, 1810
## 62 Ginsburg, 1933
## 63 Jordan & Evermann, 1903
## 64 (Linnaeus, 1758)
## 65 Slastenenko, 1938
## 66 Slastenenko, 1937
## 67 Kaup, 1856
## 68 Duncker, 1926
## 69 DeKay, 1842
## 70 Kaup, 1856
## 71 Girard, 1858
## 72 Short, Smith, Motomura, Harasti & Hamilton, 2018
## 73 Kaup, 1856
## 74 Boulenger, 1900
## 75 Kuiter, 2001
## 76 Bleeker, 1854
## 77 Duméril, 1870
## 78 Jordan & Snyder, 1901
## 79 Townsend & Barbour, 1906
## 80 Bleeker, 1852
## 81 Raj, 1941
## 82 Kaup, 1856
## 83 De Vis, 1908
## 84 Kaup, 1856
## 85 Schinz, 1822
## 86 Bleeker, 1856
## 87 Cantor, 1849
## 88 Kaup, 1856
## 89 Bleeker, 1854
## 90 Gomon, 1997
## 91 Bleeker, 1853
## 92 Bleeker, 1852
## 93 Bleeker, 1853
## 94 Bleeker, 1853
## 95 Bleeker, 1853
## 96 Kuiter, 2001
## 97 Kuiter, 2001
## 98 Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 99 von Bonde, 1923
## 100 Fowler, 1944
## 101 Steindachner, 1866
## 102 Steindachner, 1866
## 103 Hemprich & Ehrenberg, 1856
## 104 Ehrenberg, 1871
## 105 Ginsburg, 1933
## 106 Foster & Gomon, 2010
## 107 Piacentino & Luzzatto, 2004
## 108 Rafinesque, 1810
## 109 Peters, 1877
## 110 Howell Rivero, 1934
## 111 Bleeker, 1854
## 112 Lourie & Kuiter, 2008
## 113 Kuiter, 2001
## 114 Guichenot, 1853
## 115 Kaup, 1856
## 116 Fricke, 2004
## 117 Horne, 2001
## 118 Whitley, 1955
## 119 Leach, 1814
## 120 Ginsburg, 1933
## 121 Ginsburg, 1933
## 122 Duméril, 1870
## 123 Borodin, 1928
## 124 Lourie & Kuiter, 2008
## 125 Kuiter, 2001
## 126 Lourie & Kuiter, 2008
## 127 Jordan & Snyder, 1901
## 128 Weber, 1913
## 129 Jordan & Gilbert, 1882
## 130 Günther, 1866
## 131 Castelnau, 1873
## 132 Duncker, 1940
## 133 Fowler, 1904
## 134 Bleeker, 1852
## 135 Tanaka, 1916
## 136 (Mitchill, 1814)
## 137 Leach, 1814
## 138 Castelnau, 1872
## 139 Castelnau, 1875
## 140 Randall & Lourie, 2009
## 141 Günther, 1880
## 142 Cloquet, 1821
## 143 Gomon & Kuiter, 2009
## 144 Bleeker, 1855
## 145 Whitley, 1964
## 146 Jordan & Gilbert, 1882
## 2 <NA>
## 3 <NA>
## 4 <NA>
## status
## 1 accepted
## 5 accepted
## 6 accepted
## 7 unaccepted
## 8 unaccepted
## 9 unaccepted
## 10 accepted
## 11 accepted
## 12 accepted
## 13 unaccepted
## 14 unaccepted
## 15 unaccepted
## 16 unaccepted
## 17 unaccepted
## 18 accepted
## 19 accepted
## 20 unaccepted
## 21 unaccepted
## 22 unaccepted
## 23 accepted
## 24 unaccepted
## 25 accepted
## 26 unaccepted
## 27 unaccepted
## 28 unaccepted
## 29 unaccepted
## 30 accepted
## 31 accepted
## 32 accepted
## 33 unaccepted
## 34 accepted
## 35 accepted
## 36 accepted
## 37 accepted
## 38 unaccepted
## 39 unaccepted
## 40 accepted
## 41 accepted
## 42 unaccepted
## 43 unaccepted
## 44 accepted
## 45 unaccepted
## 46 unaccepted
## 47 unaccepted
## 48 unaccepted
## 49 accepted
## 50 accepted
## 51 accepted
## 52 unaccepted
## 53 unaccepted
## 54 unaccepted
## 55 accepted
## 56 accepted
## 57 accepted
## 58 unaccepted
## 59 accepted
## 60 accepted
## 61 unaccepted
## 62 unaccepted
## 63 unaccepted
## 64 accepted
## 65 unaccepted
## 66 unaccepted
## 67 accepted
## 68 unaccepted
## 69 unaccepted
## 70 unaccepted
## 71 accepted
## 72 accepted
## 73 unaccepted
## 74 accepted
## 75 accepted
## 76 unaccepted
## 77 unaccepted
## 78 accepted
## 79 unaccepted
## 80 accepted
## 81 unaccepted
## 82 unaccepted
## 83 unaccepted
## 84 accepted
## 85 unaccepted
## 86 unaccepted
## 87 unaccepted
## 88 unaccepted
## 89 unaccepted
## 90 accepted
## 91 accepted
## 92 unaccepted
## 93 unaccepted
## 94 unaccepted
## 95 unaccepted
## 96 accepted
## 97 accepted
## 98 accepted
## 99 unaccepted
## 100 unaccepted
## 101 unaccepted
## 102 unaccepted
## 103 unaccepted
## 104 unaccepted
## 105 unaccepted
## 106 accepted
## 107 accepted
## 108 unaccepted
## 109 accepted
## 110 unaccepted
## 111 unaccepted
## 112 accepted
## 113 unaccepted
## 114 unaccepted
## 115 unaccepted
## 116 accepted
## 117 accepted
## 118 unaccepted
## 119 unaccepted
## 120 unaccepted
## 121 accepted
## 122 unaccepted
## 123 unaccepted
## 124 accepted
## 125 accepted
## 126 unaccepted
## 127 accepted
## 128 accepted
## 129 unaccepted
## 130 unaccepted
## 131 accepted
## 132 accepted
## 133 unaccepted
## 134 unaccepted
## 135 unaccepted
## 136 unaccepted
## 137 accepted
## 138 unaccepted
## 139 unaccepted
## 140 accepted
## 141 unaccepted
## 142 unaccepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
## 2 quarantined
## 3 quarantined
## 4 quarantined
##
## More than one WORMS ID found for taxon 'Hippocampus'!
##
## Enter rownumber of taxon (other inputs will return 'NA'):
## id target
## 1 126224 Hippocampus
## 5 1525460 Hippocampus
## 6 275182 Hippocampus abdominalis
## 10 275183 Hippocampus alatus
## 11 275184 Hippocampus algiricus
## 12 275185 Hippocampus angustus
## 18 275186 Hippocampus barbouri
## 19 275187 Hippocampus bargibanti
## 23 212238 Hippocampus borboniensis
## 25 275189 Hippocampus breviceps
## 30 212233 Hippocampus camelopardalis
## 31 212234 Hippocampus capensis
## 32 886550 Hippocampus casscsio
## 34 275190 Hippocampus colemani
## 35 275191 Hippocampus comes
## 36 275192 Hippocampus coronatus
## 37 388711 Hippocampus curvicuspis
## 40 398432 Hippocampus debelius
## 41 275193 Hippocampus denise
## 44 159445 Hippocampus erectus
## 49 275194 Hippocampus fisheri
## 50 212230 Hippocampus fuscus
## 51 212230 Hippocampus fuscus
## 55 275195 Hippocampus grandiceps
## 56 154776 Hippocampus guttulatus
## 57 248042 Hippocampus guttulatus microstephanus
## 59 1376208 Hippocampus haema
## 60 275196 Hippocampus hendriki
## 64 127380 Hippocampus hippocampus
## 67 212239 Hippocampus histrix
## 71 275197 Hippocampus ingens
## 72 1288522 Hippocampus japapigu
## 74 275198 Hippocampus jayakari
## 75 275199 Hippocampus jugumus
## 78 212236 Hippocampus kelloggi
## 80 212237 Hippocampus kuda
## 84 275200 Hippocampus lichtensteinii
## 90 275201 Hippocampus minotaur
## 91 275202 Hippocampus mohnikei
## 96 275203 Hippocampus montebelloensis
## 97 275204 Hippocampus multispinus
## 98 1437127 Hippocampus nalu
## 106 712534 Hippocampus paradoxus
## 107 275205 Hippocampus patagonicus
## 109 306811 Hippocampus planifrons
## 112 398433 Hippocampus pontohi
## 116 388712 Hippocampus pusillus
## 117 275207 Hippocampus queenslandicus
## 121 159446 Hippocampus reidi
## 124 398434 Hippocampus satomiae
## 125 275208 Hippocampus semispinosus
## 127 275209 Hippocampus sindonis
## 128 275210 Hippocampus spinosissimus
## 131 275211 Hippocampus subelongatus
## 132 306822 Hippocampus suezensis
## 137 212232 Hippocampus trimaculatus
## 140 474956 Hippocampus tyro
## 143 398436 Hippocampus waleananus
## 144 212235 Hippocampus whitei
## 145 275212 Hippocampus zebra
## 146 275213 Hippocampus zosterae
## authority
## 1 Rafinesque, 1810
## 5 Rafinesque, 1810
## 6 Lesson, 1827
## 10 Kuiter, 2001
## 11 Kaup, 1856
## 12 Günther, 1870
## 18 Jordan & Richardson, 1908
## 19 Whitley, 1970
## 23 Duméril, 1870
## 25 Peters, 1869
## 30 Bianconi, 1854
## 31 Boulenger, 1900
## 32 Zhang, Qin, Wang & Lin, 2016
## 34 Kuiter, 2003
## 35 Cantor, 1849
## 36 Temminck & Schlegel, 1850
## 37 Fricke, 2004
## 40 Gomon & Kuiter, 2009
## 41 Lourie & Randall, 2003
## 44 Perry, 1810
## 49 Jordan & Evermann, 1903
## 50 Rüppell, 1838
## 51 Rüppell, 1838
## 55 Kuiter, 2001
## 56 Cuvier, 1829
## 57 Slastenenko, 1937
## 59 Han, Kim, Kai & Senou, 2017
## 60 Kuiter, 2001
## 64 (Linnaeus, 1758)
## 67 Kaup, 1856
## 71 Girard, 1858
## 72 Short, Smith, Motomura, Harasti & Hamilton, 2018
## 74 Boulenger, 1900
## 75 Kuiter, 2001
## 78 Jordan & Snyder, 1901
## 80 Bleeker, 1852
## 84 Kaup, 1856
## 90 Gomon, 1997
## 91 Bleeker, 1853
## 96 Kuiter, 2001
## 97 Kuiter, 2001
## 98 Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 106 Foster & Gomon, 2010
## 107 Piacentino & Luzzatto, 2004
## 109 Peters, 1877
## 112 Lourie & Kuiter, 2008
## 116 Fricke, 2004
## 117 Horne, 2001
## 121 Ginsburg, 1933
## 124 Lourie & Kuiter, 2008
## 125 Kuiter, 2001
## 127 Jordan & Snyder, 1901
## 128 Weber, 1913
## 131 Castelnau, 1873
## 132 Duncker, 1940
## 137 Leach, 1814
## 140 Randall & Lourie, 2009
## 143 Gomon & Kuiter, 2009
## 144 Bleeker, 1855
## 145 Whitley, 1964
## 146 Jordan & Gilbert, 1882
## status
## 1 accepted
## 5 accepted
## 6 accepted
## 10 accepted
## 11 accepted
## 12 accepted
## 18 accepted
## 19 accepted
## 23 accepted
## 25 accepted
## 30 accepted
## 31 accepted
## 32 accepted
## 34 accepted
## 35 accepted
## 36 accepted
## 37 accepted
## 40 accepted
## 41 accepted
## 44 accepted
## 49 accepted
## 50 accepted
## 51 accepted
## 55 accepted
## 56 accepted
## 57 accepted
## 59 accepted
## 60 accepted
## 64 accepted
## 67 accepted
## 71 accepted
## 72 accepted
## 74 accepted
## 75 accepted
## 78 accepted
## 80 accepted
## 84 accepted
## 90 accepted
## 91 accepted
## 96 accepted
## 97 accepted
## 98 accepted
## 106 accepted
## 107 accepted
## 109 accepted
## 112 accepted
## 116 accepted
## 117 accepted
## 121 accepted
## 124 accepted
## 125 accepted
## 127 accepted
## 128 accepted
## 131 accepted
## 132 accepted
## 137 accepted
## 140 accepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
##
## More than one WORMS ID found for taxon 'Hippocampus'!
##
## Enter rownumber of taxon (other inputs will return 'NA'):
## id target
## 1 126224 Hippocampus
## 5 1525460 Hippocampus
## 6 275182 Hippocampus abdominalis
## 7 306774 Hippocampus agnesae
## 8 306775 Hippocampus aimei
## 9 716772 Hippocampus aimei
## 10 275183 Hippocampus alatus
## 11 275184 Hippocampus algiricus
## 12 275185 Hippocampus angustus
## 13 306776 Hippocampus antiquorum
## 14 306777 Hippocampus antiquus
## 15 400954 Hippocampus arnei
## 16 716773 Hippocampus arnei
## 17 306778 Hippocampus aterrimus
## 18 275186 Hippocampus barbouri
## 19 275187 Hippocampus bargibanti
## 20 713214 Hippocampus bicuspis
## 21 275188 Hippocampus biocellatus
## 22 306779 Hippocampus bleekeri
## 23 212238 Hippocampus borboniensis
## 24 306780 Hippocampus brachyrhynchus
## 25 275189 Hippocampus breviceps
## 26 306781 Hippocampus brevirostris
## 27 306782 Hippocampus brunneus
## 28 400945 Hippocampus cameleopardalis
## 29 400946 Hippocampus cameleopardalus
## 30 212233 Hippocampus camelopardalis
## 31 212234 Hippocampus capensis
## 32 886550 Hippocampus casscsio
## 33 306783 Hippocampus chinensis
## 34 275190 Hippocampus colemani
## 35 275191 Hippocampus comes
## 36 275192 Hippocampus coronatus
## 37 388711 Hippocampus curvicuspis
## 38 306784 Hippocampus dahli
## 39 306785 Hippocampus deanei
## 40 398432 Hippocampus debelius
## 41 275193 Hippocampus denise
## 42 306786 Hippocampus ecuadorensis
## 43 306787 Hippocampus elongatus
## 44 159445 Hippocampus erectus
## 45 306788 Hippocampus erinaceus
## 46 154815 Hippocampus europaeus
## 47 306789 Hippocampus fascicularis
## 48 713215 Hippocampus filamentosus
## 49 275194 Hippocampus fisheri
## 50 212230 Hippocampus fuscus
## 51 212230 Hippocampus fuscus
## 52 306790 Hippocampus graciliformis
## 53 306791 Hippocampus gracilis
## 54 306792 Hippocampus gracilissimus
## 55 275195 Hippocampus grandiceps
## 56 154776 Hippocampus guttulatus
## 57 248042 Hippocampus guttulatus microstephanus
## 58 323136 Hippocampus guttulatus multiannularis
## 59 1376208 Hippocampus haema
## 60 275196 Hippocampus hendriki
## 61 154458 Hippocampus heptagonus
## 62 306793 Hippocampus hildebrandi
## 63 306794 Hippocampus hilonis
## 64 127380 Hippocampus hippocampus
## 65 322937 Hippocampus hippocampus microcoronatus
## 66 322938 Hippocampus hippocampus microstephanus
## 67 212239 Hippocampus histrix
## 68 306795 Hippocampus horai
## 69 306796 Hippocampus hudsonius
## 70 400949 Hippocampus hystrix
## 71 275197 Hippocampus ingens
## 72 1288522 Hippocampus japapigu
## 73 306797 Hippocampus japonicus
## 74 275198 Hippocampus jayakari
## 75 275199 Hippocampus jugumus
## 76 306798 Hippocampus kampylotrachelos
## 77 306799 Hippocampus kaupii
## 78 212236 Hippocampus kelloggi
## 79 306800 Hippocampus kincaidi
## 80 212237 Hippocampus kuda
## 81 323205 Hippocampus kuda multiannularis
## 82 306801 Hippocampus laevicaudatus
## 83 713217 Hippocampus lenis
## 84 275200 Hippocampus lichtensteinii
## 85 154777 Hippocampus longirostris
## 86 306802 Hippocampus manadensis
## 87 306803 Hippocampus mannulus
## 88 306804 Hippocampus marginalis
## 89 306805 Hippocampus melanospilos
## 90 275201 Hippocampus minotaur
## 91 275202 Hippocampus mohnikei
## 92 306806 Hippocampus moluccensis
## 93 400952 Hippocampus monckei
## 94 400951 Hippocampus monickei
## 95 400953 Hippocampus monikei
## 96 275203 Hippocampus montebelloensis
## 97 275204 Hippocampus multispinus
## 98 1437127 Hippocampus nalu
## 99 306807 Hippocampus natalensis
## 100 306808 Hippocampus novaehebudorum
## 101 400955 Hippocampus novaehollandae
## 102 306809 Hippocampus novaehollandiae
## 103 713212 Hippocampus obscurus
## 104 713213 Hippocampus obscurus
## 105 306810 Hippocampus obtusus
## 106 712534 Hippocampus paradoxus
## 107 275205 Hippocampus patagonicus
## 108 400947 Hippocampus pentagonus
## 109 306811 Hippocampus planifrons
## 110 306812 Hippocampus poeyi
## 111 306813 Hippocampus polytaenia
## 112 398433 Hippocampus pontohi
## 113 275206 Hippocampus procerus
## 114 306814 Hippocampus punctulatus
## 115 306815 Hippocampus punctulatus
## 116 388712 Hippocampus pusillus
## 117 275207 Hippocampus queenslandicus
## 118 306816 Hippocampus raji
## 119 127381 Hippocampus ramulosus
## 120 306817 Hippocampus regulus
## 121 159446 Hippocampus reidi
## 122 306818 Hippocampus rhynchomacer
## 123 306819 Hippocampus rosamondae
## 124 398434 Hippocampus satomiae
## 125 275208 Hippocampus semispinosus
## 126 398435 Hippocampus severnsi
## 127 275209 Hippocampus sindonis
## 128 275210 Hippocampus spinosissimus
## 129 306820 Hippocampus stylifer
## 130 306821 Hippocampus subcoronatus
## 131 275211 Hippocampus subelongatus
## 132 306822 Hippocampus suezensis
## 133 306823 Hippocampus taeniops
## 134 306824 Hippocampus taeniopterus
## 135 306825 Hippocampus takakurae
## 136 306826 Hippocampus tetragonous
## 137 212232 Hippocampus trimaculatus
## 138 306827 Hippocampus tristis
## 139 306828 Hippocampus tuberculatus
## 140 474956 Hippocampus tyro
## 141 306829 Hippocampus villosus
## 142 306830 Hippocampus vulgaris
## 143 398436 Hippocampus waleananus
## 144 212235 Hippocampus whitei
## 145 275212 Hippocampus zebra
## 146 275213 Hippocampus zosterae
## 2 843450 <NA>
## 3 843451 <NA>
## 4 843452 <NA>
## authority
## 1 Rafinesque, 1810
## 5 Rafinesque, 1810
## 6 Lesson, 1827
## 7 Fowler, 1907
## 8 Roule, 1916
## 9 Roule, 1916
## 10 Kuiter, 2001
## 11 Kaup, 1856
## 12 Günther, 1870
## 13 Leach, 1814
## 14 Risso, 1827
## 15 Roule, 1916
## 16 Roule, 1916
## 17 Jordan & Snyder, 1902
## 18 Jordan & Richardson, 1908
## 19 Whitley, 1970
## 20 Kaup, 1856
## 21 Kuiter, 2001
## 22 Fowler, 1907
## 23 Duméril, 1870
## 24 Duncker, 1914
## 25 Peters, 1869
## 26 Schinz, 1822
## 27 Bean, 1906
## 28 Bianconi, 1854
## 29 Bianconi, 1854
## 30 Bianconi, 1854
## 31 Boulenger, 1900
## 32 Zhang, Qin, Wang & Lin, 2016
## 33 Basilewsky, 1855
## 34 Kuiter, 2003
## 35 Cantor, 1849
## 36 Temminck & Schlegel, 1850
## 37 Fricke, 2004
## 38 Ogilby, 1908
## 39 Duméril, 1861
## 40 Gomon & Kuiter, 2009
## 41 Lourie & Randall, 2003
## 42 Fowler, 1922
## 43 Castelnau, 1873
## 44 Perry, 1810
## 45 Günther, 1870
## 46 Ginsburg, 1933
## 47 Kaup, 1856
## 48 Duméril, 1870
## 49 Jordan & Evermann, 1903
## 50 Rüppell, 1838
## 51 Rüppell, 1838
## 52 McCulloch, 1911
## 53 Gill, 1862
## 54 Temminck & Schlegel, 1850
## 55 Kuiter, 2001
## 56 Cuvier, 1829
## 57 Slastenenko, 1937
## 58 Ginsburg, 1937
## 59 Han, Kim, Kai & Senou, 2017
## 60 Kuiter, 2001
## 61 Rafinesque, 1810
## 62 Ginsburg, 1933
## 63 Jordan & Evermann, 1903
## 64 (Linnaeus, 1758)
## 65 Slastenenko, 1938
## 66 Slastenenko, 1937
## 67 Kaup, 1856
## 68 Duncker, 1926
## 69 DeKay, 1842
## 70 Kaup, 1856
## 71 Girard, 1858
## 72 Short, Smith, Motomura, Harasti & Hamilton, 2018
## 73 Kaup, 1856
## 74 Boulenger, 1900
## 75 Kuiter, 2001
## 76 Bleeker, 1854
## 77 Duméril, 1870
## 78 Jordan & Snyder, 1901
## 79 Townsend & Barbour, 1906
## 80 Bleeker, 1852
## 81 Raj, 1941
## 82 Kaup, 1856
## 83 De Vis, 1908
## 84 Kaup, 1856
## 85 Schinz, 1822
## 86 Bleeker, 1856
## 87 Cantor, 1849
## 88 Kaup, 1856
## 89 Bleeker, 1854
## 90 Gomon, 1997
## 91 Bleeker, 1853
## 92 Bleeker, 1852
## 93 Bleeker, 1853
## 94 Bleeker, 1853
## 95 Bleeker, 1853
## 96 Kuiter, 2001
## 97 Kuiter, 2001
## 98 Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 99 von Bonde, 1923
## 100 Fowler, 1944
## 101 Steindachner, 1866
## 102 Steindachner, 1866
## 103 Hemprich & Ehrenberg, 1856
## 104 Ehrenberg, 1871
## 105 Ginsburg, 1933
## 106 Foster & Gomon, 2010
## 107 Piacentino & Luzzatto, 2004
## 108 Rafinesque, 1810
## 109 Peters, 1877
## 110 Howell Rivero, 1934
## 111 Bleeker, 1854
## 112 Lourie & Kuiter, 2008
## 113 Kuiter, 2001
## 114 Guichenot, 1853
## 115 Kaup, 1856
## 116 Fricke, 2004
## 117 Horne, 2001
## 118 Whitley, 1955
## 119 Leach, 1814
## 120 Ginsburg, 1933
## 121 Ginsburg, 1933
## 122 Duméril, 1870
## 123 Borodin, 1928
## 124 Lourie & Kuiter, 2008
## 125 Kuiter, 2001
## 126 Lourie & Kuiter, 2008
## 127 Jordan & Snyder, 1901
## 128 Weber, 1913
## 129 Jordan & Gilbert, 1882
## 130 Günther, 1866
## 131 Castelnau, 1873
## 132 Duncker, 1940
## 133 Fowler, 1904
## 134 Bleeker, 1852
## 135 Tanaka, 1916
## 136 (Mitchill, 1814)
## 137 Leach, 1814
## 138 Castelnau, 1872
## 139 Castelnau, 1875
## 140 Randall & Lourie, 2009
## 141 Günther, 1880
## 142 Cloquet, 1821
## 143 Gomon & Kuiter, 2009
## 144 Bleeker, 1855
## 145 Whitley, 1964
## 146 Jordan & Gilbert, 1882
## 2 <NA>
## 3 <NA>
## 4 <NA>
## status
## 1 accepted
## 5 accepted
## 6 accepted
## 7 unaccepted
## 8 unaccepted
## 9 unaccepted
## 10 accepted
## 11 accepted
## 12 accepted
## 13 unaccepted
## 14 unaccepted
## 15 unaccepted
## 16 unaccepted
## 17 unaccepted
## 18 accepted
## 19 accepted
## 20 unaccepted
## 21 unaccepted
## 22 unaccepted
## 23 accepted
## 24 unaccepted
## 25 accepted
## 26 unaccepted
## 27 unaccepted
## 28 unaccepted
## 29 unaccepted
## 30 accepted
## 31 accepted
## 32 accepted
## 33 unaccepted
## 34 accepted
## 35 accepted
## 36 accepted
## 37 accepted
## 38 unaccepted
## 39 unaccepted
## 40 accepted
## 41 accepted
## 42 unaccepted
## 43 unaccepted
## 44 accepted
## 45 unaccepted
## 46 unaccepted
## 47 unaccepted
## 48 unaccepted
## 49 accepted
## 50 accepted
## 51 accepted
## 52 unaccepted
## 53 unaccepted
## 54 unaccepted
## 55 accepted
## 56 accepted
## 57 accepted
## 58 unaccepted
## 59 accepted
## 60 accepted
## 61 unaccepted
## 62 unaccepted
## 63 unaccepted
## 64 accepted
## 65 unaccepted
## 66 unaccepted
## 67 accepted
## 68 unaccepted
## 69 unaccepted
## 70 unaccepted
## 71 accepted
## 72 accepted
## 73 unaccepted
## 74 accepted
## 75 accepted
## 76 unaccepted
## 77 unaccepted
## 78 accepted
## 79 unaccepted
## 80 accepted
## 81 unaccepted
## 82 unaccepted
## 83 unaccepted
## 84 accepted
## 85 unaccepted
## 86 unaccepted
## 87 unaccepted
## 88 unaccepted
## 89 unaccepted
## 90 accepted
## 91 accepted
## 92 unaccepted
## 93 unaccepted
## 94 unaccepted
## 95 unaccepted
## 96 accepted
## 97 accepted
## 98 accepted
## 99 unaccepted
## 100 unaccepted
## 101 unaccepted
## 102 unaccepted
## 103 unaccepted
## 104 unaccepted
## 105 unaccepted
## 106 accepted
## 107 accepted
## 108 unaccepted
## 109 accepted
## 110 unaccepted
## 111 unaccepted
## 112 accepted
## 113 unaccepted
## 114 unaccepted
## 115 unaccepted
## 116 accepted
## 117 accepted
## 118 unaccepted
## 119 unaccepted
## 120 unaccepted
## 121 accepted
## 122 unaccepted
## 123 unaccepted
## 124 accepted
## 125 accepted
## 126 unaccepted
## 127 accepted
## 128 accepted
## 129 unaccepted
## 130 unaccepted
## 131 accepted
## 132 accepted
## 133 unaccepted
## 134 unaccepted
## 135 unaccepted
## 136 unaccepted
## 137 accepted
## 138 unaccepted
## 139 unaccepted
## 140 accepted
## 141 unaccepted
## 142 unaccepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
## 2 quarantined
## 3 quarantined
## 4 quarantined
##
## More than one WORMS ID found for taxon 'Hippocampus'!
##
## Enter rownumber of taxon (other inputs will return 'NA'):
## id target authority status
## 2 835080 Novaculops alvheimi Randall, 2013 accepted
## 3 1431860 Novaculops compressus Fukui, 2020 accepted
## 4 1009705 Novaculops halsteadi (Randall & Lobel, 2003) accepted
## 5 1009528 Novaculops koteamea (Randall & Allen, 2004) accepted
## 6 1008571 Novaculops pastellus (Randall, Earle & Rocha, 2008) accepted
## 7 1017252 Novaculops sciistius (Jordan & Thompson, 1914) accepted
## 8 319690 Novaculops woodi (Jenkins, 1901) accepted
##
## More than one WORMS ID found for taxon 'Novaculops'!
##
## Enter rownumber of taxon (other inputs will return 'NA'):
# Combine the original (cleaned) names, the parsed canonical names and the
# WoRMS IDs into one data frame with columns CleanedData, CanonicalFull and
# WormsIDs.
#
# One single-row data frame is built per entry and all rows are bound in a
# single rbind() call, rather than growing the frame inside a loop.
# seq_along() is used so that an empty `cleaned_data` yields an empty data
# frame instead of iterating over c(1, 0).
combined_rows <- lapply(seq_along(cleaned_data), function(i) {
  # The canonical name may be NULL for an entry; store NA in that case.
  canonical_value <- parsed_names[[i]]$canonical$full
  if (is.null(canonical_value)) {
    canonical_value <- NA
  }
  data.frame(
    CleanedData = cleaned_data[i],
    CanonicalFull = canonical_value,
    # Only the first ID of each lookup result is kept.
    WormsIDs = worms_ids[[i]][1]
  )
})
# The leading empty data frame keeps the result a data frame even when there
# are no rows to bind.
combined_dataframe <- do.call(rbind, c(list(data.frame()), combined_rows))
knitr::kable(head(combined_dataframe))
CleanedData | CanonicalFull | WormsIDs |
---|---|---|
Carcharhinus amblyrhynchos (Bleeker, | Carcharhinus amblyrhynchos | 217337 |
Carcharhinus galapagensis (Snodgrass & Heller, | Carcharhinus galapagensis | 105790 |
Galeocerdo cuvier (peron & Le Sueur, | Galeocerdo cuvier | 105799 |
Dasyatis thetidis Waite, 1899 | Dasyatis thetidis | 212247 |
Taeniura meyeni (Muller & Henle, | Taeniura meyeni | 217421 |
Albula neoguinaica Valenciennes, | Albula neoguinaica | 212254 |
Human Verification
Sometimes the original text contains misspellings, or the OCR output is incorrect; these can be found and fixed by hand. To do this, view the combined data frame, search WoRMS for any unmatched species and add the ID manually, and remove any rows that were not automatically removed in the earlier cleaning steps.
# Manual corrections to combined_dataframe, addressed by row number.
#
# Two assignment forms are used:
#   * [row, 2:3]  - overwrites columns 2 and 3, i.e. CanonicalFull and WormsIDs
#                   (the data frame is built with the column order
#                   CleanedData, CanonicalFull, WormsIDs).
#   * [row, c("CanonicalFull", "identificationQualifier", "WormsIDs")]
#                 - used for taxa only identified to genus ("sp. A", "sp. 3", ...);
#                   sets the genus name, the qualifier and the genus-level ID.
#
# c("name", 123) is a character vector, so the numeric ID is written as text.
# NOTE(review): presumably WormsIDs is already a character column at this
# point - confirm against the output of the WoRMS lookup.
#
# All row numbers refer to positions BEFORE the row removal on the last line
# of this block; inserting or deleting rows earlier in the pipeline will
# silently shift every index below.
combined_dataframe[9,2:3] = c("Enchelycore ramosus", 399857)
# First use of the identificationQualifier column: the frame built above only
# has three columns, so this assignment introduces it (other rows get NA).
combined_dataframe[17, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. A", 125636)
combined_dataframe[18, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. B", 125636)
combined_dataframe[19, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. C", 125636)
combined_dataframe[27,2:3] = c("Scarus psittacus", 219125)
combined_dataframe[29,2:3] = c("Scarus schlegeli", 276060)
combined_dataframe[31,2:3] = c("Limnichthys fasciatus", 277886)
combined_dataframe[33,2:3] = c("Parapercis cylindrica", 219155)
combined_dataframe[34,2:3] = c("Parapercis hexophtalma", 219159)
combined_dataframe[35,2:3] = c("Enneapterygius rufopileus", 277337)
combined_dataframe[37,2:3] = c("Norfolkia squamiceps", 276728)
combined_dataframe[38,2:3] = c("Cristiceps aurantiacus", 276603)
combined_dataframe[39,2:3] = c("Heteroclinus roseus", 281066)
combined_dataframe[40,2:3] = c("Cirripectes alboapicalis", 276716)
combined_dataframe[41,2:3] = c("Cirripectes castaneus", 219266)
combined_dataframe[42,2:3] = c("Cirripectes chelomatus", 276718)
combined_dataframe[43,2:3] = c("Enchelyurus ater", 276882)
combined_dataframe[48,2:3] = c("Plagiotremus rhinorhynchos", 219334)
combined_dataframe[51,2:3] = c("Xiphasia matsubarai", 219344)
combined_dataframe[53,2:3] = c("Ammodytoides vagus", 276860)
combined_dataframe[54,2:3] = c("Callionymus calcaratus", 302219)
combined_dataframe[55,2:3] = c("Amblygobius nocturnus", 219377)
combined_dataframe[57,2:3] = c("Asterropteryx semipunctatus", 219382)
combined_dataframe[60, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Callogobius", "sp. 3", 206441)
combined_dataframe[61, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Callogobius", "sp. 6", 206441)
combined_dataframe[75, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Priolepis", "sp. 3", 203905)
combined_dataframe[76, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Priolepis", "sp. 4", 203905)
combined_dataframe[93,2:3] = c("Zebrasoma scopas", 219679)
combined_dataframe[100,2:3] = c("Bothus pantherinus", 219795)
combined_dataframe[101,2:3] = c("Crossorhombus", 205615)
# No WoRMS ID is assigned for this name; the ID is explicitly set to NA.
combined_dataframe[102,2:3] = c("Paraplagusia unicolor", NA)
combined_dataframe[103,2:3] = c("Aseraggodes bahamondei", 279708)
combined_dataframe[104,2:3] = c("Aseraggodes macleayanus", 279729)
combined_dataframe[105,2:3] = c("Aseraggodes ramsaii", 279738)
combined_dataframe[106,2:3] = c("Balistoides conspicillum", 219876)
combined_dataframe[107,2:3] = c("Rhinecanthus aculeatus", 219890)
combined_dataframe[108,2:3] = c("Rhinecanthus rectangulus", 219886)
combined_dataframe[109,2:3] = c("Sufflamen chrysopterus", 219895)
combined_dataframe[110,2:3] = c("Sufflamen freanatus", 403408)
combined_dataframe[111,2:3] = c("Aluterus monoceros", 127407)
combined_dataframe[112,2:3] = c("Brachaluteres taylori", 279952)
combined_dataframe[115,2:3] = c("Cantherhines pardalis", 220058)
combined_dataframe[117,2:3] = c("Oxymonacanthus longirostris", 220063)
combined_dataframe[119,2:3] = c("Thamnaconus analis", 277205)
combined_dataframe[122,2:3] = c("Lactoria fornasini", 219902)
combined_dataframe[164,2:3] = c("Antennarius nummifer", 126530)
combined_dataframe[170,2:3] = c("Lepadichthys frenatus", 279218)
combined_dataframe[171,2:3] = c("Gobiesocidae", 125477)
combined_dataframe[191, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Hippocampus", "sp. A", 126224)
combined_dataframe[192, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Hippocampus", "sp. B", 126224)
combined_dataframe[223,2:3] = c("Pseudanthias pictilis", 277472)
combined_dataframe[229,2:3] = c("Belonepterygion fasciolatum", 279890)
combined_dataframe[231,2:3] = c("Terapon jarbua", 218350)
combined_dataframe[234,2:3] = c("Priacanthus hamrur", 218360)
combined_dataframe[239, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Apogon", "sp. B", 125913)
combined_dataframe[244,2:3] = c("Sillago ciliata", 273939)
combined_dataframe[256,2:3] = c("Trachinotus baillonii", 218440)
combined_dataframe[259,2:3] = c("Arripis trutta", 279693)
combined_dataframe[273,2:3] = c("Lethrinus nebulosus", 212081)
combined_dataframe[274,2:3] = c("Scolopsis bilineatus", 401948)
combined_dataframe[275,2:3] = c("Mulloidichthys flavolineatus", 218647)
combined_dataframe[276,2:3] = c("Mulloidichthys vanicolensis", 218648)
combined_dataframe[280,2:3] = c("Parupeneus multifasciatus", 277820)
combined_dataframe[281,2:3] = c("Parupeneus pleurostigma", 218656)
combined_dataframe[282,2:3] = c("Parupeneus spilurus", 277825)
combined_dataframe[285,2:3] = c("Pempheris oualensis", 218700)
combined_dataframe[286,2:3] = c("Pempheris vanicolensis", 218701)
combined_dataframe[287,2:3] = c("Girella cyanea", 280857)
combined_dataframe[288,2:3] = c("Girella elevata", 280858)
combined_dataframe[295,2:3] = c("Bathystethus cultratus", 279876)
combined_dataframe[296,2:3] = c("Labracoglossa nitida", 281246)
combined_dataframe[297,2:3] = c("Scorpis lineolatus", 315585)
combined_dataframe[298,2:3] = c("Scorpis violaceus", 315588)
combined_dataframe[302,2:3] = c("Chaetodon citrinellus", 218744)
combined_dataframe[303,2:3] = c("Chaetodon flavirostris", 273337)
combined_dataframe[305,2:3] = c("Chaetodon kleinii", 218738)
combined_dataframe[306,2:3] = c("Chaetodon lineolatus", 218734)
combined_dataframe[307,2:3] = c("Chaetodon lunula", 218733)
combined_dataframe[312,2:3] = c("Chaetodon plebeius", 273354)
combined_dataframe[313,2:3] = c("Chaetodon rainfordi", 273358)
combined_dataframe[314,2:3] = c("Chaetodon speculum", 218740)
combined_dataframe[315,2:3] = c("Chaetodon tricinctus", 273365)
combined_dataframe[316,2:3] = c("Chaetodon trifascialis", 218719)
combined_dataframe[317,2:3] = c("Chaetodon trifasciatus", 218741)
combined_dataframe[319,2:3] = c("Chaetodon unimaculatus", 218753)
combined_dataframe[320,2:3] = c("Chaetodon vagabundus", 218754)
combined_dataframe[321,2:3] = c("Forcipiger flavissimus", 218760)
combined_dataframe[322,2:3] = c("Heniochus acuminatus", 218765)
combined_dataframe[323,2:3] = c("Centropyge bispinosus", 211779)
combined_dataframe[324,2:3] = c("Centropyge tibicen", 278851)
combined_dataframe[325,2:3] = c("Centropyge vrolikii", 278853)
combined_dataframe[326,2:3] = c("Chaetodontoplus conspicillatus", 280116)
combined_dataframe[327,2:3] = c("Chaetodontoplus meredithi", 280120)
combined_dataframe[328,2:3] = c("Genicanthus semicinctus", 279095)
combined_dataframe[329,2:3] = c("Pomacanthus imperator", 220001)
combined_dataframe[330,2:3] = c("Pomacanthus semicirculatus", 220003)
combined_dataframe[331,2:3] = c("Evistias acutirostris", 280765)
combined_dataframe[332,2:3] = c("Abudefduf bengalensis", 212885)
combined_dataframe[333,2:3] = c("Abudefduf sexfasciatus", 159289)
combined_dataframe[334,2:3] = c("Abudefduf sordidus", 212888)
combined_dataframe[335,2:3] = c("Abudefduf vaigiensis", 212879)
combined_dataframe[336,2:3] = c("Abudefduf whitleyi", 273703)
combined_dataframe[337,2:3] = c("Amphiprion latezonatus", 278395)
combined_dataframe[338,2:3] = c("Amphiprion mccullochi", 278397)
combined_dataframe[339,2:3] = c("Chromis atripectoralis", 212812)
combined_dataframe[340,2:3] = c("Chromis flavomaculata", 273727)
combined_dataframe[341,2:3] = c("Chromis hypsilepis", 273730)
combined_dataframe[342,2:3] = c("Chromis margaritifer", 273739)
combined_dataframe[343,2:3] = c("Chromis nitida", 273744)
combined_dataframe[344,2:3] = c("Chromis vanderbilti", 273758)
combined_dataframe[345,2:3] = c("Chrysiptera glauca", 218783)
combined_dataframe[346,2:3] = c("Chrysiptera notialis", 276835)
combined_dataframe[347,2:3] = c("Dascyllus aruanus", 212843)
combined_dataframe[348,2:3] = c("Dascyllus reticulatus", 212844)
combined_dataframe[349,2:3] = c("Dascyllus trimaculatus", 212846)
combined_dataframe[350,2:3] = c("Neoglyphidodon polyacanthus", 278815)
combined_dataframe[351,2:3] = c("Parma alboscapularis", 282152)
combined_dataframe[352,2:3] = c("Parma polylepis", 282159)
combined_dataframe[354,2:3] = c("Plectroglyphidodon johnstonianus", 212859)
combined_dataframe[355,2:3] = c("Plectroglyphidodon lacrymatus", 212860)
# Rows 386-388 are listed ahead of rows 370 and 384; each statement targets a
# different row, so the order of these assignments does not affect the result.
combined_dataframe[386,2:3] = c("Coris aygula", 218957)
combined_dataframe[387,2:3] = c("Coris bulbifrons", 273551)
combined_dataframe[388,2:3] = c("Coris gaimard", 218960)
combined_dataframe[370,2:3] = c("Cheilodactylus vittatus", 311553)
combined_dataframe[384,2:3] = c("Choerodon fasciatus", 277268)
combined_dataframe[390,2:3] = c("Coris sandeyeri", 273563)
combined_dataframe[391,2:3] = c("Cymolutes torquatus", 218966)
combined_dataframe[392,2:3] = c("Gomphosus varius", 218975)
combined_dataframe[393,2:3] = c("Halichoeres nebulosus", 218986)
combined_dataframe[395,2:3] = c("Hemigymnus fasciatus", 218999)
combined_dataframe[396,2:3] = c("Hemigymnus melapterus", 218998)
combined_dataframe[399,2:3] = c("Labroides bicolor", 219015)
combined_dataframe[404,2:3] = c("Notolabrus inscriptus", 281790)
combined_dataframe[416,2:3] = c("Thalassoma jansenii", 273582)
combined_dataframe[423,2:3] = c("Xyrichtys jacksonensis", 273599)
# Drop row 65. This must stay last: removing it earlier would shift the
# positions of every later row and break the index-based edits above.
combined_dataframe <- combined_dataframe[-c(65),]
Darwin Core mapping
Required Terms
OBIS currently has eight required DwC terms: scientificName, scientificNameID, occurrenceID, eventDate, decimalLongitude, decimalLatitude, occurrenceStatus, basisOfRecord.
scientificName/scientificNameID
Create a dataframe with unique taxa only (though this should already be unique). This will be our primary DarwinCore data frame.
# Build the primary Darwin Core table: keep one row per distinct
# name / qualifier / WoRMS ID combination and rename the columns to their
# Darwin Core terms.
occurrence <- combined_dataframe %>%
  distinct(CanonicalFull, identificationQualifier, WormsIDs) %>%
  rename(scientificName = CanonicalFull, scientificNameID = WormsIDs) %>%
  mutate(
    # Turn each WoRMS ID into an LSID string; rows without an ID stay NA.
    scientificNameID = ifelse(
      is.na(scientificNameID),
      NA,
      paste0("urn:lsid:marinespecies.org:taxname:", scientificNameID)
    )
  )
occurrenceID
OccurrenceID is an identifier for the occurrence record and should be persistent and globally unique. It is built from the dataset short name, the literal text "occurrence", and an MD5 hash of the scientific name combined with the identification qualifier, joined by colons.
# digest() is not vectorized: given a vector it returns a single hash for the
# whole vector. Wrapping it with Vectorize() gives one hash per element.
vdigest <- Vectorize(digest)

# Generate occurrenceID as "<short_name>:occurrence:<md5>".
# The hash input is the scientific name pasted together with the
# identification qualifier (paste() renders a missing qualifier as the text
# "NA"). Keep this input unchanged: altering it would change every ID.
occurrence <- occurrence %>%
  mutate(
    occurrenceID = paste(
      short_name,
      "occurrence",
      vdigest(paste(scientificName, identificationQualifier), algo = "md5"),
      sep = ":"
    )
  )
eventDate
This is NULL since this is technically a checklist and we do not know the collection date.
decimalLongitude/decimalLatitude
Use obistools::calculate_centroid to calculate a centroid and radius for WKT strings. This is useful for populating decimalLongitude, decimalLatitude and coordinateUncertaintyInMeters. The WKT strings are from https://github.com/iobis/mwhs-shapes.
# Local copy of the Marine World Heritage site shapes (GeoPackage).
gpkg_path <- file.path(path_to_project_root, "scripts_data/marine_world_heritage.gpkg")

# Download the GeoPackage only if it is not already present.
# mode = "wb" forces a binary transfer; without it download.file() uses text
# mode on Windows, which corrupts binary files such as .gpkg.
if (!file.exists(gpkg_path)) {
  download.file(
    "https://github.com/iobis/mwhs-shapes/blob/master/output/marine_world_heritage.gpkg?raw=true",
    gpkg_path,
    mode = "wb"
  )
}

# Read all site geometries into an sf object.
shapes <- st_read(gpkg_path)
## Reading layer `marine_world_heritage' from data source
## `/mnt/c/Users/Chandra Earl/Desktop/Labs/UNESCO/mwhs-data-mobilization/scripts_data/marine_world_heritage.gpkg'
## using driver `GPKG'
## Simple feature collection with 60 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -55.32282 xmax: 180 ymax: 71.81381
## Geodetic CRS: 4326
# The GeoPackage can hold several features per site (core and buffer areas).
# Grouping by site name and summarizing merges them into one geometry per site.
shapes_processed <- summarize(group_by(shapes, name))

# Geometry of the Lord Howe Island Group site.
is_target_site <- shapes_processed$name == "Lord Howe Island Group"
ind_shape <- shapes_processed$geom[which(is_target_site)]

# WKT representation of the site geometry, written with 6 digits.
wkt <- st_as_text(ind_shape, digits = 6)

# Centroid of the WKT footprint; its latitude and longitude are applied to
# every occurrence row.
localities <- calculate_centroid(wkt)
occurrence %<>% mutate(
  decimalLatitude = localities$decimalLatitude,
  decimalLongitude = localities$decimalLongitude
)
Extra Terms
coordinateUncertaintyInMeters
Post-processing
Check data
Use the check_fields command from obistools to check if all OBIS required fields are present in an occurrence table and if any values are missing.
# Keep only the Darwin Core columns, in the order they are written to the
# occurrence file.
occurrence %<>% select(
  occurrenceID,
  scientificName,
  identificationQualifier,
  scientificNameID,
  eventDate,
  country,
  locality,
  decimalLatitude,
  decimalLongitude,
  coordinateUncertaintyInMeters,
  footprintWKT,
  geodeticDatum,
  occurrenceStatus,
  basisOfRecord
)

# Report missing required fields and empty values.
check_fields(occurrence)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## ℹ The deprecated feature was likely used in the obistools package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 431 × 4
## level field row message
## <chr> <chr> <int> <chr>
## 1 error eventDate 1 Empty value for required field eventDate
## 2 error eventDate 2 Empty value for required field eventDate
## 3 error eventDate 3 Empty value for required field eventDate
## 4 error eventDate 4 Empty value for required field eventDate
## 5 error eventDate 5 Empty value for required field eventDate
## 6 error eventDate 6 Empty value for required field eventDate
## 7 error eventDate 7 Empty value for required field eventDate
## 8 error eventDate 8 Empty value for required field eventDate
## 9 error eventDate 9 Empty value for required field eventDate
## 10 error eventDate 10 Empty value for required field eventDate
## # ℹ 421 more rows
Create the EML file
This is a file which contains the dataset’s metadata and is required in a DarwinCore-Archive.
## [1] "eml-2.1.1"
# Dataset title as it will appear in the published metadata.
title <- "Checklist of the coastal fishes of Lord Howe, Norfolk and Kermadec Islands, southwest Pacific Ocean: Fishes Checklist"

# Alternate identifier: the IPT resource URL for this dataset's short name.
alternateIdentifier <- paste0("https://ipt.obis.org/secretariat/resource?r=", short_name)

# Abstract of the source publication.
# Typos in the text were corrected ("ofcoastal" -> "of coastal",
# "2.1 %" -> "2.1%", doubled final period removed) because this string is
# published verbatim in the dataset metadata.
abstract <- eml$abstract(
  para = "A checklist of coastal fishes includes 433 species from Lord Howe Island, 254 from Norfolk Island, and 145 from the Kermadec Islands. Tropical and subtropical species dominate all three faunas, but the proportion of tropical species decreases, and the proportion of subtropical species increases, from west to east. Subtropical species are the most abundant individual fishes at all three islands. Only 4.6% of the combined fauna is endemic, with individual island endemism even lower (1.2-2.1%). The fish faunas of the three islands appear to have originated mainly by larval dispersal from Australia and the Coral Sea. Evidence for present-day dispersal is discussed. Faunal relationships among the subtropical islands of the western, central, and eastern South Pacific are examined. In the South Pacific as a whole, there is a high positive correlation between coastal fish diversity and hermatypic coral diversity."
)
People
Here we add the people involved in the project:
The creator is the person or organization responsible for creating the resource itself.
The contact is the person or institution to contact with questions about the use or interpretation of a data set.
The metadataProvider is the person responsible for providing the metadata documentation for the resource.
The associatedParty (in this case the Data Curator) is the person who mobilized the data from the original resource.
# Creator: author of the original resource.
creator <- eml$creator(
  individualName = eml$individualName(
    givenName = "Malcolm P.",
    surName = "Francis"
  ),
  organizationName = "University of Auckland"
)

# Contact: who to reach with questions about the dataset.
# Built with eml$contact() so the constructor matches the element it fills,
# in line with metadataProvider and associatedParty below.
contact <- eml$contact(
  individualName = eml$individualName(
    givenName = "OBIS",
    surName = "Secretariat"
  ),
  electronicMailAddress = "helpdesk@obis.org",
  organizationName = "OBIS",
  positionName = "Secretariat"
)

# Metadata provider: person who wrote the metadata for this resource.
metadataProvider <- eml$metadataProvider(
  individualName = eml$individualName(
    givenName = "Chandra",
    surName = "Earl"
  ),
  electronicMailAddress = "c.earl@unesco.org",
  organizationName = "UNESCO",
  positionName = "eDNA Scientific Officer"
)

# Associated party: the data curator, recorded with the role "processor".
associatedParty <- eml$associatedParty(
  role = "processor",
  individualName = eml$individualName(
    givenName = "Chandra",
    surName = "Earl"
  ),
  electronicMailAddress = "c.earl@unesco.org",
  organizationName = "UNESCO",
  positionName = "eDNA Scientific Officer"
)
Additional Metadata
Here we add the additionalMetadata element, which is required for a GBIF-type EML file and contains information such as the citation of the dataset, the citation of the original resource and the creation timestamp of the EML.
# Citation pattern used by the IPT:
# {dataset.authors} ({dataset.pubDate}) {dataset.title}. [Version {dataset.version}]. {organization.title}. {dataset.type} Dataset {dataset.doi}, {dataset.url}

# Capture the clock once so that the date/time part and the UTC offset of the
# stamp come from the same instant.
eml_timestamp <- Sys.time()
# "%z" gives the offset as e.g. "+0100"; the stamp needs it as "+01:00".
utc_offset <- format(eml_timestamp, "%z")
eml_date_stamp <- paste0(
  format(eml_timestamp, "%Y-%m-%dT%H:%M:%OS3"),
  substr(utc_offset, 1, 3),
  ":",
  substr(utc_offset, 4, 5)
)

# GBIF-specific block of the EML: creation timestamp, hierarchy level, a
# placeholder dataset citation and the citation of the original publication.
additionalMetadata <- eml$additionalMetadata(
  metadata = list(
    gbif = list(
      dateStamp = eml_date_stamp,
      hierarchyLevel = "dataset",
      citation = "IPT will autogenerate this",
      bibliography = list(
        citation = "Francis, Malcolm. (1993). Checklist of the coastal fishes of Lord Howe, Norfolk and Kermadec Islands, southwest Pacific Ocean."
      )
    )
  )
)

# Identifier attached to the bibliographic citation when the EML is post-processed.
citationdoi <- "http://dx.doi.org/10.6084/m9.figshare.c.4428305"
Coverage
Here we describe the dataset’s geographic, taxonomic and temporal coverage.
# Coverage: geographic and taxonomic extent of the dataset.
# The bounding box of the site geometry is computed once and reused.
site_bbox <- st_bbox(ind_shape)

coverage <- eml$coverage(
  geographicCoverage = eml$geographicCoverage(
    geographicDescription = "Lord Howe Island Group",
    boundingCoordinates = eml$boundingCoordinates(
      # West is the minimum longitude and east the maximum; the two were
      # previously swapped. Assumes the site does not straddle the
      # antimeridian.
      westBoundingCoordinate = site_bbox$xmin,
      eastBoundingCoordinate = site_bbox$xmax,
      northBoundingCoordinate = site_bbox$ymax,
      southBoundingCoordinate = site_bbox$ymin
    )
  ),
  taxonomicCoverage = eml$taxonomicCoverage(
    generalTaxonomicCoverage = "Fishes",
    taxonomicClassification = list(
      eml$taxonomicClassification(
        taxonRankName = "Superclass",
        taxonRankValue = "Agnatha"
      ),
      eml$taxonomicClassification(
        taxonRankName = "unranked",
        taxonRankValue = "Chondrichthyes"
      ),
      eml$taxonomicClassification(
        taxonRankName = "unranked",
        taxonRankValue = "Osteichthyes"
      )
    )
    # Temporal coverage is left disabled: the source is a checklist.
    # ),
    # temporalCoverage = eml$temporalCoverage(
    #   rangeOfDates = eml$rangeOfDates(
    #     beginDate = eml$beginDate(
    #       calendarDate = "2019-05-01"
    #     ),
    #     endDate = eml$endDate(
    #       calendarDate = "2016-05-06"
    #     )
    #   )
  )
)
Extra MetaData
These fields are not required, though they make the metadata more complete.
# Methods: a single step that links to the GitHub project and to the rendered
# notebook for this site/dataset.
methods_text <- paste0(
  "See Github <a href=\"https://github.com/iobis/mwhs-data-mobilization\">Project</a> and <a href=\"https://iobis.github.io/mwhs-data-mobilization/notebooks/",
  site_dir_name,
  "/",
  dataset_dir_name,
  "\"> R Notebook</a> for dataset construction methods"
)
methods <- eml$methods(
  methodStep = eml$methodStep(
    description = eml$description(para = methods_text)
  )
)
# Publication date of this dataset version.
pubDate <- "2023-10-15"

# Language of the original document.
language <- "eng"

# Dataset type keyword from the GBIF dataset type vocabulary.
keywordSet <- eml$keywordSet(
  keyword = "Occurrence",
  keywordThesaurus = "GBIF Dataset Type Vocabulary: http://rs.gbif.org/vocabulary/gbif/dataset_type_2015-07-10.xml"
)

# Maintenance: no updates planned; the description is left empty.
maintenance <- eml$maintenance(
  description = eml$description(para = ""),
  maintenanceUpdateFrequency = "notPlanned"
)

# Licence statement (CC0 1.0 public domain dedication).
cc0_statement <- "To the extent possible under law, the publisher has waived all rights to these data and has dedicated them to the <ulink url=\"http://creativecommons.org/publicdomain/zero/1.0/legalcode\"><citetitle>Public Domain (CC0 1.0)</citetitle></ulink>. Users may copy, modify, distribute and use the work, including for commercial purposes, without restriction."
intellectualRights <- eml$intellectualRights(para = cc0_statement)

# Why the dataset was mobilized.
purpose_statement <- "These data were made accessible through UNESCO's eDNA Expeditions project to mobilize available marine species and occurrence datasets from World Heritage Sites."
purpose <- eml$purpose(para = purpose_statement)

# Free-text additional information.
additionalInfo <- eml$additionalInfo(para = "marine, harvested by iOBIS")
Create and Validate EML
# Assemble the dataset element from the pieces defined above.
eml_dataset <- eml$dataset(
  alternateIdentifier = alternateIdentifier,
  title = title,
  creator = creator,
  metadataProvider = metadataProvider,
  associatedParty = associatedParty,
  pubDate = pubDate,
  coverage = coverage,
  language = language,
  abstract = abstract,
  keywordSet = keywordSet,
  contact = contact,
  methods = methods,
  intellectualRights = intellectualRights,
  purpose = purpose,
  maintenance = maintenance,
  additionalInfo = additionalInfo
)

# Wrap the dataset and the GBIF additional metadata in the EML root element.
# packageId is the IPT resource URL for this short name, suffixed with "/v1.0".
my_eml <- eml$eml(
  packageId = paste0("https://ipt.obis.org/secretariat/resource?id=", short_name, "/v1.0"),
  system = "http://gbif.org",
  scope = "system",
  dataset = eml_dataset,
  additionalMetadata = additionalMetadata
)

# Validate the assembled document against the EML schema.
eml_validate(my_eml)
## [1] TRUE
## attr(,"errors")
## character(0)
Create meta.xml file
This is a file which describes the archive and data file structure and is required in a DarwinCore-Archive. It is based on the template file “meta_occurrence_checklist_template.xml”
# Load the meta.xml template and collect all of its <field> elements.
meta_template <- file.path(path_to_project_root, "scripts_data/meta_occurrence_checklist_template.xml")
meta <- read_xml(meta_template)
fields <- xml_find_all(meta, "//d1:field")

# Common prefix of the Darwin Core term URIs matched below.
dwc_ns <- "http://rs.tdwg.org/dwc/terms/"

# Write a dataset-wide value into a field's "default" attribute.
set_default <- function(node, value) {
  xml_set_attr(node, "default", value)
}

# For each template field whose term is one of the constant-valued terms,
# fill in the value used throughout this dataset. Other fields are left
# untouched.
for (field in fields) {
  term <- xml_attr(field, "term")
  if (term == paste0(dwc_ns, "eventDate")) {
    set_default(field, eventDate)
  } else if (term == paste0(dwc_ns, "country")) {
    set_default(field, country)
  } else if (term == paste0(dwc_ns, "locality")) {
    set_default(field, locality)
  } else if (term == paste0(dwc_ns, "decimalLatitude")) {
    set_default(field, localities$decimalLatitude)
  } else if (term == paste0(dwc_ns, "decimalLongitude")) {
    set_default(field, localities$decimalLongitude)
  } else if (term == paste0(dwc_ns, "coordinateUncertaintyInMeters")) {
    set_default(field, localities$coordinateUncertaintyInMeters)
  } else if (term == paste0(dwc_ns, "footprintWKT")) {
    set_default(field, wkt)
  } else if (term == paste0(dwc_ns, "geodeticDatum")) {
    set_default(field, geodeticDatum)
  } else if (term == paste0(dwc_ns, "occurrenceStatus")) {
    set_default(field, occurrenceStatus)
  } else if (term == paste0(dwc_ns, "basisOfRecord")) {
    set_default(field, basisOfRecord)
  }
}

# Add identificationQualifier, which is not in the template: a new <field>
# is inserted as a sibling of the third template field.
# NOTE(review): the index is hard-coded to "3", while the occurrence table
# lists identificationQualifier as its third column - confirm against the
# template that this index points at the intended CSV column.
new_field <- xml_add_sibling(fields[[3]], "field")
xml_set_attr(new_field, "index", "3")
xml_set_attr(new_field, "term", paste0(dwc_ns, "identificationQualifier"))
fields <- append(fields, list(new_field))
Save outputs
# Write the three Darwin Core Archive components (occurrence.csv, meta.xml,
# eml.xml) into this dataset's output folder.
dwc_output_dir <- file.path(path_to_project_root, "output", site_dir_name, dataset_dir_name)

# Create the folder on the first run for a dataset; the writers below fail
# when the target directory is missing. This is a no-op if it already exists.
dir.create(dwc_output_dir, recursive = TRUE, showWarnings = FALSE)

# Missing values are written as empty cells rather than the string "NA".
write.csv(occurrence, paste0(dwc_output_dir, "/occurrence.csv"), na = "", row.names = FALSE)
write_xml(meta, file = paste0(dwc_output_dir, "/meta.xml"))
write_eml(my_eml, paste0(dwc_output_dir, "/eml.xml"))
Edit EML
We have to further edit the EML file to conform to GBIF-specific requirements that cannot be included in the original EML construction. This includes changing the schemaLocation and rearranging the children of the GBIF element, since the construction automatically arranges child nodes in alphabetical order.
# Post-process the eml.xml written above for GBIF: adjust the root attributes
# and reorder the children of the gbif element, then overwrite the file.
eml_path <- paste(dwc_output_dir, "/eml.xml", sep = "")
eml_content <- read_xml(eml_path)

# Root-level attributes: GBIF schemaLocation, dc namespace, no stmml
# namespace, and the document language.
root_node <- xml_root(eml_content)
xml_set_attr(
  root_node,
  "xsi:schemaLocation",
  "https://eml.ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.2/eml.xsd"
)
xml_set_attr(root_node, "xmlns:dc", "http://purl.org/dc/terms/")
xml_set_attr(root_node, "xmlns:stmml", NULL)
xml_set_attr(root_node, "xml:lang", "eng")

# Capture the nodes that have to move before anything is detached.
hierarchyLevel <- xml_find_all(eml_content, ".//hierarchyLevel")
dateStamp <- xml_find_all(eml_content, ".//dateStamp")
citation <- xml_find_all(eml_content, "./additionalMetadata/metadata/gbif/citation")
bibcitation <- xml_find_all(eml_content, "./additionalMetadata/metadata/gbif/bibliography/citation")

# The bibliography citation stays in place; it only receives its identifier.
xml_set_attr(bibcitation, "identifier", citationdoi)

# Detach the three nodes from their current positions.
xml_remove(hierarchyLevel)
xml_remove(dateStamp)
xml_remove(citation)

# Re-insert each one as the first child of gbif. Because every insertion goes
# to the front, the resulting order is dateStamp, hierarchyLevel, citation,
# followed by whatever children remained.
gbif_node <- xml_find_all(eml_content, ".//gbif")
xml_add_child(gbif_node, citation, .where = 0)
xml_add_child(gbif_node, hierarchyLevel, .where = 0)
xml_add_child(gbif_node, dateStamp, .where = 0)

write_xml(eml_content, eml_path)