Bundling Francis 1993 to a DwC Archive

This is an R Markdown Notebook for converting the species checklist found in the following reference to DarwinCore format for upload into OBIS as part of UNESCO’s eDNA Expeditions project:

Francis, Malcolm. (1993). Checklist of the coastal fishes of Lord Howe, Norfolk and Kermadec Islands, southwest Pacific Ocean.

Setup

Load the necessary libraries, suppressing their loading messages.

library(magrittr)                       # To use %<>% pipes
suppressMessages(library(janitor))      # To clean input data
suppressMessages(library(dplyr))        # To clean input data
library(stringr)                        # To clean input data
suppressMessages(library(rgnparser))    # To clean species names
suppressMessages(library(taxize))       # To get WoRMS IDs
library(worrms)                         # To get WoRMS IDs
library(digest)                         # To generate hashes
suppressMessages(library(obistools))    # To generate centroid lat/long and uncertainty
suppressMessages(library(sf))           # To generate wkt polygon
suppressMessages(library(EML))          # To create eml.xml file
library(xml2)                           # To create the meta.xml file
suppressMessages(library(zip))          # To zip DwC file

Input Parameters and Paths

# Path to the project root; the script, input and output paths in this notebook are built from it
path_to_project_root <- "../../.."
# Directory names combined into <root>/datasets/<site_dir_name>/<dataset_dir_name>
site_dir_name <- "lord_howe_island_group"
dataset_dir_name <- "Francis_1993"
# File name of the source PDF inside the dataset's "raw" directory
original_pdf <- "v47n2-136-170.pdf"
# Short identifier for this dataset (not used in this part of the notebook;
# presumably names the output archive -- TODO confirm)
short_name <- "lord-howe-francis-1993"

Parsing PDF table to CSV

The data for this reference is formatted as an image-based table inside a PDF across multiple sheets. First, we use pdf_to_table to OCR and parse out the table to a CSV.

# Name of the conda environment the Python script is run in
condaenv <- "mwhs-data-mobilization"

# Path to the Python script
script <- file.path(path_to_project_root, "scripts_data/pdf_to_tables/pdf_to_table.py")

# Input PDF file path
input_pdf <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "raw", original_pdf)

# Output directory for OCR/table files
output_dir <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "processed")

# Define page numbers and table areas (see documentation).
# Each entry pairs one table area (-a) with the page (-p) it is found on.
page_args <- c(
  "-a 185.463,24.165,618.111,305.999 -p 22",
  "-a 140.621,45.911,630.341,330.619 -p 23",
  "-a 140.043,27.077,627.428,311.239 -p 24",
  "-a 141.199,44.179,617.059,323.689 -p 25",
  "-a 141.492,23.437,618.92,306.422 -p 26",
  "-a 144.572,47.8,630.781,326.387 -p 27",
  "-a 143.838,16.112,630.135,305.547 -p 28",
  "-a 145.604,57.359,630.952,336.802 -p 29",
  "-a 146.317,19.567,625.863,302.856 -p 30",
  "-a 145.745,52.492,640.165,334.598 -p 31",
  "-a 150.617,23.285,649.924,309.193 -p 32",
  "-a 148.863,51.199,621.199,329.984 -p 33"
)

# Define run parameters (see documentation)
run_parameters <- "-s -nh"

# Combine page arguments and build the command line.
# Paths are shell-quoted so a project root containing spaces does not split into several arguments.
page_args_combined <- paste(page_args, collapse = " ")
command <- paste(
  "conda run -n", condaenv, "python", shQuote(script),
  "-i", shQuote(input_pdf), run_parameters, page_args_combined,
  "-o", shQuote(output_dir)
)

# Execute and capture the script's console output
parse_log <- system(command, intern = TRUE)

# With intern = TRUE a non-zero exit code is only reported through the "status" attribute;
# stop here so the following steps never read stale or missing CSV files.
exit_status <- attr(parse_log, "status")
if (!is.null(exit_status) && exit_status != 0) {
  stop(
    "pdf_to_table.py exited with status ", exit_status, ":\n",
    paste(parse_log, collapse = "\n"),
    call. = FALSE
  )
}

# Show the script's execution summary
parse_log
##  [1] ""                                                                                                         
##  [2] "Script Execution Summary"                                                                                 
##  [3] "Date and Time: 2023-09-15 03:36:22"                                                                       
##  [4] "------------------------------"                                                                           
##  [5] ""                                                                                                         
##  [6] "PDF input: ../../../datasets/lord_howe_island_group/Francis_1993/raw/v47n2-136-170.pdf"                   
##  [7] "Perform Table Parsing: TRUE"                                                                              
##  [8] "Selected Areas:"                                                                                          
##  [9] "  Area 1: [185.463, 24.165, 618.111, 305.999]"                                                            
## [10] "  Area 2: [140.621, 45.911, 630.341, 330.619]"                                                            
## [11] "  Area 3: [140.043, 27.077, 627.428, 311.239]"                                                            
## [12] "  Area 4: [141.199, 44.179, 617.059, 323.689]"                                                            
## [13] "  Area 5: [141.492, 23.437, 618.92, 306.422]"                                                             
## [14] "  Area 6: [144.572, 47.8, 630.781, 326.387]"                                                              
## [15] "  Area 7: [143.838, 16.112, 630.135, 305.547]"                                                            
## [16] "  Area 8: [145.604, 57.359, 630.952, 336.802]"                                                            
## [17] "  Area 9: [146.317, 19.567, 625.863, 302.856]"                                                            
## [18] "  Area 10: [145.745, 52.492, 640.165, 334.598]"                                                           
## [19] "  Area 11: [150.617, 23.285, 649.924, 309.193]"                                                           
## [20] "  Area 12: [148.863, 51.199, 621.199, 329.984]"                                                           
## [21] "Pages: 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33"                                                    
## [22] "Concatenate: False"                                                                                       
## [23] "Concatenate across headers: True"                                                                         
## [24] "Stream Extraction: True"                                                                                  
## [25] "Lattice Extraction: False"                                                                                
## [26] ""                                                                                                         
## [27] "Parsing Tables"                                                                                           
## [28] "------------------------------"                                                                           
## [29] ""                                                                                                         
## [30] ""                                                                                                         
## [31] "Saving to CSV"                                                                                            
## [32] "CSV file(s):"                                                                                             
## [33] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_1.csv"      
## [34] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_2.csv"      
## [35] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_3.csv"      
## [36] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_4.csv"      
## [37] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_5.csv"      
## [38] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_6.csv"      
## [39] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_7.csv"      
## [40] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_8.csv"      
## [41] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_9.csv"      
## [42] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_10.csv"     
## [43] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_11.csv"     
## [44] "\t../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_tables_parsed_12.csv"     
## [45] "------------------------------"                                                                           
## [46] ""                                                                                                         
## [47] ""                                                                                                         
## [48] "Run Details: ../../../datasets/lord_howe_island_group/Francis_1993/processed/v47n2-136-170_parameters.txt"
## [49] "Finished"                                                                                                 
## [50] ""

Read source data

Now we’ll read in the CSV tables produced by the previous step.

# Directory holding the CSV files written by the table-parsing step
processed_dir <- file.path(path_to_project_root, "datasets", site_dir_name, dataset_dir_name, "processed")

# The dot is escaped and the pattern anchored so that only files named exactly
# "v47n2-136-170_tables_parsed_<number>.csv" are picked up (no backups or look-alikes).
file_list <- list.files(
  path = processed_dir,
  pattern = "^v47n2-136-170_tables_parsed_\\d+\\.csv$",
  full.names = TRUE
)
if (length(file_list) == 0) {
  stop("No parsed table CSV files found in ", processed_dir, call. = FALSE)
}

# Read one parsed CSV (no header row) and reduce it to two columns: the first and
# the last column whose first-row cell is not NA.
read_first_and_last_column <- function(file) {
  df <- read.csv(file, header = FALSE, stringsAsFactors = FALSE)
  # Blank out first-row cells that start with "Unnamed"
  df[1, ] <- lapply(df[1, ], function(x) ifelse(grepl("^Unnamed", x), "", x))
  # NOTE(review): the cells above are replaced with "" rather than NA, so they still
  # count as non-NA in the check below -- confirm this is the intended column selection.
  non_na_cols <- which(!is.na(df[1, ]))
  if (length(non_na_cols) == 0) {
    stop("No usable columns found in ", file, call. = FALSE)
  }
  data.frame(
    First_Column = df[, min(non_na_cols)],
    Last_Column = df[, max(non_na_cols)],
    stringsAsFactors = FALSE
  )
}

# One two-column data frame per file, named by file path
df_list <- lapply(file_list, read_first_and_last_column)
names(df_list) <- file_list

# Stack all pages into one table and give the columns meaningful names
input_data <- do.call(rbind, df_list)
rownames(input_data) <- NULL
colnames(input_data) <- c("taxa", "lord_howe_island")

# Preview the first rows as a formatted table
knitr::kable(head(input_data))
taxa lord_howe_island
Odontaspididae
Odontaspis ferox (Risso,
Lamnidae
Carcharodon carcharias (Linnaeus,
Triakidae
Mustelus lenticulatus Phillipps,

Preprocessing

Here we tidy the data up, since OCR and table-parsing errors are common, and keep only the list of species, since this is a checklist.

Tidy Data

input_data %<>%
  remove_empty(c("rows", "cols")) %>%       # Remove empty rows and columns
  clean_names()                             # Standardise the column names

# Keep only rows with an entry in the last column (i.e. taxa recorded at Lord Howe Island).
# The NA check matters: `NA != ""` evaluates to NA, and subsetting with NA would insert
# all-NA rows instead of dropping them.
# NOTE(review): class/order/family heading rows are presumably dropped by this filter
# as well because they carry no entry in that column -- confirm against the parsed tables.
recorded_at_site <- !is.na(input_data$lord_howe_island) & input_data$lord_howe_island != ""
input_data <- input_data[recorded_at_site, ]

# Take the first column (taxon names) only
cleaned_data <- input_data[, 1]

# Preview the first names as a formatted table
knitr::kable(head(cleaned_data))
x
Carcharhinus amblyrhynchos (Bleeker,
Carcharhinus galapagensis (Snodgrass & Heller,
Galeocerdo cuvier (peron & Le Sueur,
Dasyatis thetidis Waite, 1899
Taeniura meyeni (Muller & Henle,
Albula neoguinaica Valenciennes,

Get WoRMS IDs

Auto matching

First we will try to do this automatically by cleaning the species names using gnparser and then using the taxize library to query the WoRMS database.

# Split each name string into its parsed components (canonical name, authorship, ...)
parsed_names <- rgnparser::gn_parse(cleaned_data)

# Look up the WoRMS ID for one parsed name.
#
# `element` is a single parsed-name record; its canonical name is read from
# `element$canonical$full`. The name is first searched among accepted names only
# (`accepted = TRUE`). If that search reports "not found", it is repeated without
# the `accepted` argument. Returns the result of the first search that is not
# "not found", or a plain NA when both searches report "not found".
get_worms_id_from_element <- function(element) {
  canonical_name <- element$canonical$full
  is_not_found <- function(result) attr(result, "match") == "not found"

  # First pass: accepted names only
  accepted_result <- get_wormsid(canonical_name, searchtype = "scientific", fuzzy = TRUE, messages = FALSE, accepted = TRUE)
  if (!is_not_found(accepted_result)) {
    return(accepted_result)
  }

  # Second pass: same query with the default `accepted` setting
  fallback_result <- get_wormsid(canonical_name, searchtype = "scientific", messages = FALSE, fuzzy = TRUE)
  if (is_not_found(fallback_result)) {
    return(NA)
  }
  fallback_result
}

# Look up a WoRMS ID for every parsed name; names the parser could not parse get NA
worms_ids <- lapply(parsed_names, function(parsed_name) {
  if (!parsed_name$parsed) {
    return(NA)
  }
  get_worms_id_from_element(parsed_name)
})
## 
##          id                                target
## 1    126224                           Hippocampus
## 5   1525460                           Hippocampus
## 6    275182               Hippocampus abdominalis
## 10   275183                    Hippocampus alatus
## 11   275184                 Hippocampus algiricus
## 12   275185                  Hippocampus angustus
## 18   275186                  Hippocampus barbouri
## 19   275187                Hippocampus bargibanti
## 23   212238              Hippocampus borboniensis
## 25   275189                 Hippocampus breviceps
## 30   212233            Hippocampus camelopardalis
## 31   212234                  Hippocampus capensis
## 32   886550                  Hippocampus casscsio
## 34   275190                  Hippocampus colemani
## 35   275191                     Hippocampus comes
## 36   275192                 Hippocampus coronatus
## 37   388711               Hippocampus curvicuspis
## 40   398432                  Hippocampus debelius
## 41   275193                    Hippocampus denise
## 44   159445                   Hippocampus erectus
## 49   275194                   Hippocampus fisheri
## 50   212230                    Hippocampus fuscus
## 51   212230                    Hippocampus fuscus
## 55   275195                Hippocampus grandiceps
## 56   154776                Hippocampus guttulatus
## 57   248042 Hippocampus guttulatus microstephanus
## 59  1376208                     Hippocampus haema
## 60   275196                  Hippocampus hendriki
## 64   127380               Hippocampus hippocampus
## 67   212239                   Hippocampus histrix
## 71   275197                    Hippocampus ingens
## 72  1288522                  Hippocampus japapigu
## 74   275198                  Hippocampus jayakari
## 75   275199                   Hippocampus jugumus
## 78   212236                  Hippocampus kelloggi
## 80   212237                      Hippocampus kuda
## 84   275200            Hippocampus lichtensteinii
## 90   275201                  Hippocampus minotaur
## 91   275202                  Hippocampus mohnikei
## 96   275203           Hippocampus montebelloensis
## 97   275204               Hippocampus multispinus
## 98  1437127                      Hippocampus nalu
## 106  712534                 Hippocampus paradoxus
## 107  275205               Hippocampus patagonicus
## 109  306811                Hippocampus planifrons
## 112  398433                   Hippocampus pontohi
## 116  388712                  Hippocampus pusillus
## 117  275207            Hippocampus queenslandicus
## 121  159446                     Hippocampus reidi
## 124  398434                  Hippocampus satomiae
## 125  275208              Hippocampus semispinosus
## 127  275209                  Hippocampus sindonis
## 128  275210             Hippocampus spinosissimus
## 131  275211              Hippocampus subelongatus
## 132  306822                 Hippocampus suezensis
## 137  212232              Hippocampus trimaculatus
## 140  474956                      Hippocampus tyro
## 143  398436                Hippocampus waleananus
## 144  212235                    Hippocampus whitei
## 145  275212                     Hippocampus zebra
## 146  275213                  Hippocampus zosterae
##                                                               authority
## 1                                                      Rafinesque, 1810
## 5                                                      Rafinesque, 1810
## 6                                                          Lesson, 1827
## 10                                                         Kuiter, 2001
## 11                                                           Kaup, 1856
## 12                                                        Günther, 1870
## 18                                            Jordan & Richardson, 1908
## 19                                                        Whitley, 1970
## 23                                                        Duméril, 1870
## 25                                                         Peters, 1869
## 30                                                       Bianconi, 1854
## 31                                                      Boulenger, 1900
## 32                                         Zhang, Qin, Wang & Lin, 2016
## 34                                                         Kuiter, 2003
## 35                                                         Cantor, 1849
## 36                                            Temminck & Schlegel, 1850
## 37                                                         Fricke, 2004
## 40                                                 Gomon & Kuiter, 2009
## 41                                               Lourie & Randall, 2003
## 44                                                          Perry, 1810
## 49                                              Jordan & Evermann, 1903
## 50                                                        Rüppell, 1838
## 51                                                        Rüppell, 1838
## 55                                                         Kuiter, 2001
## 56                                                         Cuvier, 1829
## 57                                                    Slastenenko, 1937
## 59                                          Han, Kim, Kai & Senou, 2017
## 60                                                         Kuiter, 2001
## 64                                                     (Linnaeus, 1758)
## 67                                                           Kaup, 1856
## 71                                                         Girard, 1858
## 72                     Short, Smith, Motomura, Harasti & Hamilton, 2018
## 74                                                      Boulenger, 1900
## 75                                                         Kuiter, 2001
## 78                                                Jordan & Snyder, 1901
## 80                                                        Bleeker, 1852
## 84                                                           Kaup, 1856
## 90                                                          Gomon, 1997
## 91                                                        Bleeker, 1853
## 96                                                         Kuiter, 2001
## 97                                                         Kuiter, 2001
## 98  Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 106                                                Foster & Gomon, 2010
## 107                                         Piacentino & Luzzatto, 2004
## 109                                                        Peters, 1877
## 112                                               Lourie & Kuiter, 2008
## 116                                                        Fricke, 2004
## 117                                                         Horne, 2001
## 121                                                      Ginsburg, 1933
## 124                                               Lourie & Kuiter, 2008
## 125                                                        Kuiter, 2001
## 127                                               Jordan & Snyder, 1901
## 128                                                         Weber, 1913
## 131                                                     Castelnau, 1873
## 132                                                       Duncker, 1940
## 137                                                         Leach, 1814
## 140                                              Randall & Lourie, 2009
## 143                                                Gomon & Kuiter, 2009
## 144                                                       Bleeker, 1855
## 145                                                       Whitley, 1964
## 146                                              Jordan & Gilbert, 1882
##       status
## 1   accepted
## 5   accepted
## 6   accepted
## 10  accepted
## 11  accepted
## 12  accepted
## 18  accepted
## 19  accepted
## 23  accepted
## 25  accepted
## 30  accepted
## 31  accepted
## 32  accepted
## 34  accepted
## 35  accepted
## 36  accepted
## 37  accepted
## 40  accepted
## 41  accepted
## 44  accepted
## 49  accepted
## 50  accepted
## 51  accepted
## 55  accepted
## 56  accepted
## 57  accepted
## 59  accepted
## 60  accepted
## 64  accepted
## 67  accepted
## 71  accepted
## 72  accepted
## 74  accepted
## 75  accepted
## 78  accepted
## 80  accepted
## 84  accepted
## 90  accepted
## 91  accepted
## 96  accepted
## 97  accepted
## 98  accepted
## 106 accepted
## 107 accepted
## 109 accepted
## 112 accepted
## 116 accepted
## 117 accepted
## 121 accepted
## 124 accepted
## 125 accepted
## 127 accepted
## 128 accepted
## 131 accepted
## 132 accepted
## 137 accepted
## 140 accepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
## 
## More than one WORMS ID found for taxon 'Hippocampus'!
## 
##                   Enter rownumber of taxon (other inputs will return 'NA'):
##          id                                 target
## 1    126224                            Hippocampus
## 5   1525460                            Hippocampus
## 6    275182                Hippocampus abdominalis
## 7    306774                    Hippocampus agnesae
## 8    306775                      Hippocampus aimei
## 9    716772                      Hippocampus aimei
## 10   275183                     Hippocampus alatus
## 11   275184                  Hippocampus algiricus
## 12   275185                   Hippocampus angustus
## 13   306776                 Hippocampus antiquorum
## 14   306777                   Hippocampus antiquus
## 15   400954                      Hippocampus arnei
## 16   716773                      Hippocampus arnei
## 17   306778                  Hippocampus aterrimus
## 18   275186                   Hippocampus barbouri
## 19   275187                 Hippocampus bargibanti
## 20   713214                   Hippocampus bicuspis
## 21   275188                Hippocampus biocellatus
## 22   306779                   Hippocampus bleekeri
## 23   212238               Hippocampus borboniensis
## 24   306780             Hippocampus brachyrhynchus
## 25   275189                  Hippocampus breviceps
## 26   306781               Hippocampus brevirostris
## 27   306782                   Hippocampus brunneus
## 28   400945            Hippocampus cameleopardalis
## 29   400946            Hippocampus cameleopardalus
## 30   212233             Hippocampus camelopardalis
## 31   212234                   Hippocampus capensis
## 32   886550                   Hippocampus casscsio
## 33   306783                  Hippocampus chinensis
## 34   275190                   Hippocampus colemani
## 35   275191                      Hippocampus comes
## 36   275192                  Hippocampus coronatus
## 37   388711                Hippocampus curvicuspis
## 38   306784                      Hippocampus dahli
## 39   306785                     Hippocampus deanei
## 40   398432                   Hippocampus debelius
## 41   275193                     Hippocampus denise
## 42   306786               Hippocampus ecuadorensis
## 43   306787                  Hippocampus elongatus
## 44   159445                    Hippocampus erectus
## 45   306788                  Hippocampus erinaceus
## 46   154815                  Hippocampus europaeus
## 47   306789               Hippocampus fascicularis
## 48   713215               Hippocampus filamentosus
## 49   275194                    Hippocampus fisheri
## 50   212230                     Hippocampus fuscus
## 51   212230                     Hippocampus fuscus
## 52   306790              Hippocampus graciliformis
## 53   306791                   Hippocampus gracilis
## 54   306792              Hippocampus gracilissimus
## 55   275195                 Hippocampus grandiceps
## 56   154776                 Hippocampus guttulatus
## 57   248042  Hippocampus guttulatus microstephanus
## 58   323136  Hippocampus guttulatus multiannularis
## 59  1376208                      Hippocampus haema
## 60   275196                   Hippocampus hendriki
## 61   154458                 Hippocampus heptagonus
## 62   306793                Hippocampus hildebrandi
## 63   306794                    Hippocampus hilonis
## 64   127380                Hippocampus hippocampus
## 65   322937 Hippocampus hippocampus microcoronatus
## 66   322938 Hippocampus hippocampus microstephanus
## 67   212239                    Hippocampus histrix
## 68   306795                      Hippocampus horai
## 69   306796                  Hippocampus hudsonius
## 70   400949                    Hippocampus hystrix
## 71   275197                     Hippocampus ingens
## 72  1288522                   Hippocampus japapigu
## 73   306797                  Hippocampus japonicus
## 74   275198                   Hippocampus jayakari
## 75   275199                    Hippocampus jugumus
## 76   306798           Hippocampus kampylotrachelos
## 77   306799                     Hippocampus kaupii
## 78   212236                   Hippocampus kelloggi
## 79   306800                   Hippocampus kincaidi
## 80   212237                       Hippocampus kuda
## 81   323205        Hippocampus kuda multiannularis
## 82   306801              Hippocampus laevicaudatus
## 83   713217                      Hippocampus lenis
## 84   275200             Hippocampus lichtensteinii
## 85   154777               Hippocampus longirostris
## 86   306802                 Hippocampus manadensis
## 87   306803                   Hippocampus mannulus
## 88   306804                 Hippocampus marginalis
## 89   306805               Hippocampus melanospilos
## 90   275201                   Hippocampus minotaur
## 91   275202                   Hippocampus mohnikei
## 92   306806                Hippocampus moluccensis
## 93   400952                    Hippocampus monckei
## 94   400951                   Hippocampus monickei
## 95   400953                    Hippocampus monikei
## 96   275203            Hippocampus montebelloensis
## 97   275204                Hippocampus multispinus
## 98  1437127                       Hippocampus nalu
## 99   306807                 Hippocampus natalensis
## 100  306808             Hippocampus novaehebudorum
## 101  400955             Hippocampus novaehollandae
## 102  306809            Hippocampus novaehollandiae
## 103  713212                   Hippocampus obscurus
## 104  713213                   Hippocampus obscurus
## 105  306810                    Hippocampus obtusus
## 106  712534                  Hippocampus paradoxus
## 107  275205                Hippocampus patagonicus
## 108  400947                 Hippocampus pentagonus
## 109  306811                 Hippocampus planifrons
## 110  306812                      Hippocampus poeyi
## 111  306813                 Hippocampus polytaenia
## 112  398433                    Hippocampus pontohi
## 113  275206                   Hippocampus procerus
## 114  306814                Hippocampus punctulatus
## 115  306815                Hippocampus punctulatus
## 116  388712                   Hippocampus pusillus
## 117  275207             Hippocampus queenslandicus
## 118  306816                       Hippocampus raji
## 119  127381                  Hippocampus ramulosus
## 120  306817                    Hippocampus regulus
## 121  159446                      Hippocampus reidi
## 122  306818               Hippocampus rhynchomacer
## 123  306819                 Hippocampus rosamondae
## 124  398434                   Hippocampus satomiae
## 125  275208               Hippocampus semispinosus
## 126  398435                   Hippocampus severnsi
## 127  275209                   Hippocampus sindonis
## 128  275210              Hippocampus spinosissimus
## 129  306820                   Hippocampus stylifer
## 130  306821               Hippocampus subcoronatus
## 131  275211               Hippocampus subelongatus
## 132  306822                  Hippocampus suezensis
## 133  306823                   Hippocampus taeniops
## 134  306824               Hippocampus taeniopterus
## 135  306825                  Hippocampus takakurae
## 136  306826                Hippocampus tetragonous
## 137  212232               Hippocampus trimaculatus
## 138  306827                    Hippocampus tristis
## 139  306828               Hippocampus tuberculatus
## 140  474956                       Hippocampus tyro
## 141  306829                   Hippocampus villosus
## 142  306830                   Hippocampus vulgaris
## 143  398436                 Hippocampus waleananus
## 144  212235                     Hippocampus whitei
## 145  275212                      Hippocampus zebra
## 146  275213                   Hippocampus zosterae
## 2    843450                                   <NA>
## 3    843451                                   <NA>
## 4    843452                                   <NA>
##                                                               authority
## 1                                                      Rafinesque, 1810
## 5                                                      Rafinesque, 1810
## 6                                                          Lesson, 1827
## 7                                                          Fowler, 1907
## 8                                                           Roule, 1916
## 9                                                           Roule, 1916
## 10                                                         Kuiter, 2001
## 11                                                           Kaup, 1856
## 12                                                        Günther, 1870
## 13                                                          Leach, 1814
## 14                                                          Risso, 1827
## 15                                                          Roule, 1916
## 16                                                          Roule, 1916
## 17                                                Jordan & Snyder, 1902
## 18                                            Jordan & Richardson, 1908
## 19                                                        Whitley, 1970
## 20                                                           Kaup, 1856
## 21                                                         Kuiter, 2001
## 22                                                         Fowler, 1907
## 23                                                        Duméril, 1870
## 24                                                        Duncker, 1914
## 25                                                         Peters, 1869
## 26                                                         Schinz, 1822
## 27                                                           Bean, 1906
## 28                                                       Bianconi, 1854
## 29                                                       Bianconi, 1854
## 30                                                       Bianconi, 1854
## 31                                                      Boulenger, 1900
## 32                                         Zhang, Qin, Wang & Lin, 2016
## 33                                                     Basilewsky, 1855
## 34                                                         Kuiter, 2003
## 35                                                         Cantor, 1849
## 36                                            Temminck & Schlegel, 1850
## 37                                                         Fricke, 2004
## 38                                                         Ogilby, 1908
## 39                                                        Duméril, 1861
## 40                                                 Gomon & Kuiter, 2009
## 41                                               Lourie & Randall, 2003
## 42                                                         Fowler, 1922
## 43                                                      Castelnau, 1873
## 44                                                          Perry, 1810
## 45                                                        Günther, 1870
## 46                                                       Ginsburg, 1933
## 47                                                           Kaup, 1856
## 48                                                        Duméril, 1870
## 49                                              Jordan & Evermann, 1903
## 50                                                        Rüppell, 1838
## 51                                                        Rüppell, 1838
## 52                                                      McCulloch, 1911
## 53                                                           Gill, 1862
## 54                                            Temminck & Schlegel, 1850
## 55                                                         Kuiter, 2001
## 56                                                         Cuvier, 1829
## 57                                                    Slastenenko, 1937
## 58                                                       Ginsburg, 1937
## 59                                          Han, Kim, Kai & Senou, 2017
## 60                                                         Kuiter, 2001
## 61                                                     Rafinesque, 1810
## 62                                                       Ginsburg, 1933
## 63                                              Jordan & Evermann, 1903
## 64                                                     (Linnaeus, 1758)
## 65                                                    Slastenenko, 1938
## 66                                                    Slastenenko, 1937
## 67                                                           Kaup, 1856
## 68                                                        Duncker, 1926
## 69                                                          DeKay, 1842
## 70                                                           Kaup, 1856
## 71                                                         Girard, 1858
## 72                     Short, Smith, Motomura, Harasti & Hamilton, 2018
## 73                                                           Kaup, 1856
## 74                                                      Boulenger, 1900
## 75                                                         Kuiter, 2001
## 76                                                        Bleeker, 1854
## 77                                                        Duméril, 1870
## 78                                                Jordan & Snyder, 1901
## 79                                             Townsend & Barbour, 1906
## 80                                                        Bleeker, 1852
## 81                                                            Raj, 1941
## 82                                                           Kaup, 1856
## 83                                                         De Vis, 1908
## 84                                                           Kaup, 1856
## 85                                                         Schinz, 1822
## 86                                                        Bleeker, 1856
## 87                                                         Cantor, 1849
## 88                                                           Kaup, 1856
## 89                                                        Bleeker, 1854
## 90                                                          Gomon, 1997
## 91                                                        Bleeker, 1853
## 92                                                        Bleeker, 1852
## 93                                                        Bleeker, 1853
## 94                                                        Bleeker, 1853
## 95                                                        Bleeker, 1853
## 96                                                         Kuiter, 2001
## 97                                                         Kuiter, 2001
## 98  Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 99                                                      von Bonde, 1923
## 100                                                        Fowler, 1944
## 101                                                  Steindachner, 1866
## 102                                                  Steindachner, 1866
## 103                                          Hemprich & Ehrenberg, 1856
## 104                                                     Ehrenberg, 1871
## 105                                                      Ginsburg, 1933
## 106                                                Foster & Gomon, 2010
## 107                                         Piacentino & Luzzatto, 2004
## 108                                                    Rafinesque, 1810
## 109                                                        Peters, 1877
## 110                                                 Howell Rivero, 1934
## 111                                                       Bleeker, 1854
## 112                                               Lourie & Kuiter, 2008
## 113                                                        Kuiter, 2001
## 114                                                     Guichenot, 1853
## 115                                                          Kaup, 1856
## 116                                                        Fricke, 2004
## 117                                                         Horne, 2001
## 118                                                       Whitley, 1955
## 119                                                         Leach, 1814
## 120                                                      Ginsburg, 1933
## 121                                                      Ginsburg, 1933
## 122                                                       Duméril, 1870
## 123                                                       Borodin, 1928
## 124                                               Lourie & Kuiter, 2008
## 125                                                        Kuiter, 2001
## 126                                               Lourie & Kuiter, 2008
## 127                                               Jordan & Snyder, 1901
## 128                                                         Weber, 1913
## 129                                              Jordan & Gilbert, 1882
## 130                                                       Günther, 1866
## 131                                                     Castelnau, 1873
## 132                                                       Duncker, 1940
## 133                                                        Fowler, 1904
## 134                                                       Bleeker, 1852
## 135                                                        Tanaka, 1916
## 136                                                    (Mitchill, 1814)
## 137                                                         Leach, 1814
## 138                                                     Castelnau, 1872
## 139                                                     Castelnau, 1875
## 140                                              Randall & Lourie, 2009
## 141                                                       Günther, 1880
## 142                                                       Cloquet, 1821
## 143                                                Gomon & Kuiter, 2009
## 144                                                       Bleeker, 1855
## 145                                                       Whitley, 1964
## 146                                              Jordan & Gilbert, 1882
## 2                                                                  <NA>
## 3                                                                  <NA>
## 4                                                                  <NA>
##          status
## 1      accepted
## 5      accepted
## 6      accepted
## 7    unaccepted
## 8    unaccepted
## 9    unaccepted
## 10     accepted
## 11     accepted
## 12     accepted
## 13   unaccepted
## 14   unaccepted
## 15   unaccepted
## 16   unaccepted
## 17   unaccepted
## 18     accepted
## 19     accepted
## 20   unaccepted
## 21   unaccepted
## 22   unaccepted
## 23     accepted
## 24   unaccepted
## 25     accepted
## 26   unaccepted
## 27   unaccepted
## 28   unaccepted
## 29   unaccepted
## 30     accepted
## 31     accepted
## 32     accepted
## 33   unaccepted
## 34     accepted
## 35     accepted
## 36     accepted
## 37     accepted
## 38   unaccepted
## 39   unaccepted
## 40     accepted
## 41     accepted
## 42   unaccepted
## 43   unaccepted
## 44     accepted
## 45   unaccepted
## 46   unaccepted
## 47   unaccepted
## 48   unaccepted
## 49     accepted
## 50     accepted
## 51     accepted
## 52   unaccepted
## 53   unaccepted
## 54   unaccepted
## 55     accepted
## 56     accepted
## 57     accepted
## 58   unaccepted
## 59     accepted
## 60     accepted
## 61   unaccepted
## 62   unaccepted
## 63   unaccepted
## 64     accepted
## 65   unaccepted
## 66   unaccepted
## 67     accepted
## 68   unaccepted
## 69   unaccepted
## 70   unaccepted
## 71     accepted
## 72     accepted
## 73   unaccepted
## 74     accepted
## 75     accepted
## 76   unaccepted
## 77   unaccepted
## 78     accepted
## 79   unaccepted
## 80     accepted
## 81   unaccepted
## 82   unaccepted
## 83   unaccepted
## 84     accepted
## 85   unaccepted
## 86   unaccepted
## 87   unaccepted
## 88   unaccepted
## 89   unaccepted
## 90     accepted
## 91     accepted
## 92   unaccepted
## 93   unaccepted
## 94   unaccepted
## 95   unaccepted
## 96     accepted
## 97     accepted
## 98     accepted
## 99   unaccepted
## 100  unaccepted
## 101  unaccepted
## 102  unaccepted
## 103  unaccepted
## 104  unaccepted
## 105  unaccepted
## 106    accepted
## 107    accepted
## 108  unaccepted
## 109    accepted
## 110  unaccepted
## 111  unaccepted
## 112    accepted
## 113  unaccepted
## 114  unaccepted
## 115  unaccepted
## 116    accepted
## 117    accepted
## 118  unaccepted
## 119  unaccepted
## 120  unaccepted
## 121    accepted
## 122  unaccepted
## 123  unaccepted
## 124    accepted
## 125    accepted
## 126  unaccepted
## 127    accepted
## 128    accepted
## 129  unaccepted
## 130  unaccepted
## 131    accepted
## 132    accepted
## 133  unaccepted
## 134  unaccepted
## 135  unaccepted
## 136  unaccepted
## 137    accepted
## 138  unaccepted
## 139  unaccepted
## 140    accepted
## 141  unaccepted
## 142  unaccepted
## 143    accepted
## 144    accepted
## 145    accepted
## 146    accepted
## 2   quarantined
## 3   quarantined
## 4   quarantined
## 
## More than one WORMS ID found for taxon 'Hippocampus'!
## 
##                   Enter rownumber of taxon (other inputs will return 'NA'):
##          id                                target
## 1    126224                           Hippocampus
## 5   1525460                           Hippocampus
## 6    275182               Hippocampus abdominalis
## 10   275183                    Hippocampus alatus
## 11   275184                 Hippocampus algiricus
## 12   275185                  Hippocampus angustus
## 18   275186                  Hippocampus barbouri
## 19   275187                Hippocampus bargibanti
## 23   212238              Hippocampus borboniensis
## 25   275189                 Hippocampus breviceps
## 30   212233            Hippocampus camelopardalis
## 31   212234                  Hippocampus capensis
## 32   886550                  Hippocampus casscsio
## 34   275190                  Hippocampus colemani
## 35   275191                     Hippocampus comes
## 36   275192                 Hippocampus coronatus
## 37   388711               Hippocampus curvicuspis
## 40   398432                  Hippocampus debelius
## 41   275193                    Hippocampus denise
## 44   159445                   Hippocampus erectus
## 49   275194                   Hippocampus fisheri
## 50   212230                    Hippocampus fuscus
## 51   212230                    Hippocampus fuscus
## 55   275195                Hippocampus grandiceps
## 56   154776                Hippocampus guttulatus
## 57   248042 Hippocampus guttulatus microstephanus
## 59  1376208                     Hippocampus haema
## 60   275196                  Hippocampus hendriki
## 64   127380               Hippocampus hippocampus
## 67   212239                   Hippocampus histrix
## 71   275197                    Hippocampus ingens
## 72  1288522                  Hippocampus japapigu
## 74   275198                  Hippocampus jayakari
## 75   275199                   Hippocampus jugumus
## 78   212236                  Hippocampus kelloggi
## 80   212237                      Hippocampus kuda
## 84   275200            Hippocampus lichtensteinii
## 90   275201                  Hippocampus minotaur
## 91   275202                  Hippocampus mohnikei
## 96   275203           Hippocampus montebelloensis
## 97   275204               Hippocampus multispinus
## 98  1437127                      Hippocampus nalu
## 106  712534                 Hippocampus paradoxus
## 107  275205               Hippocampus patagonicus
## 109  306811                Hippocampus planifrons
## 112  398433                   Hippocampus pontohi
## 116  388712                  Hippocampus pusillus
## 117  275207            Hippocampus queenslandicus
## 121  159446                     Hippocampus reidi
## 124  398434                  Hippocampus satomiae
## 125  275208              Hippocampus semispinosus
## 127  275209                  Hippocampus sindonis
## 128  275210             Hippocampus spinosissimus
## 131  275211              Hippocampus subelongatus
## 132  306822                 Hippocampus suezensis
## 137  212232              Hippocampus trimaculatus
## 140  474956                      Hippocampus tyro
## 143  398436                Hippocampus waleananus
## 144  212235                    Hippocampus whitei
## 145  275212                     Hippocampus zebra
## 146  275213                  Hippocampus zosterae
##                                                               authority
## 1                                                      Rafinesque, 1810
## 5                                                      Rafinesque, 1810
## 6                                                          Lesson, 1827
## 10                                                         Kuiter, 2001
## 11                                                           Kaup, 1856
## 12                                                        Günther, 1870
## 18                                            Jordan & Richardson, 1908
## 19                                                        Whitley, 1970
## 23                                                        Duméril, 1870
## 25                                                         Peters, 1869
## 30                                                       Bianconi, 1854
## 31                                                      Boulenger, 1900
## 32                                         Zhang, Qin, Wang & Lin, 2016
## 34                                                         Kuiter, 2003
## 35                                                         Cantor, 1849
## 36                                            Temminck & Schlegel, 1850
## 37                                                         Fricke, 2004
## 40                                                 Gomon & Kuiter, 2009
## 41                                               Lourie & Randall, 2003
## 44                                                          Perry, 1810
## 49                                              Jordan & Evermann, 1903
## 50                                                        Rüppell, 1838
## 51                                                        Rüppell, 1838
## 55                                                         Kuiter, 2001
## 56                                                         Cuvier, 1829
## 57                                                    Slastenenko, 1937
## 59                                          Han, Kim, Kai & Senou, 2017
## 60                                                         Kuiter, 2001
## 64                                                     (Linnaeus, 1758)
## 67                                                           Kaup, 1856
## 71                                                         Girard, 1858
## 72                     Short, Smith, Motomura, Harasti & Hamilton, 2018
## 74                                                      Boulenger, 1900
## 75                                                         Kuiter, 2001
## 78                                                Jordan & Snyder, 1901
## 80                                                        Bleeker, 1852
## 84                                                           Kaup, 1856
## 90                                                          Gomon, 1997
## 91                                                        Bleeker, 1853
## 96                                                         Kuiter, 2001
## 97                                                         Kuiter, 2001
## 98  Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 106                                                Foster & Gomon, 2010
## 107                                         Piacentino & Luzzatto, 2004
## 109                                                        Peters, 1877
## 112                                               Lourie & Kuiter, 2008
## 116                                                        Fricke, 2004
## 117                                                         Horne, 2001
## 121                                                      Ginsburg, 1933
## 124                                               Lourie & Kuiter, 2008
## 125                                                        Kuiter, 2001
## 127                                               Jordan & Snyder, 1901
## 128                                                         Weber, 1913
## 131                                                     Castelnau, 1873
## 132                                                       Duncker, 1940
## 137                                                         Leach, 1814
## 140                                              Randall & Lourie, 2009
## 143                                                Gomon & Kuiter, 2009
## 144                                                       Bleeker, 1855
## 145                                                       Whitley, 1964
## 146                                              Jordan & Gilbert, 1882
##       status
## 1   accepted
## 5   accepted
## 6   accepted
## 10  accepted
## 11  accepted
## 12  accepted
## 18  accepted
## 19  accepted
## 23  accepted
## 25  accepted
## 30  accepted
## 31  accepted
## 32  accepted
## 34  accepted
## 35  accepted
## 36  accepted
## 37  accepted
## 40  accepted
## 41  accepted
## 44  accepted
## 49  accepted
## 50  accepted
## 51  accepted
## 55  accepted
## 56  accepted
## 57  accepted
## 59  accepted
## 60  accepted
## 64  accepted
## 67  accepted
## 71  accepted
## 72  accepted
## 74  accepted
## 75  accepted
## 78  accepted
## 80  accepted
## 84  accepted
## 90  accepted
## 91  accepted
## 96  accepted
## 97  accepted
## 98  accepted
## 106 accepted
## 107 accepted
## 109 accepted
## 112 accepted
## 116 accepted
## 117 accepted
## 121 accepted
## 124 accepted
## 125 accepted
## 127 accepted
## 128 accepted
## 131 accepted
## 132 accepted
## 137 accepted
## 140 accepted
## 143 accepted
## 144 accepted
## 145 accepted
## 146 accepted
## 
## More than one WORMS ID found for taxon 'Hippocampus'!
## 
##                   Enter rownumber of taxon (other inputs will return 'NA'):
##          id                                 target
## 1    126224                            Hippocampus
## 5   1525460                            Hippocampus
## 6    275182                Hippocampus abdominalis
## 7    306774                    Hippocampus agnesae
## 8    306775                      Hippocampus aimei
## 9    716772                      Hippocampus aimei
## 10   275183                     Hippocampus alatus
## 11   275184                  Hippocampus algiricus
## 12   275185                   Hippocampus angustus
## 13   306776                 Hippocampus antiquorum
## 14   306777                   Hippocampus antiquus
## 15   400954                      Hippocampus arnei
## 16   716773                      Hippocampus arnei
## 17   306778                  Hippocampus aterrimus
## 18   275186                   Hippocampus barbouri
## 19   275187                 Hippocampus bargibanti
## 20   713214                   Hippocampus bicuspis
## 21   275188                Hippocampus biocellatus
## 22   306779                   Hippocampus bleekeri
## 23   212238               Hippocampus borboniensis
## 24   306780             Hippocampus brachyrhynchus
## 25   275189                  Hippocampus breviceps
## 26   306781               Hippocampus brevirostris
## 27   306782                   Hippocampus brunneus
## 28   400945            Hippocampus cameleopardalis
## 29   400946            Hippocampus cameleopardalus
## 30   212233             Hippocampus camelopardalis
## 31   212234                   Hippocampus capensis
## 32   886550                   Hippocampus casscsio
## 33   306783                  Hippocampus chinensis
## 34   275190                   Hippocampus colemani
## 35   275191                      Hippocampus comes
## 36   275192                  Hippocampus coronatus
## 37   388711                Hippocampus curvicuspis
## 38   306784                      Hippocampus dahli
## 39   306785                     Hippocampus deanei
## 40   398432                   Hippocampus debelius
## 41   275193                     Hippocampus denise
## 42   306786               Hippocampus ecuadorensis
## 43   306787                  Hippocampus elongatus
## 44   159445                    Hippocampus erectus
## 45   306788                  Hippocampus erinaceus
## 46   154815                  Hippocampus europaeus
## 47   306789               Hippocampus fascicularis
## 48   713215               Hippocampus filamentosus
## 49   275194                    Hippocampus fisheri
## 50   212230                     Hippocampus fuscus
## 51   212230                     Hippocampus fuscus
## 52   306790              Hippocampus graciliformis
## 53   306791                   Hippocampus gracilis
## 54   306792              Hippocampus gracilissimus
## 55   275195                 Hippocampus grandiceps
## 56   154776                 Hippocampus guttulatus
## 57   248042  Hippocampus guttulatus microstephanus
## 58   323136  Hippocampus guttulatus multiannularis
## 59  1376208                      Hippocampus haema
## 60   275196                   Hippocampus hendriki
## 61   154458                 Hippocampus heptagonus
## 62   306793                Hippocampus hildebrandi
## 63   306794                    Hippocampus hilonis
## 64   127380                Hippocampus hippocampus
## 65   322937 Hippocampus hippocampus microcoronatus
## 66   322938 Hippocampus hippocampus microstephanus
## 67   212239                    Hippocampus histrix
## 68   306795                      Hippocampus horai
## 69   306796                  Hippocampus hudsonius
## 70   400949                    Hippocampus hystrix
## 71   275197                     Hippocampus ingens
## 72  1288522                   Hippocampus japapigu
## 73   306797                  Hippocampus japonicus
## 74   275198                   Hippocampus jayakari
## 75   275199                    Hippocampus jugumus
## 76   306798           Hippocampus kampylotrachelos
## 77   306799                     Hippocampus kaupii
## 78   212236                   Hippocampus kelloggi
## 79   306800                   Hippocampus kincaidi
## 80   212237                       Hippocampus kuda
## 81   323205        Hippocampus kuda multiannularis
## 82   306801              Hippocampus laevicaudatus
## 83   713217                      Hippocampus lenis
## 84   275200             Hippocampus lichtensteinii
## 85   154777               Hippocampus longirostris
## 86   306802                 Hippocampus manadensis
## 87   306803                   Hippocampus mannulus
## 88   306804                 Hippocampus marginalis
## 89   306805               Hippocampus melanospilos
## 90   275201                   Hippocampus minotaur
## 91   275202                   Hippocampus mohnikei
## 92   306806                Hippocampus moluccensis
## 93   400952                    Hippocampus monckei
## 94   400951                   Hippocampus monickei
## 95   400953                    Hippocampus monikei
## 96   275203            Hippocampus montebelloensis
## 97   275204                Hippocampus multispinus
## 98  1437127                       Hippocampus nalu
## 99   306807                 Hippocampus natalensis
## 100  306808             Hippocampus novaehebudorum
## 101  400955             Hippocampus novaehollandae
## 102  306809            Hippocampus novaehollandiae
## 103  713212                   Hippocampus obscurus
## 104  713213                   Hippocampus obscurus
## 105  306810                    Hippocampus obtusus
## 106  712534                  Hippocampus paradoxus
## 107  275205                Hippocampus patagonicus
## 108  400947                 Hippocampus pentagonus
## 109  306811                 Hippocampus planifrons
## 110  306812                      Hippocampus poeyi
## 111  306813                 Hippocampus polytaenia
## 112  398433                    Hippocampus pontohi
## 113  275206                   Hippocampus procerus
## 114  306814                Hippocampus punctulatus
## 115  306815                Hippocampus punctulatus
## 116  388712                   Hippocampus pusillus
## 117  275207             Hippocampus queenslandicus
## 118  306816                       Hippocampus raji
## 119  127381                  Hippocampus ramulosus
## 120  306817                    Hippocampus regulus
## 121  159446                      Hippocampus reidi
## 122  306818               Hippocampus rhynchomacer
## 123  306819                 Hippocampus rosamondae
## 124  398434                   Hippocampus satomiae
## 125  275208               Hippocampus semispinosus
## 126  398435                   Hippocampus severnsi
## 127  275209                   Hippocampus sindonis
## 128  275210              Hippocampus spinosissimus
## 129  306820                   Hippocampus stylifer
## 130  306821               Hippocampus subcoronatus
## 131  275211               Hippocampus subelongatus
## 132  306822                  Hippocampus suezensis
## 133  306823                   Hippocampus taeniops
## 134  306824               Hippocampus taeniopterus
## 135  306825                  Hippocampus takakurae
## 136  306826                Hippocampus tetragonous
## 137  212232               Hippocampus trimaculatus
## 138  306827                    Hippocampus tristis
## 139  306828               Hippocampus tuberculatus
## 140  474956                       Hippocampus tyro
## 141  306829                   Hippocampus villosus
## 142  306830                   Hippocampus vulgaris
## 143  398436                 Hippocampus waleananus
## 144  212235                     Hippocampus whitei
## 145  275212                      Hippocampus zebra
## 146  275213                   Hippocampus zosterae
## 2    843450                                   <NA>
## 3    843451                                   <NA>
## 4    843452                                   <NA>
##                                                               authority
## 1                                                      Rafinesque, 1810
## 5                                                      Rafinesque, 1810
## 6                                                          Lesson, 1827
## 7                                                          Fowler, 1907
## 8                                                           Roule, 1916
## 9                                                           Roule, 1916
## 10                                                         Kuiter, 2001
## 11                                                           Kaup, 1856
## 12                                                        Günther, 1870
## 13                                                          Leach, 1814
## 14                                                          Risso, 1827
## 15                                                          Roule, 1916
## 16                                                          Roule, 1916
## 17                                                Jordan & Snyder, 1902
## 18                                            Jordan & Richardson, 1908
## 19                                                        Whitley, 1970
## 20                                                           Kaup, 1856
## 21                                                         Kuiter, 2001
## 22                                                         Fowler, 1907
## 23                                                        Duméril, 1870
## 24                                                        Duncker, 1914
## 25                                                         Peters, 1869
## 26                                                         Schinz, 1822
## 27                                                           Bean, 1906
## 28                                                       Bianconi, 1854
## 29                                                       Bianconi, 1854
## 30                                                       Bianconi, 1854
## 31                                                      Boulenger, 1900
## 32                                         Zhang, Qin, Wang & Lin, 2016
## 33                                                     Basilewsky, 1855
## 34                                                         Kuiter, 2003
## 35                                                         Cantor, 1849
## 36                                            Temminck & Schlegel, 1850
## 37                                                         Fricke, 2004
## 38                                                         Ogilby, 1908
## 39                                                        Duméril, 1861
## 40                                                 Gomon & Kuiter, 2009
## 41                                               Lourie & Randall, 2003
## 42                                                         Fowler, 1922
## 43                                                      Castelnau, 1873
## 44                                                          Perry, 1810
## 45                                                        Günther, 1870
## 46                                                       Ginsburg, 1933
## 47                                                           Kaup, 1856
## 48                                                        Duméril, 1870
## 49                                              Jordan & Evermann, 1903
## 50                                                        Rüppell, 1838
## 51                                                        Rüppell, 1838
## 52                                                      McCulloch, 1911
## 53                                                           Gill, 1862
## 54                                            Temminck & Schlegel, 1850
## 55                                                         Kuiter, 2001
## 56                                                         Cuvier, 1829
## 57                                                    Slastenenko, 1937
## 58                                                       Ginsburg, 1937
## 59                                          Han, Kim, Kai & Senou, 2017
## 60                                                         Kuiter, 2001
## 61                                                     Rafinesque, 1810
## 62                                                       Ginsburg, 1933
## 63                                              Jordan & Evermann, 1903
## 64                                                     (Linnaeus, 1758)
## 65                                                    Slastenenko, 1938
## 66                                                    Slastenenko, 1937
## 67                                                           Kaup, 1856
## 68                                                        Duncker, 1926
## 69                                                          DeKay, 1842
## 70                                                           Kaup, 1856
## 71                                                         Girard, 1858
## 72                     Short, Smith, Motomura, Harasti & Hamilton, 2018
## 73                                                           Kaup, 1856
## 74                                                      Boulenger, 1900
## 75                                                         Kuiter, 2001
## 76                                                        Bleeker, 1854
## 77                                                        Duméril, 1870
## 78                                                Jordan & Snyder, 1901
## 79                                             Townsend & Barbour, 1906
## 80                                                        Bleeker, 1852
## 81                                                            Raj, 1941
## 82                                                           Kaup, 1856
## 83                                                         De Vis, 1908
## 84                                                           Kaup, 1856
## 85                                                         Schinz, 1822
## 86                                                        Bleeker, 1856
## 87                                                         Cantor, 1849
## 88                                                           Kaup, 1856
## 89                                                        Bleeker, 1854
## 90                                                          Gomon, 1997
## 91                                                        Bleeker, 1853
## 92                                                        Bleeker, 1852
## 93                                                        Bleeker, 1853
## 94                                                        Bleeker, 1853
## 95                                                        Bleeker, 1853
## 96                                                         Kuiter, 2001
## 97                                                         Kuiter, 2001
## 98  Short, Claassens, Smith, De Brauwer, Hamilton, Stat & Harasti, 2020
## 99                                                      von Bonde, 1923
## 100                                                        Fowler, 1944
## 101                                                  Steindachner, 1866
## 102                                                  Steindachner, 1866
## 103                                          Hemprich & Ehrenberg, 1856
## 104                                                     Ehrenberg, 1871
## 105                                                      Ginsburg, 1933
## 106                                                Foster & Gomon, 2010
## 107                                         Piacentino & Luzzatto, 2004
## 108                                                    Rafinesque, 1810
## 109                                                        Peters, 1877
## 110                                                 Howell Rivero, 1934
## 111                                                       Bleeker, 1854
## 112                                               Lourie & Kuiter, 2008
## 113                                                        Kuiter, 2001
## 114                                                     Guichenot, 1853
## 115                                                          Kaup, 1856
## 116                                                        Fricke, 2004
## 117                                                         Horne, 2001
## 118                                                       Whitley, 1955
## 119                                                         Leach, 1814
## 120                                                      Ginsburg, 1933
## 121                                                      Ginsburg, 1933
## 122                                                       Duméril, 1870
## 123                                                       Borodin, 1928
## 124                                               Lourie & Kuiter, 2008
## 125                                                        Kuiter, 2001
## 126                                               Lourie & Kuiter, 2008
## 127                                               Jordan & Snyder, 1901
## 128                                                         Weber, 1913
## 129                                              Jordan & Gilbert, 1882
## 130                                                       Günther, 1866
## 131                                                     Castelnau, 1873
## 132                                                       Duncker, 1940
## 133                                                        Fowler, 1904
## 134                                                       Bleeker, 1852
## 135                                                        Tanaka, 1916
## 136                                                    (Mitchill, 1814)
## 137                                                         Leach, 1814
## 138                                                     Castelnau, 1872
## 139                                                     Castelnau, 1875
## 140                                              Randall & Lourie, 2009
## 141                                                       Günther, 1880
## 142                                                       Cloquet, 1821
## 143                                                Gomon & Kuiter, 2009
## 144                                                       Bleeker, 1855
## 145                                                       Whitley, 1964
## 146                                              Jordan & Gilbert, 1882
## 2                                                                  <NA>
## 3                                                                  <NA>
## 4                                                                  <NA>
##          status
## 1      accepted
## 5      accepted
## 6      accepted
## 7    unaccepted
## 8    unaccepted
## 9    unaccepted
## 10     accepted
## 11     accepted
## 12     accepted
## 13   unaccepted
## 14   unaccepted
## 15   unaccepted
## 16   unaccepted
## 17   unaccepted
## 18     accepted
## 19     accepted
## 20   unaccepted
## 21   unaccepted
## 22   unaccepted
## 23     accepted
## 24   unaccepted
## 25     accepted
## 26   unaccepted
## 27   unaccepted
## 28   unaccepted
## 29   unaccepted
## 30     accepted
## 31     accepted
## 32     accepted
## 33   unaccepted
## 34     accepted
## 35     accepted
## 36     accepted
## 37     accepted
## 38   unaccepted
## 39   unaccepted
## 40     accepted
## 41     accepted
## 42   unaccepted
## 43   unaccepted
## 44     accepted
## 45   unaccepted
## 46   unaccepted
## 47   unaccepted
## 48   unaccepted
## 49     accepted
## 50     accepted
## 51     accepted
## 52   unaccepted
## 53   unaccepted
## 54   unaccepted
## 55     accepted
## 56     accepted
## 57     accepted
## 58   unaccepted
## 59     accepted
## 60     accepted
## 61   unaccepted
## 62   unaccepted
## 63   unaccepted
## 64     accepted
## 65   unaccepted
## 66   unaccepted
## 67     accepted
## 68   unaccepted
## 69   unaccepted
## 70   unaccepted
## 71     accepted
## 72     accepted
## 73   unaccepted
## 74     accepted
## 75     accepted
## 76   unaccepted
## 77   unaccepted
## 78     accepted
## 79   unaccepted
## 80     accepted
## 81   unaccepted
## 82   unaccepted
## 83   unaccepted
## 84     accepted
## 85   unaccepted
## 86   unaccepted
## 87   unaccepted
## 88   unaccepted
## 89   unaccepted
## 90     accepted
## 91     accepted
## 92   unaccepted
## 93   unaccepted
## 94   unaccepted
## 95   unaccepted
## 96     accepted
## 97     accepted
## 98     accepted
## 99   unaccepted
## 100  unaccepted
## 101  unaccepted
## 102  unaccepted
## 103  unaccepted
## 104  unaccepted
## 105  unaccepted
## 106    accepted
## 107    accepted
## 108  unaccepted
## 109    accepted
## 110  unaccepted
## 111  unaccepted
## 112    accepted
## 113  unaccepted
## 114  unaccepted
## 115  unaccepted
## 116    accepted
## 117    accepted
## 118  unaccepted
## 119  unaccepted
## 120  unaccepted
## 121    accepted
## 122  unaccepted
## 123  unaccepted
## 124    accepted
## 125    accepted
## 126  unaccepted
## 127    accepted
## 128    accepted
## 129  unaccepted
## 130  unaccepted
## 131    accepted
## 132    accepted
## 133  unaccepted
## 134  unaccepted
## 135  unaccepted
## 136  unaccepted
## 137    accepted
## 138  unaccepted
## 139  unaccepted
## 140    accepted
## 141  unaccepted
## 142  unaccepted
## 143    accepted
## 144    accepted
## 145    accepted
## 146    accepted
## 2   quarantined
## 3   quarantined
## 4   quarantined
## 
## More than one WORMS ID found for taxon 'Hippocampus'!
## 
##                   Enter rownumber of taxon (other inputs will return 'NA'):
##        id                target                      authority   status
## 2  835080   Novaculops alvheimi                  Randall, 2013 accepted
## 3 1431860 Novaculops compressus                    Fukui, 2020 accepted
## 4 1009705  Novaculops halsteadi        (Randall & Lobel, 2003) accepted
## 5 1009528   Novaculops koteamea        (Randall & Allen, 2004) accepted
## 6 1008571  Novaculops pastellus (Randall, Earle & Rocha, 2008) accepted
## 7 1017252  Novaculops sciistius      (Jordan & Thompson, 1914) accepted
## 8  319690      Novaculops woodi                (Jenkins, 1901) accepted
## 
## More than one WORMS ID found for taxon 'Novaculops'!
## 
##                   Enter rownumber of taxon (other inputs will return 'NA'):
# Combine the cleaned original names, the parsed canonical names and the
# WoRMS IDs into one data frame with one row per input name.
#
# Rows are collected in a preallocated list and bound once at the end instead
# of calling rbind() on every iteration, and seq_along() is used so that the
# loop body is skipped entirely when cleaned_data is empty (1:length(x) would
# iterate over c(1, 0) in that case).
combined_rows <- vector("list", length(cleaned_data))

for (i in seq_along(cleaned_data)) {
  canonical_value <- parsed_names[[i]]$canonical$full
  if (is.null(canonical_value)) {
    # The parser returned no canonical form for this entry: record it as NA
    # so the row is still created and can be fixed by hand later.
    canonical_value <- NA
  }
  combined_rows[[i]] <- data.frame(
    CleanedData = cleaned_data[i],
    CanonicalFull = canonical_value,
    # Only the first element of the WoRMS lookup result is kept.
    WormsIDs = worms_ids[[i]][1]
  )
}

# With no input names there is nothing to bind; fall back to an empty frame.
combined_dataframe <- if (length(combined_rows) > 0) {
  do.call(rbind, combined_rows)
} else {
  data.frame()
}

knitr::kable(head(combined_dataframe))
CleanedData CanonicalFull WormsIDs
Carcharhinus amblyrhynchos (Bleeker, Carcharhinus amblyrhynchos 217337
Carcharhinus galapagensis (Snodgrass & Heller, Carcharhinus galapagensis 105790
Galeocerdo cuvier (peron & Le Sueur, Galeocerdo cuvier 105799
Dasyatis thetidis Waite, 1899 Dasyatis thetidis 212247
Taeniura meyeni (Muller & Henle, Taeniura meyeni 217421
Albula neoguinaica Valenciennes, Albula neoguinaica 212254

Human Verification

Sometimes there are misspellings in the original text or OCR errors that can be found and fixed by hand. To do this, view the combined dataframe, search WoRMS for the unmatched species and add their IDs, and remove any rows that were not automatically removed in the earlier cleaning steps.

# Manual corrections to combined_dataframe, addressed by positional row index.
# - Assignments to columns 2:3 overwrite CanonicalFull and WormsIDs for a row.
# - Assignments naming "identificationQualifier" keep only the genus in
#   CanonicalFull and store the "sp. X" label in identificationQualifier
#   (the column is not created by the loop that builds combined_dataframe, so
#   the first such assignment is what adds it).
# - Each right-hand side is built with c(<string>, <number>), so the ID is
#   coerced to a character string before it is stored.
# NOTE(review): the row numbers are only valid for the exact output of the
# earlier parsing/cleaning steps — re-check them if those steps change.
combined_dataframe[9,2:3] = c("Enchelycore ramosus", 399857)
combined_dataframe[17, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. A", 125636)
combined_dataframe[18, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. B", 125636)
combined_dataframe[19, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Gymnothorax", "sp. C", 125636)
combined_dataframe[27,2:3] = c("Scarus psittacus", 219125)
combined_dataframe[29,2:3] = c("Scarus schlegeli", 276060)
combined_dataframe[31,2:3] = c("Limnichthys fasciatus", 277886)
combined_dataframe[33,2:3] = c("Parapercis cylindrica", 219155)
combined_dataframe[34,2:3] = c("Parapercis hexophtalma", 219159)
combined_dataframe[35,2:3] = c("Enneapterygius rufopileus", 277337)
combined_dataframe[37,2:3] = c("Norfolkia squamiceps", 276728)
combined_dataframe[38,2:3] = c("Cristiceps aurantiacus", 276603)
combined_dataframe[39,2:3] = c("Heteroclinus roseus", 281066)
combined_dataframe[40,2:3] = c("Cirripectes alboapicalis", 276716)
combined_dataframe[41,2:3] = c("Cirripectes castaneus", 219266)
combined_dataframe[42,2:3] = c("Cirripectes chelomatus", 276718)
combined_dataframe[43,2:3] = c("Enchelyurus ater", 276882)
combined_dataframe[48,2:3] = c("Plagiotremus rhinorhynchos", 219334)
combined_dataframe[51,2:3] = c("Xiphasia matsubarai", 219344)
combined_dataframe[53,2:3] = c("Ammodytoides vagus", 276860)
combined_dataframe[54,2:3] = c("Callionymus calcaratus", 302219)
combined_dataframe[55,2:3] = c("Amblygobius nocturnus", 219377)
combined_dataframe[57,2:3] = c("Asterropteryx semipunctatus", 219382)
combined_dataframe[60, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Callogobius", "sp. 3", 206441)
combined_dataframe[61, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Callogobius", "sp. 6", 206441)
combined_dataframe[75, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Priolepis", "sp. 3", 203905)
combined_dataframe[76, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Priolepis", "sp. 4", 203905)
combined_dataframe[93,2:3] = c("Zebrasoma scopas", 219679)
combined_dataframe[100,2:3] = c("Bothus pantherinus", 219795)
combined_dataframe[101,2:3] = c("Crossorhombus", 205615)
combined_dataframe[102,2:3] = c("Paraplagusia unicolor", NA)
combined_dataframe[103,2:3] = c("Aseraggodes bahamondei", 279708)
combined_dataframe[104,2:3] = c("Aseraggodes macleayanus", 279729)
combined_dataframe[105,2:3] = c("Aseraggodes ramsaii", 279738)
combined_dataframe[106,2:3] = c("Balistoides conspicillum", 219876)
combined_dataframe[107,2:3] = c("Rhinecanthus aculeatus", 219890)
combined_dataframe[108,2:3] = c("Rhinecanthus rectangulus", 219886)
combined_dataframe[109,2:3] = c("Sufflamen chrysopterus", 219895)
combined_dataframe[110,2:3] = c("Sufflamen freanatus", 403408)
combined_dataframe[111,2:3] = c("Aluterus monoceros", 127407)
combined_dataframe[112,2:3] = c("Brachaluteres taylori", 279952)
combined_dataframe[115,2:3] = c("Cantherhines pardalis", 220058)
combined_dataframe[117,2:3] = c("Oxymonacanthus longirostris", 220063)
combined_dataframe[119,2:3] = c("Thamnaconus analis", 277205)
combined_dataframe[122,2:3] = c("Lactoria fornasini", 219902)
combined_dataframe[164,2:3] = c("Antennarius nummifer", 126530)
combined_dataframe[170,2:3] = c("Lepadichthys frenatus", 279218)
combined_dataframe[171,2:3] = c("Gobiesocidae", 125477)
combined_dataframe[191, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Hippocampus", "sp. A", 126224)
combined_dataframe[192, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Hippocampus", "sp. B", 126224)
combined_dataframe[223,2:3] = c("Pseudanthias pictilis", 277472)
combined_dataframe[229,2:3] = c("Belonepterygion fasciolatum", 279890)
combined_dataframe[231,2:3] = c("Terapon jarbua", 218350)
combined_dataframe[234,2:3] = c("Priacanthus hamrur", 218360)
combined_dataframe[239, c("CanonicalFull", "identificationQualifier", "WormsIDs")] <- c("Apogon", "sp. B", 125913)
combined_dataframe[244,2:3] = c("Sillago ciliata", 273939)
combined_dataframe[256,2:3] = c("Trachinotus baillonii", 218440)
combined_dataframe[259,2:3] = c("Arripis trutta", 279693)
combined_dataframe[273,2:3] = c("Lethrinus nebulosus", 212081)
combined_dataframe[274,2:3] = c("Scolopsis bilineatus", 401948)
combined_dataframe[275,2:3] = c("Mulloidichthys flavolineatus", 218647)
combined_dataframe[276,2:3] = c("Mulloidichthys vanicolensis", 218648)
combined_dataframe[280,2:3] = c("Parupeneus multifasciatus", 277820)
combined_dataframe[281,2:3] = c("Parupeneus pleurostigma", 218656)
combined_dataframe[282,2:3] = c("Parupeneus spilurus", 277825)
combined_dataframe[285,2:3] = c("Pempheris oualensis", 218700)
combined_dataframe[286,2:3] = c("Pempheris vanicolensis", 218701)
combined_dataframe[287,2:3] = c("Girella cyanea", 280857)
combined_dataframe[288,2:3] = c("Girella elevata", 280858)
combined_dataframe[295,2:3] = c("Bathystethus cultratus", 279876)
combined_dataframe[296,2:3] = c("Labracoglossa nitida", 281246)
combined_dataframe[297,2:3] = c("Scorpis lineolatus", 315585)
combined_dataframe[298,2:3] = c("Scorpis violaceus", 315588)
combined_dataframe[302,2:3] = c("Chaetodon citrinellus", 218744)
combined_dataframe[303,2:3] = c("Chaetodon flavirostris", 273337)
combined_dataframe[305,2:3] = c("Chaetodon kleinii", 218738)
combined_dataframe[306,2:3] = c("Chaetodon lineolatus", 218734)
combined_dataframe[307,2:3] = c("Chaetodon lunula", 218733)
combined_dataframe[312,2:3] = c("Chaetodon plebeius", 273354)
combined_dataframe[313,2:3] = c("Chaetodon rainfordi", 273358)
combined_dataframe[314,2:3] = c("Chaetodon speculum", 218740)
combined_dataframe[315,2:3] = c("Chaetodon tricinctus", 273365)
combined_dataframe[316,2:3] = c("Chaetodon trifascialis", 218719)
combined_dataframe[317,2:3] = c("Chaetodon trifasciatus", 218741)
combined_dataframe[319,2:3] = c("Chaetodon unimaculatus", 218753)
combined_dataframe[320,2:3] = c("Chaetodon vagabundus", 218754)
combined_dataframe[321,2:3] = c("Forcipiger flavissimus", 218760)
combined_dataframe[322,2:3] = c("Heniochus acuminatus", 218765)
combined_dataframe[323,2:3] = c("Centropyge bispinosus", 211779)
combined_dataframe[324,2:3] = c("Centropyge tibicen", 278851)
combined_dataframe[325,2:3] = c("Centropyge vrolikii", 278853)
combined_dataframe[326,2:3] = c("Chaetodontoplus conspicillatus", 280116)
combined_dataframe[327,2:3] = c("Chaetodontoplus meredithi", 280120)
combined_dataframe[328,2:3] = c("Genicanthus semicinctus", 279095)
combined_dataframe[329,2:3] = c("Pomacanthus imperator", 220001)
combined_dataframe[330,2:3] = c("Pomacanthus semicirculatus", 220003)
combined_dataframe[331,2:3] = c("Evistias acutirostris", 280765)
combined_dataframe[332,2:3] = c("Abudefduf bengalensis", 212885)
combined_dataframe[333,2:3] = c("Abudefduf sexfasciatus", 159289)
combined_dataframe[334,2:3] = c("Abudefduf sordidus", 212888)
combined_dataframe[335,2:3] = c("Abudefduf vaigiensis", 212879)
combined_dataframe[336,2:3] = c("Abudefduf whitleyi", 273703)
combined_dataframe[337,2:3] = c("Amphiprion latezonatus", 278395)
combined_dataframe[338,2:3] = c("Amphiprion mccullochi", 278397)
combined_dataframe[339,2:3] = c("Chromis atripectoralis", 212812)
combined_dataframe[340,2:3] = c("Chromis flavomaculata", 273727)
combined_dataframe[341,2:3] = c("Chromis hypsilepis", 273730)
combined_dataframe[342,2:3] = c("Chromis margaritifer", 273739)
combined_dataframe[343,2:3] = c("Chromis nitida", 273744)
combined_dataframe[344,2:3] = c("Chromis vanderbilti", 273758)
combined_dataframe[345,2:3] = c("Chrysiptera glauca", 218783)
combined_dataframe[346,2:3] = c("Chrysiptera notialis", 276835)
combined_dataframe[347,2:3] = c("Dascyllus aruanus", 212843)
combined_dataframe[348,2:3] = c("Dascyllus reticulatus", 212844)
combined_dataframe[349,2:3] = c("Dascyllus trimaculatus", 212846)
combined_dataframe[350,2:3] = c("Neoglyphidodon polyacanthus", 278815)
combined_dataframe[351,2:3] = c("Parma alboscapularis", 282152)
combined_dataframe[352,2:3] = c("Parma polylepis", 282159)
combined_dataframe[354,2:3] = c("Plectroglyphidodon johnstonianus", 212859)
combined_dataframe[355,2:3] = c("Plectroglyphidodon lacrymatus", 212860)
combined_dataframe[386,2:3] = c("Coris aygula", 218957)
combined_dataframe[387,2:3] = c("Coris bulbifrons", 273551)
combined_dataframe[388,2:3] = c("Coris gaimard", 218960)
combined_dataframe[370,2:3] = c("Cheilodactylus vittatus", 311553)
combined_dataframe[384,2:3] = c("Choerodon fasciatus", 277268)
combined_dataframe[390,2:3] = c("Coris sandeyeri", 273563)
combined_dataframe[391,2:3] = c("Cymolutes torquatus", 218966)
combined_dataframe[392,2:3] = c("Gomphosus varius", 218975)
combined_dataframe[393,2:3] = c("Halichoeres nebulosus", 218986)
combined_dataframe[395,2:3] = c("Hemigymnus fasciatus", 218999)
combined_dataframe[396,2:3] = c("Hemigymnus melapterus", 218998)
combined_dataframe[399,2:3] = c("Labroides bicolor", 219015)
combined_dataframe[404,2:3] = c("Notolabrus inscriptus", 281790)
combined_dataframe[416,2:3] = c("Thalassoma jansenii", 273582)
combined_dataframe[423,2:3] = c("Xyrichtys jacksonensis", 273599)


# Drop row 65 last, so that every positional index used above still refers to
# the unmodified row order.
# NOTE(review): presumably row 65 is not a valid taxon entry — confirm.
combined_dataframe <- combined_dataframe[-c(65),]

Darwin Core mapping

Required Terms

OBIS currently has eight required DwC terms: scientificName, scientificNameID, occurrenceID, eventDate, decimalLongitude, decimalLatitude, occurrenceStatus, basisOfRecord.

scientificName/scientificNameID

Create a dataframe with unique taxa only (though this should already be unique). This will be our primary DarwinCore data frame.

# Start the Darwin Core occurrence table from the unique taxa and rename the
# name/ID columns to their DwC terms. Non-missing WoRMS IDs are turned into
# marinespecies.org LSIDs; missing IDs stay NA.
occurrence <- combined_dataframe %>%
  distinct(CanonicalFull, identificationQualifier, WormsIDs) %>%
  rename(
    scientificName = CanonicalFull,
    scientificNameID = WormsIDs
  ) %>%
  mutate(
    scientificNameID = ifelse(
      is.na(scientificNameID),
      NA,
      paste0("urn:lsid:marinespecies.org:taxname:", scientificNameID)
    )
  )

occurrenceID

OccurrenceID is an identifier for the occurrence record and should be persistent and globally unique. It is a combination of dataset-shortname:occurrence: and a hash based on the scientific name and its identification qualifier.

# digest() hashes its whole argument as a single object, so passing a column
# would produce one hash for the entire vector. Wrapping it in Vectorize()
# gives one hash per element instead.
vdigest <- Vectorize(digest)

# Generate occurrenceID as "<short_name>:occurrence:<md5>", where the md5 is
# computed per row from "scientificName identificationQualifier".
occurrence <- occurrence %>%
  mutate(
    occurrenceID = paste(
      short_name,
      "occurrence",
      vdigest(paste(scientificName, identificationQualifier), algo = "md5"),
      sep = ":"
    )
  )

eventDate

This is left empty (an empty string) since this is technically a checklist and we do not know the collection date.

# eventDate is deliberately an empty string: the source is a checklist and the
# collection date is not known.
eventDate <- ""
occurrence <- mutate(occurrence, eventDate = eventDate)

decimalLongitude/decimalLatitude

Use obistools::calculate_centroid to calculate a centroid and radius for WKT strings. This is useful for populating decimalLongitude, decimalLatitude and coordinateUncertaintyInMeters. The WKT strings are from https://github.com/iobis/mwhs-shapes.

# Local cache location of the Marine World Heritage site shapes (GeoPackage).
gpkg_path <- paste(path_to_project_root, "scripts_data/marine_world_heritage.gpkg", sep = "/")

# Download the GeoPackage only when it is not already cached locally.
# mode = "wb" forces a binary transfer: the URL ends in "?raw=true", so
# download.file() cannot recognise the file as binary from its extension and
# would otherwise write it in text mode on Windows, corrupting the file.
if (!file.exists(gpkg_path)) {
  download.file(
    "https://github.com/iobis/mwhs-shapes/blob/master/output/marine_world_heritage.gpkg?raw=true",
    gpkg_path,
    mode = "wb"
  )
}

# Read all site geometries from the cached GeoPackage.
shapes <- st_read(gpkg_path)
## Reading layer `marine_world_heritage' from data source 
##   `/mnt/c/Users/Chandra Earl/Desktop/Labs/UNESCO/mwhs-data-mobilization/scripts_data/marine_world_heritage.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 60 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -55.32282 xmax: 180 ymax: 71.81381
## Geodetic CRS:  4326
# Some sites are stored as separate core and buffer areas in the GeoPackage;
# grouping by site name and summarising merges them into one geometry per site.
shapes_processed <- summarize(group_by(shapes, name))

# Geometry of the site this dataset belongs to.
ind_shape <- shapes_processed$geom[
  which(shapes_processed$name == "Lord Howe Island Group")
]

# WKT representation of the site geometry.
wkt <- st_as_text(ind_shape, digits = 6)

# Centroid (and uncertainty radius) of the site, used as the coordinates of
# every record in the checklist.
localities <- calculate_centroid(wkt)

occurrence <- occurrence %>%
  mutate(
    decimalLatitude = localities$decimalLatitude,
    decimalLongitude = localities$decimalLongitude
  )

occurrenceStatus

# Every taxon listed in the checklist is recorded as present at the site.
occurrenceStatus <- "present"
occurrence <- mutate(occurrence, occurrenceStatus = occurrenceStatus)

basisOfRecord

# The same basisOfRecord value is applied to every row.
basisOfRecord <- "HumanObservation"
occurrence <- mutate(occurrence, basisOfRecord = basisOfRecord)

Extra Terms

footprintWKT

# Attach the WKT of the site geometry to every record.
occurrence <- occurrence %>%
  mutate(footprintWKT = wkt)

coordinateUncertaintyInMeters

# Uncertainty radius returned by calculate_centroid() for the site geometry.
occurrence <- occurrence %>%
  mutate(
    coordinateUncertaintyInMeters = localities$coordinateUncertaintyInMeters
  )

geodeticDatum

# The same geodetic datum is recorded for every row.
geodeticDatum <- "WGS84"
occurrence <- mutate(occurrence, geodeticDatum = geodeticDatum)

country

# The same country is recorded for every row.
country <- "Australia"
occurrence <- mutate(occurrence, country = country)

locality

# The same locality (the World Heritage site name) is recorded for every row.
locality <- "Lord Howe Island Group"
occurrence <- mutate(occurrence, locality = locality)

Post-processing

Check data

Use the check_fields command from obistools to check if all OBIS required fields are present in an occurrence table and if any values are missing.

# Put the columns into their final order; select() keeps only the columns
# listed here.
occurrence <- occurrence %>%
  select(
    occurrenceID,
    scientificName,
    identificationQualifier,
    scientificNameID,
    eventDate,
    country,
    locality,
    decimalLatitude,
    decimalLongitude,
    coordinateUncertaintyInMeters,
    footprintWKT,
    geodeticDatum,
    occurrenceStatus,
    basisOfRecord
  )

# Report required OBIS fields that are absent or contain empty values.
check_fields(occurrence)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## ℹ The deprecated feature was likely used in the obistools package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 431 × 4
##    level field       row message                                 
##    <chr> <chr>     <int> <chr>                                   
##  1 error eventDate     1 Empty value for required field eventDate
##  2 error eventDate     2 Empty value for required field eventDate
##  3 error eventDate     3 Empty value for required field eventDate
##  4 error eventDate     4 Empty value for required field eventDate
##  5 error eventDate     5 Empty value for required field eventDate
##  6 error eventDate     6 Empty value for required field eventDate
##  7 error eventDate     7 Empty value for required field eventDate
##  8 error eventDate     8 Empty value for required field eventDate
##  9 error eventDate     9 Empty value for required field eventDate
## 10 error eventDate    10 Empty value for required field eventDate
## # ℹ 421 more rows

Create the EML file

This is a file which contains the dataset’s metadata and is required in a DarwinCore-Archive.

# Select EML schema version 2.1.1 before any EML elements are built.
# NOTE(review): presumably required by the GBIF/IPT metadata profile — confirm.
emld::eml_version("eml-2.1.1")
## [1] "eml-2.1.1"
# Title of the dataset as it will appear in the EML metadata.
title <- "Checklist of the coastal fishes of Lord Howe, Norfolk and Kermadec Islands, southwest Pacific Ocean: Fishes Checklist"

# AlternateIdentifier: URL of the resource on the OBIS IPT, derived from the
# dataset short name.
alternateIdentifier <- paste0("https://ipt.obis.org/secretariat/resource?r=", short_name)

# Abstract of the original publication. Fixed transcription errors that would
# otherwise be published in the metadata: "ofcoastal" -> "of coastal", the
# stray space in "2.1 %", and the doubled full stop at the end.
abstract <- eml$abstract(
  para = "A checklist of coastal fishes includes 433 species from Lord Howe Island, 254 from Norfolk Island, and 145 from the Kermadec Islands. Tropical and subtropical species dominate all three faunas, but the proportion of tropical species decreases, and the proportion of subtropical species increases, from west to east. Subtropical species are the most abundant individual fishes at all three islands. Only 4.6% of the combined fauna is endemic, with individual island endemism even lower (1.2-2.1%). The fish faunas of the three islands appear to have originated mainly by larval dispersal from Australia and the Coral Sea. Evidence for present-day dispersal is discussed. Faunal relationships among the subtropical islands of the western, central, and eastern South Pacific are examined. In the South Pacific as a whole, there is a high positive correlation between coastal fish diversity and hermatypic coral diversity."
)

People

Here we add the people involved in the project:

The creator is the person or organization responsible for creating the resource itself.

The contact is the person or institution to contact with questions about the use or interpretation of a data set.

The metadataProvider is the person responsible for providing the metadata documentation for the resource.

The associatedParty (in this case the Data Curator) is the person who mobilized the data from the original resource.

# Creator: author of the original publication.
creator <- eml$creator(
  individualName = eml$individualName(
    givenName = "Malcolm P.",
    surName = "Francis"
  ),
  organizationName = "University of Auckland"
)

# Contact: who to approach with questions about the dataset.
# Built with eml$contact() (previously eml$creator()) so that each party is
# created with the constructor matching its role, like the other parties here.
contact <- eml$contact(
  individualName = eml$individualName(
    givenName = "OBIS",
    surName = "Secretariat"
  ),
  electronicMailAddress = "helpdesk@obis.org",
  organizationName = "OBIS",
  positionName = "Secretariat"
)

# Metadata provider: person who wrote this metadata record.
metadataProvider <- eml$metadataProvider(
  individualName = eml$individualName(
    givenName = "Chandra",
    surName = "Earl"
  ),
  electronicMailAddress = "c.earl@unesco.org",
  organizationName = "UNESCO",
  positionName = "eDNA Scientific Officer"
)

# Associated party: data curator who mobilized the data, with role "processor".
associatedParty <- eml$associatedParty(
  role = "processor",
  individualName = eml$individualName(
    givenName = "Chandra",
    surName = "Earl"
  ),
  electronicMailAddress = "c.earl@unesco.org",
  organizationName = "UNESCO",
  positionName = "eDNA Scientific Officer"
)

Additional Metadata

Here we add the additionalMetadata element, which is required for a GBIF-type EML file and contains information such as the citation of the dataset, the citation of the original resource and the creation timestamp of the EML.

# Citation template used by the IPT:
# {dataset.authors} ({dataset.pubDate}) {dataset.title}. [Version {dataset.version}]. {organization.title}. {dataset.type} Dataset {dataset.doi}, {dataset.url}

# Creation timestamp of the EML, e.g. "2023-10-15T12:34:56.789+02:00".
# The clock is read once so that the date/time part and the UTC offset are
# guaranteed to describe the same instant (it was previously read three times).
# format(..., "%z") yields "+hhmm"; a colon is inserted to get "+hh:mm".
stamp_time <- Sys.time()
utc_offset <- format(stamp_time, "%z")
date_stamp <- paste0(
  format(stamp_time, "%Y-%m-%dT%H:%M:%OS3"),
  substr(utc_offset, 1, 3),
  ":",
  substr(utc_offset, 4, 5)
)

# GBIF-specific block: timestamp, hierarchy level, a placeholder dataset
# citation and the citation of the original publication.
additionalMetadata <- eml$additionalMetadata(
  metadata = list(
    gbif = list(
      dateStamp = date_stamp,
      hierarchyLevel = "dataset",
      citation = "IPT will autogenerate this",
      bibliography = list(
        citation = "Francis, Malcolm. (1993). Checklist of the coastal fishes of Lord Howe, Norfolk and Kermadec Islands, southwest Pacific Ocean."
      )
    )
  )
)

# NOTE(review): this is a figshare DOI — confirm it is the intended identifier
# for Francis (1993) and not a leftover from another dataset.
citationdoi <- "http://dx.doi.org/10.6084/m9.figshare.c.4428305"

Coverage

Here we describe the dataset’s geographic, taxonomic and temporal coverage.

# Coverage: geographic and taxonomic extent of the dataset.

# Bounding box of the site geometry, computed once and reused for all four
# coordinates.
site_bbox <- st_bbox(ind_shape)

coverage <- eml$coverage(
  geographicCoverage = eml$geographicCoverage(
    geographicDescription = "Lord Howe Island Group",
    boundingCoordinates = eml$boundingCoordinates(
      # West is the minimum longitude and east the maximum; these two were
      # previously swapped (west = xmax, east = xmin).
      westBoundingCoordinate = site_bbox$xmin,
      eastBoundingCoordinate = site_bbox$xmax,
      northBoundingCoordinate = site_bbox$ymax,
      southBoundingCoordinate = site_bbox$ymin
    )
  ),
  taxonomicCoverage = eml$taxonomicCoverage(
    generalTaxonomicCoverage = "Fishes",
    taxonomicClassification = list(
      eml$taxonomicClassification(
        taxonRankName = "Superclass",
        taxonRankValue = "Agnatha"
      ),
      eml$taxonomicClassification(
        taxonRankName = "unranked",
        taxonRankValue = "Chondrichthyes"
      ),
      eml$taxonomicClassification(
        taxonRankName = "unranked",
        taxonRankValue = "Osteichthyes"
      )
    )
  )
  # Temporal coverage is left out for this dataset. Template kept for reuse
  # (add a comma after the taxonomicCoverage element above when enabling it):
  #  temporalCoverage = eml$temporalCoverage(
  #    rangeOfDates = eml$rangeOfDates(
  #      beginDate = eml$beginDate(
  #        calendarDate = "2019-05-01"
  #      ),
  #      endDate = eml$endDate(
  #        calendarDate = "2016-05-06"
  #      )
  #    )
  #  )
)

Extra MetaData

These fields are not required, though they make the metadata more complete.

# Methods: points readers to the GitHub project and to the rendered notebook
# for this site/dataset, where the construction of the dataset is documented.
methods_text <- paste0(
  "See Github <a href=\"https://github.com/iobis/mwhs-data-mobilization\">Project</a> and <a href=\"https://iobis.github.io/mwhs-data-mobilization/notebooks/",
  site_dir_name,
  "/",
  dataset_dir_name,
  "\"> R Notebook</a> for dataset construction methods"
)
methods <- eml$methods(
  methodStep = eml$methodStep(
    description = eml$description(para = methods_text)
  )
)

# Publication date recorded in the metadata.
pubDate <- "2023-10-15"

# Language of the original document.
language <- "eng"

# Dataset type keyword, taken from the GBIF dataset type vocabulary.
keywordSet <- eml$keywordSet(
  keyword = "Occurrence",
  keywordThesaurus = "GBIF Dataset Type Vocabulary: http://rs.gbif.org/vocabulary/gbif/dataset_type_2015-07-10.xml"
)

# Maintenance: no description, and no updates are planned.
maintenance <- eml$maintenance(
  description = eml$description(para = ""),
  maintenanceUpdateFrequency = "notPlanned"
)

# Licence statement: public domain dedication (CC0 1.0).
intellectualRights <- eml$intellectualRights(
  para = "To the extent possible under law, the publisher has waived all rights to these data and has dedicated them to the <ulink url=\"http://creativecommons.org/publicdomain/zero/1.0/legalcode\"><citetitle>Public Domain (CC0 1.0)</citetitle></ulink>. Users may copy, modify, distribute and use the work, including for commercial purposes, without restriction."
)

# Purpose: why the data were mobilized.
purpose <- eml$purpose(
  para = "These data were made accessible through UNESCO's eDNA Expeditions project to mobilize available marine species and occurrence datasets from World Heritage Sites."
)

# Free-text additional information attached to the dataset.
additionalInfo <- eml$additionalInfo(
  para = "marine, harvested by iOBIS"
)

Create and Validate EML

# Assemble the complete EML document from the components built above.
my_eml <- eml$eml(
  packageId = paste0(
    "https://ipt.obis.org/secretariat/resource?id=",
    short_name,
    "/v1.0"
  ),
  system = "http://gbif.org",
  scope = "system",
  dataset = eml$dataset(
    alternateIdentifier = alternateIdentifier,
    title = title,
    creator = creator,
    metadataProvider = metadataProvider,
    associatedParty = associatedParty,
    pubDate = pubDate,
    coverage = coverage,
    language = language,
    abstract = abstract,
    keywordSet = keywordSet,
    contact = contact,
    methods = methods,
    intellectualRights = intellectualRights,
    purpose = purpose,
    maintenance = maintenance,
    additionalInfo = additionalInfo
  ),
  additionalMetadata = additionalMetadata
)

# Validate the assembled document; the result is printed in the notebook.
eml_validate(my_eml)
## [1] TRUE
## attr(,"errors")
## character(0)

Create meta.xml file

This file describes the structure of the archive and its data file, and is required in a DarwinCore-Archive. It is based on the template file “meta_occurrence_checklist_template.xml”.

# Read the shared meta.xml template.
meta_template <- file.path(
  path_to_project_root,
  "scripts_data/meta_occurrence_checklist_template.xml"
)
meta <- read_xml(meta_template)

# All <field> elements declared in the template.
fields <- xml_find_all(meta, "//d1:field")

# Write the dataset-wide default value onto each recognised Darwin Core term.
# switch() only evaluates the arm that matches, so a default variable is
# touched only when its term is actually present in the template; fields
# with any other term are left untouched.
for (field in fields) {
  term <- xml_attr(field, "term")
  switch(
    term,
    "http://rs.tdwg.org/dwc/terms/eventDate" =
      xml_set_attr(field, "default", eventDate),
    "http://rs.tdwg.org/dwc/terms/country" =
      xml_set_attr(field, "default", country),
    "http://rs.tdwg.org/dwc/terms/locality" =
      xml_set_attr(field, "default", locality),
    "http://rs.tdwg.org/dwc/terms/decimalLatitude" =
      xml_set_attr(field, "default", localities$decimalLatitude),
    "http://rs.tdwg.org/dwc/terms/decimalLongitude" =
      xml_set_attr(field, "default", localities$decimalLongitude),
    "http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters" =
      xml_set_attr(field, "default", localities$coordinateUncertaintyInMeters),
    "http://rs.tdwg.org/dwc/terms/footprintWKT" =
      xml_set_attr(field, "default", wkt),
    "http://rs.tdwg.org/dwc/terms/geodeticDatum" =
      xml_set_attr(field, "default", geodeticDatum),
    "http://rs.tdwg.org/dwc/terms/occurrenceStatus" =
      xml_set_attr(field, "default", occurrenceStatus),
    "http://rs.tdwg.org/dwc/terms/basisOfRecord" =
      xml_set_attr(field, "default", basisOfRecord)
  )
}


# Add identificationQualifier as an extra column-mapped field, inserted as a
# sibling of the third template field and mapped to column index 3.
new_field <- xml_add_sibling(fields[[3]], "field")
xml_set_attr(new_field, "index", "3")
xml_set_attr(
  new_field,
  "term",
  "http://rs.tdwg.org/dwc/terms/identificationQualifier"
)

# Keep the in-memory field collection in step with the document.
fields <- c(fields, list(new_field))

Save outputs

# Directory that receives the three Darwin Core Archive components.
dwc_output_dir <- file.path(
  path_to_project_root, "output", site_dir_name, dataset_dir_name
)

# Create the directory (and any missing parents) on a fresh checkout;
# without it the writes below fail because the target path does not exist.
# This is a no-op when the directory is already there.
dir.create(dwc_output_dir, recursive = TRUE, showWarnings = FALSE)

# Occurrence table (NA written as empty cells), archive descriptor, metadata.
write.csv(
  occurrence,
  file.path(dwc_output_dir, "occurrence.csv"),
  na = "",
  row.names = FALSE
)
write_xml(meta, file = file.path(dwc_output_dir, "meta.xml"))
write_eml(my_eml, file.path(dwc_output_dir, "eml.xml"))

Edit EML

We have to further edit the EML file to conform to GBIF-specific requirements that cannot be included in the original EML construction. This includes changing the schemaLocation and rearranging the children of the GBIF element, since the construction automatically arranges child nodes in alphabetical order.

# Post-process the eml.xml written above: adjust the root attributes and
# reorder the children of the <gbif> element, then overwrite the file.
eml_content <- read_xml(paste(dwc_output_dir, "/eml.xml", sep = ""))

# Change the root element's schemaLocation / namespace attributes for GBIF.
# NOTE(review): the schemaLocation pairs an https:// namespace URI with EML
# version 2.1.1 -- confirm this matches what the GBIF profile expects.
root_node <- xml_root(eml_content)
xml_set_attr(root_node, "xsi:schemaLocation", "https://eml.ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.2/eml.xsd")
xml_set_attr(root_node, "xmlns:dc", "http://purl.org/dc/terms/")
# A NULL value is used here to drop the stmml namespace declaration.
xml_set_attr(root_node, "xmlns:stmml", NULL)
xml_set_attr(root_node, "xml:lang", "eng")


# Rearrange the child nodes under the GBIF element.
# Step 1: keep handles on the nodes that will be moved, and tag the
# bibliography citation with the DOI held in `citationdoi`.
hierarchyLevel <- eml_content %>% xml_find_all(".//hierarchyLevel")
dateStamp <- eml_content %>% xml_find_all(".//dateStamp")
citation <- eml_content %>% xml_find_all("./additionalMetadata/metadata/gbif/citation")
bibcitation <- eml_content %>% xml_find_all("./additionalMetadata/metadata/gbif/bibliography/citation")
xml_set_attr(bibcitation, "identifier", citationdoi)

# Step 2: detach the three nodes from the document, then re-insert each one
# at position 0 of <gbif>. Because every insertion goes to the front, the
# statement order matters: the node added last (dateStamp) ends up first,
# followed by hierarchyLevel and then citation.
eml_content %>% xml_find_all(".//hierarchyLevel") %>% xml_remove()
eml_content %>% xml_find_all(".//dateStamp") %>% xml_remove()
eml_content %>% xml_find_all("./additionalMetadata/metadata/gbif/citation") %>% xml_remove()
eml_content %>% xml_find_all(".//gbif") %>% xml_add_child(citation, .where=0)
eml_content %>% xml_find_all(".//gbif") %>% xml_add_child(hierarchyLevel, .where=0)
eml_content %>% xml_find_all(".//gbif") %>% xml_add_child(dateStamp, .where=0)

# Overwrite the original eml.xml with the edited document.
write_xml(eml_content, paste(dwc_output_dir, "/eml.xml", sep = ""))

Zip files to DwC-A

# Final archive path inside the output directory.
output_zip <- file.path(dwc_output_dir, "DwC-A.zip")

# Drop any archive left over from a previous run so it is rebuilt from scratch.
if (file.exists(output_zip)) {
  unlink(output_zip)
}

# Bundle exactly the three archive components written by this notebook.
# Listing the whole directory would also archive -- and afterwards delete --
# any unrelated file that happens to live in the output directory.
file_paths <- file.path(
  dwc_output_dir,
  c("occurrence.csv", "meta.xml", "eml.xml")
)

# Call the zip package explicitly: `mode = "cherry-pick"` is an argument of
# zip::zip(), and an unqualified zip() could resolve to utils::zip depending
# on the order in which packages were attached.
zip::zip(zipfile = output_zip, files = file_paths, mode = "cherry-pick")

# Remove the loose component files only once the archive exists.
if (file.exists(output_zip)) {
  unlink(file_paths)
}