https://github.com/sunray1/NEONTickstoHumboldt

The following code downloads tick and tick pathogen data collected by the National Ecological Observatory Network (NEON). The goal is to remap the data to Darwin Core using the Humboldt Extension. The output will include vouchered specimens (Occurrence) and ecological event data (Humboldt).

Load libraries

library(neonUtilities)
library(dplyr)

Download files

[NEON 2025a] NEON (National Ecological Observatory Network) [1]. 2025. NEON Tick pathogen status (DP1.10092.001), RELEASE-2025. https://doi.org/10.48443/8nhe-cp13. Dataset accessed from https://data.neonscience.org/data-products/DP1.10092.001/RELEASE-2025 on 2025-09-25.

[NEON 2025b] NEON (National Ecological Observatory Network) [2]. 2025. NEON Ticks sampled using drag cloths (DP1.10093.001), RELEASE-2025. https://doi.org/10.48443/6zpz-5z19. Dataset accessed from https://data.neonscience.org/data-products/DP1.10093.001/RELEASE-2025 on 2025-09-25.

A NEON API token (read from the NEON_TOKEN environment variable) is used to download the data. The downloaded data are also saved as RData files so users don’t have to re-download them.

Get Tick and Tick Pathogen data

# Create the "data" and "outputs" folders when they are not already present.
invisible(lapply(c("data", "outputs"), function(folder) {
  if (!dir.exists(folder)) {
    dir.create(folder)
  }
}))

# Tick pathogen status (DP1.10092.001): reuse the cached RData file when it
# exists; otherwise download the product from NEON and cache it for later runs.
if (file.exists("data/tick.pathogen.RData")) {
  load("data/tick.pathogen.RData")
} else {
  tick.pathogen <- loadByProduct(
    dpID = "DP1.10092.001",
    package = "basic",
    release = "RELEASE-2025",
    token = Sys.getenv("NEON_TOKEN"), # API token taken from the environment
    check.size = FALSE # spelled out: `F` is an ordinary, reassignable variable
  )

  save(tick.pathogen, file = "data/tick.pathogen.RData")
}

# Ticks sampled using drag cloths (DP1.10093.001): reuse the cached RData file
# when it exists; otherwise download the product from NEON and cache it for
# later runs.
if (file.exists("data/tick.occurrence.RData")) {
  load("data/tick.occurrence.RData")
} else {
  tick.occurrence <- loadByProduct(
    dpID = "DP1.10093.001",
    package = "basic",
    release = "RELEASE-2025",
    token = Sys.getenv("NEON_TOKEN"), # API token taken from the environment
    check.size = FALSE # spelled out: `F` is an ordinary, reassignable variable
  )

  save(tick.occurrence, file = "data/tick.occurrence.RData")
}

Map Data

Map Event Data

Some event data is not included in the NEON data download (data for the project, domain and site).

All fields from the Survey event table template are included for completeness, even if some are not applicable to this dataset.

Project Event

# Project-level event: a single record describing the NEON tick sampling
# programme as a whole. It has no parent event. Every field of the survey
# event template is listed, with NA for fields that are not populated.
NEON_project_event <- local({
  field_data <- tick.occurrence$tck_fielddata

  # Distinct plots present in the field data; reused for the plot count, the
  # plot-name list and the summed perimeter below.
  sampled_plots <- unique(field_data$plotID)

  # Earliest and latest collection dates, joined as "start/end".
  collection_span <- paste(
    min(field_data$collectDate, na.rm = TRUE),
    max(field_data$collectDate, na.rm = TRUE),
    sep = "/"
  )

  data.frame(
    # Q: should I add namespace to these terms?
    parentEventID = NA,
    eventID = "NEON",
    siteNestingDescription = "46 terrestrial sites each with at least six 40×40 m plots designated for tick sampling (plots may be decommissioned and reassigned as necessary, but six plots are required for sampling) located across 19 ecoclimatic domains",
    siteCount = length(sampled_plots),
    fieldNumber = NA,
    verbatimSiteNames = paste(sort(sampled_plots), collapse = " | "),
    habitat = NA,
    verbatimSiteDescriptions = NA,
    reportedWeather = NA,
    reportedExtremeConditions = NA,
    locationID = NA,
    countryCode = "US",
    decimalLatitude = NA,
    decimalLongitude = NA,
    coordinateUncertaintyInMeters = NA,
    geodeticDatum = NA,
    locality = NA,
    # Summed value of plot perimeters (4 sides of 40 m per plot).
    totalAreaSampledValue = length(sampled_plots) * 40 * 4,
    totalAreaSampledUnit = "m",
    # Full geographic extent of USA-NEON boundaries.
    geospatialScopeAreaValue = 9428288,
    geospatialScopeAreaUnit = "km²",
    sampleSizeValue = NA,
    sampleSizeUnit = NA,
    footprintWKT = NA,
    footprintSRS = NA,
    isVegetationCoverReported = "false",
    eventDate = collection_span,
    eventTime = NA,
    eventDurationValue = NA,
    eventDurationUnit = NA,
    eventType = "project",
    # Q: I guess we could consider this an inventory and fill out this and the
    # following, though the guide says only if eventType = Inventory
    inventoryTypes = NA,
    compilationTypes = NA,
    compilationSourceTypes = NA,
    samplingProtocol = "Drag sampling | Flagging",
    protocolNames = "Drag sampling | Flagging",
    protocolDescriptions = "Ticks are sampled at six plots per site every three to six weeks, depending on prior detections. Sampling follows the 160 m perimeter of each 40 × 40 m plot, using drag cloths (or flagging in dense vegetation). Ticks from all life stages are collected every few meters and preserved in ethanol, with bouts scheduled between green-up and dormancy. Samples are sent to external labs where they are sorted, counted and a subset of identified nymphs are tested for pathogens.",
    protocolReferences = "Paull, S. 2022. TOS Protocol and Procedure: TCK – Tick and Tick-Borne Pathogen Sampling. NEON.DOC.014045. NEON (National Ecological Observatory Network). | Laboratory of Medical Zoology (LMZ). 2023. NEON Tick Pathogen Testing SOP, V4.01. University of Massachusetts, Amherst. | Beati, L. 2021. Tick Identification Instructions, USNTC Standard Operating Procedure (SOP). V3. Georgia Southern University, US National Tick Collection (USNTC).",
    # Q: Should this be reported true at this level or only at the event visit
    # level?
    isAbsenceReported = "true",
    absentTaxa = NA,
    isAbundanceReported = "true",
    isAbundanceCapReported = "false",
    abundanceCap = NA,
    # Q: Since we're not explicitly designating these as material samples,
    # should this be false?
    hasMaterialSamples = "true",
    materialSampleTypes = "wholeOrganism",
    hasVouchers = "true",
    voucherInstitutions = "US National Tick Collection, Georgia Southern University",
    isLeastSpecificTargetCategoryQuantityInclusive = "false",
    dataGeneralizations = NA,
    # TODO: double check this - I don't think any tick taxa are withheld but
    # I'm not 100% certain
    informationWithheld = NA,
    fieldNotes = NA,
    eventRemarks = NA,
    verbatimTargetScope = "ticks",
    # Q: Since we have pathogens in occurrences, I'm not sure how to designate
    # those here - the ones tested don't have a mca - most are Bacteria but one
    # is a piroplasm
    targetTaxonomicScope = "?Ixodidae",
    excludedTaxonomicScope = NA,
    isTaxonomicScopeFullyReported = "true",
    # Q: I'm not 100% sure I did these two correctly
    taxonCompletenessReported = "reportedComplete",
    taxonCompletenessProtocols = "Tick drag/flag methods are standardized to comprehensively detect all life stages of Ixodidae during the active season.",
    hasNonTargetTaxa = "false",
    areNonTargetTaxaFullyReported = NA,
    nonTargetTaxa = NA,
    # Q: I could change this to
    # paste(unique(tick.occurrence$tck_taxonomyProcessed$identifiedBy), collapse = " | ")
    # and get the actual list of people who did the identifications, though
    # this is not clean
    identifiedBy = paste(
      unique(tick.occurrence$tck_taxonomyProcessed$laboratoryName),
      collapse = " | "
    ),
    targetLifeStageScope = "adult | nymph | larvae",
    excludedLifeStageScope = "egg",
    isLifeStageScopeFullyReported = "true",
    targetDegreeOfEstablishmentScope = NA,
    excludedDegreeOfEstablishmentScope = NA,
    isDegreeOfEstablishmentScopeFullyReported = NA,
    targetGrowthFormScope = NA,
    excludedGrowthFormScope = NA,
    isGrowthFormScopeFullyReported = NA,
    hasNonTargetOrganisms = NA,
    targetHabitatScope = NA,
    excludedHabitatScope = NA,
    samplingEffort = "6 plots per bout, 160 m perimeter sampled per plot",
    isSamplingEffortReported = "true",
    samplingEffortProtocol = "Ticks are sampled at six plots per site, with bouts every 3 or 6 weeks depending on intensity. Each bout consists of dragging (or flagging if needed) along the full 160 m perimeter of each 40 × 40 m plot. Ticks of all life stages are collected at intervals and preserved in ethanol.",
    samplingEffortValue = "6, 160",
    samplingEffortUnit = "plots per bout, meters per plot circuit",
    # Q: I could change this to
    # paste(unique(tick.occurrence$tck_fielddata$measuredBy), collapse = " | ")
    # and get the list of actual people, though this has orcid and emails and
    # is quite messy
    samplingPerformedBy = "NEON Field Staff"
  )
})

Domain Events

# Domain-level metadata kept alongside the cached downloads.
NEON_domain_metadata <- read.csv("data/neon_domain_METADATA.csv", stringsAsFactors = FALSE)

# Domains that appear in the tick field data. `[[` returns a plain vector
# whether the table is a base data.frame or a tibble.
NEON_domain_list <- unique(tick.occurrence$tck_fielddata[["domainID"]])

# Row of NEON_domain_metadata that corresponds to each sampled domain.
domain_idx <- match(NEON_domain_list, NEON_domain_metadata$domain_id)

# match() yields NA for a domain with no metadata row; that would silently
# produce an event record with missing IDs and coordinates, so flag it.
if (anyNA(domain_idx)) {
  warning(
    "No domain metadata found for: ",
    paste(NEON_domain_list[is.na(domain_idx)], collapse = ", "),
    call. = FALSE
  )
}

# Domain-level events: one record per entry of NEON_domain_list, each a child
# of the "NEON" project event. Location fields come from NEON_domain_metadata
# (via domain_idx); plot counts, plot names and date ranges are summarised
# from the tick field data. Fields that are not populated are NA.
added_domain_data <- local({
  field_data <- tick.occurrence$tck_fielddata

  # Number of distinct sampled plots per domain, in NEON_domain_list order.
  # Computed once and reused for siteCount and totalAreaSampledValue.
  plots_per_domain <- tapply(
    field_data$plotID,
    field_data$domainID,
    function(x) length(unique(x))
  )[NEON_domain_list]

  # " | "-separated list of the sampled plots in each domain.
  plot_names_by_domain <- tapply(
    field_data$plotID,
    field_data$domainID,
    function(x) paste(sort(unique(x)), collapse = " | ")
  )[NEON_domain_list]

  # First and last collection date per domain, joined as "start/end".
  # NOTE(review): the dates are converted to text by paste(); confirm the
  # resulting format is the one intended for eventDate.
  date_range_by_domain <- tapply(
    field_data$collectDate,
    field_data$domainID,
    function(x) paste(min(x, na.rm = TRUE), max(x, na.rm = TRUE), sep = "/")
  )[NEON_domain_list]

  # Identifying laboratories per domain, aligned to NEON_domain_list.
  # Q: Some domains/sites have had no ticks found, so identifiedBy is NA -
  # should this be changed to something more descriptive, like 'No ticks
  # found'? (The previous ifelse(is.na(x), NA, x) placeholder was a no-op and
  # has been removed.)
  # Q: I could change this to get the actual list of people who did the
  # identifications, though this is not clean
  labs_by_domain <- tick.occurrence$tck_taxonomyProcessed %>%
    left_join(
      field_data %>% select(sampleID, domainID),
      by = "sampleID"
    ) %>%
    group_by(domainID) %>%
    summarise(
      identifiedBy = paste(sort(unique(laboratoryName)), collapse = " | "),
      .groups = "drop"
    ) %>%
    right_join(data.frame(domainID = NEON_domain_list), by = "domainID") %>%
    arrange(match(domainID, NEON_domain_list)) %>%
    pull(identifiedBy)

  data.frame(
    parentEventID = "NEON",
    eventID = NEON_domain_metadata$domain_id[domain_idx],
    siteNestingDescription = "Each domain contains 1-3 terrestrial field sites, each with at least six 40×40 m plots designated for tick sampling (plots may be decommissioned and reassigned as necessary, but six plots are required for sampling)",
    siteCount = plots_per_domain,
    fieldNumber = NA,
    verbatimSiteNames = plot_names_by_domain,
    habitat = NA,
    verbatimSiteDescriptions = NA,
    reportedWeather = NA,
    reportedExtremeConditions = NA,
    locationID = NEON_domain_metadata$domain_id[domain_idx],
    countryCode = "US",
    decimalLatitude = NEON_domain_metadata$decimalLatitude[domain_idx],
    decimalLongitude = NEON_domain_metadata$decimalLongitude[domain_idx],
    coordinateUncertaintyInMeters = NEON_domain_metadata$coordinateUncertaintyinMeters[domain_idx],
    geodeticDatum = "WGS84",
    locality = paste0(
      NEON_domain_metadata$domain_name[domain_idx],
      " (", NEON_domain_metadata$domain_id[domain_idx], ")"
    ),
    # Summed value of surveyed plot perimeters in the domain (4 x 40 m each).
    totalAreaSampledValue = plots_per_domain * 40 * 4,
    totalAreaSampledUnit = "m",
    geospatialScopeAreaValue = NEON_domain_metadata$sq_km[domain_idx],
    geospatialScopeAreaUnit = "km²",
    sampleSizeValue = NA,
    sampleSizeUnit = NA,
    # Q: I'm not going to bother with these since they'll be huge
    footprintWKT = NA,
    footprintSRS = NA,
    isVegetationCoverReported = "false",
    eventDate = date_range_by_domain,
    eventTime = NA,
    eventDurationValue = NA,
    eventDurationUnit = NA,
    eventType = "domain",
    inventoryTypes = NA,
    compilationTypes = NA,
    compilationSourceTypes = NA,
    # Sampling protocol terms should be populated for every Event regardless
    # of hierarchical level.
    # Q: I took this to mean all the terms described in 5.5. Methodology or
    # sampling protocol (from here up to eventRemarks), or is this just for
    # the four protocol fields?
    samplingProtocol = "Drag sampling | Flagging",
    protocolNames = "Drag sampling | Flagging",
    protocolDescriptions = "Ticks are sampled at six plots per site every three to six weeks, depending on prior detections. Sampling follows the 160 m perimeter of each 40 × 40 m plot, using drag cloths (or flagging in dense vegetation). Ticks from all life stages are collected every few meters and preserved in ethanol, with bouts scheduled between green-up and dormancy. Samples are sent to external labs where they are sorted, counted and a subset of identified nymphs are tested for pathogens.",
    protocolReferences = "Paull, S. 2022. TOS Protocol and Procedure: TCK – Tick and Tick-Borne Pathogen Sampling. NEON.DOC.014045. NEON (National Ecological Observatory Network). | Laboratory of Medical Zoology (LMZ). 2023. NEON Tick Pathogen Testing SOP, V4.01. University of Massachusetts, Amherst. | Beati, L. 2021. Tick Identification Instructions, USNTC Standard Operating Procedure (SOP). V3. Georgia Southern University, US National Tick Collection (USNTC).",
    # Q: Should this be reported true at this level or only at the event visit
    # level?
    isAbsenceReported = "true",
    absentTaxa = NA,
    isAbundanceReported = "true",
    isAbundanceCapReported = "false",
    abundanceCap = NA,
    # Q: Since we're not explicitly designating these as material samples,
    # should this be false?
    hasMaterialSamples = "true",
    materialSampleTypes = "wholeOrganism",
    hasVouchers = "true",
    voucherInstitutions = "US National Tick Collection, Georgia Southern University",
    isLeastSpecificTargetCategoryQuantityInclusive = "false",
    dataGeneralizations = NA,
    # TODO: double check this - I don't think any tick taxa are withheld but
    # I'm not 100% certain
    informationWithheld = NA,
    fieldNotes = NA,
    eventRemarks = NA,
    # Recommended best practice is to populate scope terms every Event to
    # which they apply.
    verbatimTargetScope = "ticks",
    # Q: Since we have pathogens in occurrences, I'm not sure how to designate
    # those here - the ones tested don't have a mca - most are Bacteria but one
    # is a piroplasm
    targetTaxonomicScope = "?Ixodidae",
    excludedTaxonomicScope = NA,
    isTaxonomicScopeFullyReported = "true",
    # Q: I'm not 100% sure I did these two correctly
    taxonCompletenessReported = "reportedComplete",
    taxonCompletenessProtocols = "Tick drag/flag methods are standardized to comprehensively detect all life stages of Ixodidae during the active season.",
    hasNonTargetTaxa = "false",
    areNonTargetTaxaFullyReported = NA,
    nonTargetTaxa = NA,
    identifiedBy = labs_by_domain,
    targetLifeStageScope = "adult | nymph | larvae",
    excludedLifeStageScope = "egg",
    isLifeStageScopeFullyReported = "true",
    targetDegreeOfEstablishmentScope = NA,
    excludedDegreeOfEstablishmentScope = NA,
    isDegreeOfEstablishmentScopeFullyReported = NA,
    targetGrowthFormScope = NA,
    excludedGrowthFormScope = NA,
    isGrowthFormScopeFullyReported = NA,
    hasNonTargetOrganisms = NA,
    targetHabitatScope = NA,
    excludedHabitatScope = NA,
    samplingEffort = "6 plots per bout, 160 m perimeter sampled per plot",
    isSamplingEffortReported = "true",
    samplingEffortProtocol = "Ticks are sampled at six plots per site, with bouts every 3 or 6 weeks depending on intensity. Each bout consists of dragging (or flagging if needed) along the full 160 m perimeter of each 40 × 40 m plot. Ticks of all life stages are collected at intervals and preserved in ethanol.",
    samplingEffortValue = "6, 160",
    samplingEffortUnit = "plots per bout, meters per plot circuit",
    # Q: I could change this to
    # paste(unique(tick.occurrence$tck_fielddata$measuredBy), collapse = " | ")
    # and get the list of actual people, though this has orcid and emails and
    # is quite messy
    samplingPerformedBy = "NEON Field Staff"
  )
})

Site Events

# Site-level metadata, read directly from the NEON field-site export.
NEON_site_metadata <- read.csv(
  "https://www.neonscience.org/field-sites/exports/NEON_Field_Site_Metadata_20250924",
  stringsAsFactors = FALSE
)

# Sites that appear in the tick field data. `[[` returns a plain vector
# whether the table is a base data.frame or a tibble.
NEON_site_list <- unique(tick.occurrence$tck_fielddata[["siteID"]])

# Row of NEON_site_metadata that corresponds to each sampled site.
site_idx <- match(NEON_site_list, NEON_site_metadata$site_id)

# match() yields NA for a site with no metadata row; that would silently
# produce an event record with missing IDs and coordinates, so flag it.
if (anyNA(site_idx)) {
  warning(
    "No site metadata found for: ",
    paste(NEON_site_list[is.na(site_idx)], collapse = ", "),
    call. = FALSE
  )
}

# Site-level events: one record per entry of NEON_site_list, each a child of
# its domain event. Location and habitat fields come from NEON_site_metadata
# (via site_idx); plot counts, plot names and date ranges are summarised from
# the tick field data. Fields that are not populated are NA.
added_site_data <- local({
  field_data <- tick.occurrence$tck_fielddata

  # Number of distinct sampled plots per site, in NEON_site_list order.
  # Computed once and reused for siteCount and totalAreaSampledValue.
  plots_per_site <- tapply(
    field_data$plotID,
    field_data$siteID,
    function(x) length(unique(x))
  )[NEON_site_list]

  # " | "-separated list of the sampled plots at each site.
  plot_names_by_site <- tapply(
    field_data$plotID,
    field_data$siteID,
    function(x) paste(sort(unique(x)), collapse = " | ")
  )[NEON_site_list]

  # First and last collection date per site, joined as "start/end".
  # NOTE(review): the dates are converted to text by paste(); confirm the
  # resulting format is the one intended for eventDate.
  date_range_by_site <- tapply(
    field_data$collectDate,
    field_data$siteID,
    function(x) paste(min(x, na.rm = TRUE), max(x, na.rm = TRUE), sep = "/")
  )[NEON_site_list]

  # NOTE(review): the column name is spelled "evelation" here - presumably
  # matching the header in the NEON export; confirm against the CSV.
  mean_elevation <- NEON_site_metadata$mean_evelation_m[site_idx]

  # Identifying laboratories per site, aligned to NEON_site_list.
  # Q: Some domains/sites have had no ticks found, so identifiedBy is NA -
  # should this be changed to something more descriptive, like 'No ticks
  # found'? (The previous ifelse(is.na(x), NA, x) placeholder was a no-op and
  # has been removed.)
  # Q: I could change this to get the actual list of people who did the
  # identifications, though this is not clean
  labs_by_site <- tick.occurrence$tck_taxonomyProcessed %>%
    left_join(
      field_data %>% select(sampleID, siteID),
      by = "sampleID"
    ) %>%
    group_by(siteID) %>%
    summarise(
      identifiedBy = paste(sort(unique(laboratoryName)), collapse = " | "),
      .groups = "drop"
    ) %>%
    right_join(data.frame(siteID = NEON_site_list), by = "siteID") %>%
    arrange(match(siteID, NEON_site_list)) %>%
    pull(identifiedBy)

  data.frame(
    parentEventID = NEON_site_metadata$domain_id[site_idx],
    eventID = NEON_site_metadata$site_id[site_idx],
    siteNestingDescription = "Each site contains at least six 40×40 m plots designated for tick sampling (plots may be decommissioned and reassigned as necessary, but six plots are required for sampling)",
    siteCount = plots_per_site,
    fieldNumber = NA,
    verbatimSiteNames = plot_names_by_site,
    # Put spaces around the "|" separators of the NLCD class list.
    habitat = gsub("\\|", " | ", NEON_site_metadata$dominant_nlcd_classes[site_idx]),
    verbatimSiteDescriptions = NEON_site_metadata$site_type[site_idx],
    reportedWeather = NA,
    reportedExtremeConditions = NA,
    locationID = NEON_site_metadata$site_id[site_idx],
    countryCode = "US",
    county = NEON_site_metadata$site_county[site_idx],
    stateProvince = NEON_site_metadata$site_state[site_idx],
    decimalLatitude = NEON_site_metadata$latitude[site_idx],
    decimalLongitude = NEON_site_metadata$longitude[site_idx],
    # Q: I can attempt to get uncertainty for each site but we don't have it
    # handy and I'd have to calculate it by hand
    coordinateUncertaintyInMeters = NA,
    geodeticDatum = "WGS84",
    locality = paste0(
      NEON_site_metadata$site_name[site_idx],
      " (", NEON_site_metadata$site_id[site_idx], ")"
    ),
    minimumElevationInMeters = NEON_site_metadata$minimum_elevation_m[site_idx],
    maximumElevationInMeters = NEON_site_metadata$maximum_elevation_m[site_idx],
    # Keep a missing elevation as NA instead of emitting the string "NAm".
    verbatimElevation = ifelse(
      is.na(mean_elevation),
      NA_character_,
      paste0(mean_elevation, "m")
    ),
    # Summed value of surveyed plot perimeters at the site (4 x 40 m each).
    totalAreaSampledValue = plots_per_site * 40 * 4,
    totalAreaSampledUnit = "m",
    geospatialScopeAreaValue = NEON_site_metadata$terrestrial_sampling_boundary_size_km2[site_idx],
    geospatialScopeAreaUnit = "km²",
    sampleSizeValue = NA,
    sampleSizeUnit = NA,
    # Q: I'm not going to bother with these since they'll be huge
    footprintWKT = NA,
    footprintSRS = NA,
    isVegetationCoverReported = "false",
    eventDate = date_range_by_site,
    eventTime = NA,
    eventDurationValue = NA,
    eventDurationUnit = NA,
    eventType = "site",
    inventoryTypes = NA,
    compilationTypes = NA,
    compilationSourceTypes = NA,
    # Sampling protocol terms should be populated for every Event regardless
    # of hierarchical level.
    # Q: I took this to mean all the terms described in 5.5. Methodology or
    # sampling protocol (from here up to eventRemarks), or is this just for
    # the four protocol fields?
    samplingProtocol = "Drag sampling | Flagging",
    protocolNames = "Drag sampling | Flagging",
    protocolDescriptions = "Ticks are sampled at six plots per site every three to six weeks, depending on prior detections. Sampling follows the 160 m perimeter of each 40 × 40 m plot, using drag cloths (or flagging in dense vegetation). Ticks from all life stages are collected every few meters and preserved in ethanol, with bouts scheduled between green-up and dormancy. Samples are sent to external labs where they are sorted, counted and a subset of identified nymphs are tested for pathogens.",
    protocolReferences = "Paull, S. 2022. TOS Protocol and Procedure: TCK – Tick and Tick-Borne Pathogen Sampling. NEON.DOC.014045. NEON (National Ecological Observatory Network). | Laboratory of Medical Zoology (LMZ). 2023. NEON Tick Pathogen Testing SOP, V4.01. University of Massachusetts, Amherst. | Beati, L. 2021. Tick Identification Instructions, USNTC Standard Operating Procedure (SOP). V3. Georgia Southern University, US National Tick Collection (USNTC).",
    # Q: Should this be reported true at this level or only at the event visit
    # level?
    isAbsenceReported = "true",
    absentTaxa = NA,
    isAbundanceReported = "true",
    isAbundanceCapReported = "false",
    abundanceCap = NA,
    # Q: Since we're not explicitly designating these as material samples,
    # should this be false?
    hasMaterialSamples = "true",
    materialSampleTypes = "wholeOrganism",
    hasVouchers = "true",
    voucherInstitutions = "US National Tick Collection, Georgia Southern University",
    isLeastSpecificTargetCategoryQuantityInclusive = "false",
    dataGeneralizations = NA,
    # TODO: double check this - I don't think any tick taxa are withheld but
    # I'm not 100% certain
    informationWithheld = NA,
    fieldNotes = NA,
    eventRemarks = NA,
    # Recommended best practice is to populate scope terms every Event to
    # which they apply.
    verbatimTargetScope = "ticks",
    # Q: Since we have pathogens in occurrences, I'm not sure how to designate
    # those here - the ones tested don't have a mca - most are Bacteria but one
    # is a piroplasm
    targetTaxonomicScope = "?Ixodidae",
    excludedTaxonomicScope = NA,
    isTaxonomicScopeFullyReported = "true",
    # Q: I'm not 100% sure I did these two correctly
    taxonCompletenessReported = "reportedComplete",
    taxonCompletenessProtocols = "Tick drag/flag methods are standardized to comprehensively detect all life stages of Ixodidae during the active season.",
    hasNonTargetTaxa = "false",
    areNonTargetTaxaFullyReported = NA,
    nonTargetTaxa = NA,
    identifiedBy = labs_by_site,
    targetLifeStageScope = "adult | nymph | larvae",
    excludedLifeStageScope = "egg",
    isLifeStageScopeFullyReported = "true",
    targetDegreeOfEstablishmentScope = NA,
    excludedDegreeOfEstablishmentScope = NA,
    isDegreeOfEstablishmentScopeFullyReported = NA,
    targetGrowthFormScope = NA,
    excludedGrowthFormScope = NA,
    isGrowthFormScopeFullyReported = NA,
    hasNonTargetOrganisms = NA,
    targetHabitatScope = NA,
    excludedHabitatScope = NA,
    samplingEffort = "6 plots per bout, 160 m perimeter sampled per plot",
    isSamplingEffortReported = "true",
    samplingEffortProtocol = "Ticks are sampled at six plots per site, with bouts every 3 or 6 weeks depending on intensity. Each bout consists of dragging (or flagging if needed) along the full 160 m perimeter of each 40 × 40 m plot. Ticks of all life stages are collected at intervals and preserved in ethanol.",
    samplingEffortValue = "6, 160",
    samplingEffortUnit = "plots per bout, meters per plot circuit",
    # Q: I could change this to
    # paste(unique(tick.occurrence$tck_fielddata$measuredBy), collapse = " | ")
    # and get the list of actual people, though this has orcid and emails and
    # is quite messy
    samplingPerformedBy = "NEON Field Staff"
  )
})

Plot Events

# One metadata row per tick plot, taken from the field data.
# Per the original author's note, plot JERC_081 is recorded with two different
# elevations and would otherwise appear twice. distinct() keeps the FIRST row
# seen for each namedLocation/plotID pair and drops any later ones.
NEON_plot_metadata <- tick.occurrence$tck_fielddata %>%
  select(all_of(c(
    "namedLocation",
    "siteID",
    "plotID",
    "nlcdClass",
    "decimalLatitude",
    "decimalLongitude",
    "geodeticDatum",
    "coordinateUncertainty",
    "elevation",
    "elevationUncertainty"
  ))) %>%
  distinct(namedLocation, plotID, .keep_all = TRUE)

# Plot-level Event rows: one row per plot in NEON_plot_metadata, each parented
# to its site (parentEventID = siteID, eventID = plotID).

# Per-plot lookups, computed once and indexed by plotID inside data.frame().
# eventDate is "<earliest collectDate>/<latest collectDate>" for the plot.
plot_event_date_range <- tapply(
  tick.occurrence$tck_fielddata$collectDate,
  tick.occurrence$tck_fielddata$plotID,
  function(x) paste(min(x, na.rm = TRUE), max(x, na.rm = TRUE), sep = "/")
)

# Laboratories that identified ticks from each plot, " | "-separated.
#Q: Some plots have had no ticks identified, so identifiedBy is NA - should this be changed to something more descriptive, like 'No ticks found'?
#Q: I could change this to get the actual list of people who did the identifications, though this is not clean
plot_identifying_labs <- tapply(
  tick.occurrence$tck_taxonomyProcessed$laboratoryName,
  tick.occurrence$tck_taxonomyProcessed$plotID,
  function(x) paste(sort(unique(x)), collapse = " | ")
)

added_plot_data <- data.frame(parentEventID = NEON_plot_metadata$siteID,
                              eventID = NEON_plot_metadata$plotID,
                              siteNestingDescription = "One 40×40 m plot designated for tick sampling",
                              siteCount = 1,
                              fieldNumber = NA,
                              verbatimSiteNames = NEON_plot_metadata$plotID,
                              habitat = NEON_plot_metadata$nlcdClass,
                              verbatimSiteDescriptions = NA,
                              reportedWeather = NA,
                              reportedExtremeConditions = NA,
                              locationID = NEON_plot_metadata$plotID,
                              countryCode = "US",
                              decimalLatitude = NEON_plot_metadata$decimalLatitude,
                              decimalLongitude = NEON_plot_metadata$decimalLongitude,
                              coordinateUncertaintyInMeters = NEON_plot_metadata$coordinateUncertainty,
                              geodeticDatum = "WGS84",
                              locality = NEON_plot_metadata$plotID,
                              # Elevation range is the reported elevation -/+ its uncertainty.
                              minimumElevationinMeters = NEON_plot_metadata$elevation - NEON_plot_metadata$elevationUncertainty,
                              maximumElevationinMeters = NEON_plot_metadata$elevation + NEON_plot_metadata$elevationUncertainty,
                              # Keep a missing elevation as NA; pasting it would produce the literal text "NAm".
                              verbatimElevation = ifelse(
                                is.na(NEON_plot_metadata$elevation),
                                NA,
                                paste0(NEON_plot_metadata$elevation, "m")
                              ),
                              # 40 * 4 = 160, the plot perimeter in metres (four 40 m sides).
                              # NOTE(review): this is a length reported under the "area" terms - confirm that is intended.
                              totalAreaSampledValue = 40 * 4,
                              totalAreaSampledUnit = "m",
                              geospatialScopeAreaValue = 40 * 4,
                              geospatialScopeAreaUnit = "m",
                              sampleSizeValue = NA,
                              sampleSizeUnit = NA,
                              footprintWKT = NA,
                              footprintSRS = NA,
                              isVegetationCoverReported = "false",
                              eventDate = plot_event_date_range[NEON_plot_metadata$plotID],
                              eventTime = NA,
                              eventDurationValue = NA,
                              eventDurationUnit = NA,
                              eventType = "plot", #Q: This was 'Survey' in the mapping sheet
                              inventoryTypes = NA,
                              compilationTypes = NA,
                              compilationSourceTypes = NA,
                              samplingProtocol = "Drag sampling | Flagging", #Sampling protocol terms should be populated for every Event regardless of hierarchical level Q: I took this to mean all the terms described in 5.5. Methodology or sampling protocol (from here up to eventRemarks), or is this just for the four protocol fields?
                              protocolNames = "Drag sampling | Flagging",
                              protocolDescriptions = "Ticks are sampled at six plots per site every three to six weeks, depending on prior detections. Sampling follows the 160 m perimeter of each 40 × 40 m plot, using drag cloths (or flagging in dense vegetation). Ticks from all life stages are collected every few meters and preserved in ethanol, with bouts scheduled between green-up and dormancy. Samples are sent to external labs where they are sorted, counted and a subset of identified nymphs are tested for pathogens.",
                              protocolReferences = "Paull, S. 2022. TOS Protocol and Procedure: TCK – Tick and Tick-Borne Pathogen Sampling. NEON.DOC.014045. NEON (National Ecological Observatory Network). | Laboratory of Medical Zoology (LMZ). 2023. NEON Tick Pathogen Testing SOP, V4.01. University of Massachusetts, Amherst. | Beati, L. 2021. Tick Identification Instructions, USNTC Standard Operating Procedure (SOP). V3. Georgia Southern University, US National Tick Collection (USNTC).",
                              isAbsenceReported = "true", #Q: Should this be reported true at this level or only at the event visit level?
                              absentTaxa = NA,
                              isAbundanceReported = "true",
                              isAbundanceCapReported = "false",
                              abundanceCap = NA,
                              hasMaterialSamples = "true", #Q: Since we're not explicitly designating these as material samples, should this be false?
                              materialSampleTypes = "wholeOrganism",
                              hasVouchers = "true",
                              voucherInstitutions = "US National Tick Collection, Georgia Southern University",
                              isLeastSpecificTargetCategoryQuantityInclusive = "false",
                              dataGeneralizations = NA,
                              informationWithheld = NA, #TODO: double check this - I don't think any tick taxa are withheld but I'm not 100% certain
                              fieldNotes = NA,
                              eventRemarks = NA,
                              verbatimTargetScope = "ticks", # Recommended best practice is to populate scope terms every Event to which they apply.
                              targetTaxonomicScope = "?Ixodidae", #Q: Since we have pathogens in occurrences, I'm not sure how to designate those here - the ones tested don't have a mca - most are Bacteria but one is a piroplasm
                              excludedTaxonomicScope = NA,
                              isTaxonomicScopeFullyReported = "true",
                              taxonCompletenessReported = "reportedComplete", #Q: I'm not 100% sure I did these two correctly
                              taxonCompletenessProtocols = "Tick drag/flag methods are standardized to comprehensively detect all life stages of Ixodidae during the active season.", #Q: I'm not 100% sure I did these two correctly
                              hasNonTargetTaxa = "false",
                              areNonTargetTaxaFullyReported = NA,
                              nonTargetTaxa = NA,
                              # Plots missing from the lookup (no identified ticks) index to NA.
                              identifiedBy = plot_identifying_labs[NEON_plot_metadata$plotID],
                              targetLifeStageScope = "adult | nymph | larvae",
                              excludedLifeStageScope = "egg",
                              isLifeStageScopeFullyReported = "true",
                              targetDegreeOfEstablishmentScope = NA,
                              excludedDegreeOfEstablishmentScope = NA,
                              isDegreeOfEstablishmentScopeFullyReported = NA,
                              targetGrowthFormScope = NA,
                              excludedGrowthFormScope = NA,
                              isGrowthFormScopeFullyReported = NA,
                              hasNonTargetOrganisms = NA,
                              targetHabitatScope = NA,
                              excludedHabitatScope = NA,
                              samplingEffort = "160 m perimeter sampled per bout",
                              isSamplingEffortReported = "true",
                              samplingEffortProtocol = "Ticks are sampled at six plots per site, with bouts every 3 or 6 weeks depending on intensity. Each bout consists of dragging (or flagging if needed) along the full 160 m perimeter of each 40 × 40 m plot. Ticks of all life stages are collected at intervals and preserved in ethanol.",
                              samplingEffortValue = "6, 160",
                              samplingEffortUnit = "plots per bout, meters per plot circuit",
                              samplingPerformedBy = "NEON Field Staff" #Q: I could change this to get the list of actual people, though this has orcid and emails and is quite messy
                            )

Plot Visit Events

Q: I didn’t map sampleCondition since we originally had this going to materialEntityRemarks. Since we’re not doing materialSample, is this where it should still go?

# Plot-visit Event rows: one row per tck_fielddata record, each parented to
# its plot (parentEventID = plotID).
NEON_plot_visit_metadata <- tick.occurrence$tck_fielddata

# Reference list shared by every visit; the per-visit protocol version is
# prepended below when one is recorded.
plot_visit_protocol_references <- "Paull, S. 2022. TOS Protocol and Procedure: TCK – Tick and Tick-Borne Pathogen Sampling. NEON.DOC.014045. NEON (National Ecological Observatory Network). | Laboratory of Medical Zoology (LMZ). 2023. NEON Tick Pathogen Testing SOP, V4.01. University of Massachusetts, Amherst. | Beati, L. 2021. Tick Identification Instructions, USNTC Standard Operating Procedure (SOP). V3. Georgia Southern University, US National Tick Collection (USNTC)."

# Identifiers per visit: taxonomy records are collapsed to one " | "-separated
# string per sampleID and joined back onto the visits, preserving visit order.
#Q: Some plots have had no ticks identified, so identifiedBy is NA - should this be changed to something more descriptive, like 'No ticks found'?
plot_visit_identified_by <- NEON_plot_visit_metadata %>%
  left_join(
    tick.occurrence$tck_taxonomyProcessed %>%
      group_by(sampleID) %>%
      summarise(
        identifiedBy = paste(sort(unique(identifiedBy)), collapse = " | "),
        .groups = "drop"
      ),
    by = "sampleID"
  ) %>%
  pull(identifiedBy)

added_plot_visit_data <- data.frame(parentEventID = NEON_plot_visit_metadata$plotID,
                                    eventID = ifelse(is.na(NEON_plot_visit_metadata$sampleID), #eventID is the sampleID if a sample is taken, if not, we create one with the same format (plotID.date)
                                                      paste0(
                                                        NEON_plot_visit_metadata$plotID, ".",
                                                        format(as.Date(NEON_plot_visit_metadata$collectDate), "%Y%m%d")
                                                      ),
                                                      NEON_plot_visit_metadata$sampleID
                                                    ),
                                    siteNestingDescription = "One visit to a 40x40 m plot designated for tick sampling",
                                    siteCount = 1, #Q: I don't know if this should be here for this event
                                    fieldNumber = NA,
                                    verbatimSiteNames = NEON_plot_visit_metadata$plotID,
                                    habitat = NEON_plot_visit_metadata$nlcdClass,
                                    verbatimSiteDescriptions = NA,
                                    reportedWeather = NA,
                                    # Only populated when sampling was flagged impractical (anything other than NA / "OK").
                                    reportedExtremeConditions = ifelse(
                                      is.na(NEON_plot_visit_metadata$samplingImpractical) | NEON_plot_visit_metadata$samplingImpractical == "OK", NA,
                                      paste0(NEON_plot_visit_metadata$samplingImpractical, ", sampling impractical")
                                    ),
                                    locationID = NEON_plot_visit_metadata$namedLocation,
                                    countryCode = "US",
                                    decimalLatitude = NEON_plot_visit_metadata$decimalLatitude,
                                    decimalLongitude = NEON_plot_visit_metadata$decimalLongitude,
                                    coordinateUncertaintyInMeters = NEON_plot_visit_metadata$coordinateUncertainty,
                                    geodeticDatum = "WGS84",
                                    locality = NEON_plot_visit_metadata$plotID,
                                    # Elevation range is the reported elevation -/+ its uncertainty.
                                    minimumElevationinMeters = NEON_plot_visit_metadata$elevation - NEON_plot_visit_metadata$elevationUncertainty,
                                    maximumElevationinMeters = NEON_plot_visit_metadata$elevation + NEON_plot_visit_metadata$elevationUncertainty,
                                    # Keep a missing elevation as NA; pasting it would produce the literal text "NAm".
                                    verbatimElevation = ifelse(
                                      is.na(NEON_plot_visit_metadata$elevation),
                                      NA,
                                      paste0(NEON_plot_visit_metadata$elevation, "m")
                                    ),
                                    totalAreaSampledValue = NEON_plot_visit_metadata$totalSampledArea,
                                    # NOTE(review): NEON's totalSampledArea is presumably in square metres - confirm whether "m" should be "m²".
                                    totalAreaSampledUnit = ifelse(
                                      is.na(NEON_plot_visit_metadata$totalSampledArea), NA, "m"
                                    ),
                                    geospatialScopeAreaValue = NA,
                                    geospatialScopeAreaUnit = NA,
                                    sampleSizeValue = NA,
                                    sampleSizeUnit = NA,
                                    footprintWKT = NA,
                                    footprintSRS = NA,
                                    isVegetationCoverReported = "false",
                                    eventDate = paste(NEON_plot_visit_metadata$collectDate),
                                    eventTime = NA,
                                    eventDurationValue = NA,
                                    eventDurationUnit = NA,
                                    eventType = "plot visit", #Q: This was 'Site Visit' in the mapping sheet
                                    inventoryTypes = NA,
                                    compilationTypes = NA,
                                    compilationSourceTypes = NA,
                                    samplingProtocol = NEON_plot_visit_metadata$samplingMethod, #Sampling protocol terms should be populated for every Event regardless of hierarchical level Q: I took this to mean all the terms described in 5.5. Methodology or sampling protocol (from here up to eventRemarks), or is this just for the four protocol fields?
                                    protocolNames = NEON_plot_visit_metadata$samplingMethod,
                                    protocolDescriptions = "Ticks are sampled at six plots per site every three to six weeks, depending on prior detections. Sampling follows the 160 m perimeter of each 40 × 40 m plot, using drag cloths (or flagging in dense vegetation). Ticks from all life stages are collected every few meters and preserved in ethanol, with bouts scheduled between green-up and dormancy. Samples are sent to external labs where they are sorted, counted and a subset of identified nymphs are tested for pathogens.",
                                    # Prefix the visit's protocol version when present; a missing version would otherwise be written as "NA | ...".
                                    protocolReferences = ifelse(
                                      is.na(NEON_plot_visit_metadata$samplingProtocolVersion),
                                      plot_visit_protocol_references,
                                      paste(NEON_plot_visit_metadata$samplingProtocolVersion, plot_visit_protocol_references, sep = " | ")
                                    ),
                                    isAbsenceReported = ifelse(
                                      is.na(NEON_plot_visit_metadata$targetTaxaPresent),
                                      "false",
                                      "true"
                                    ), #Q: I only did this at the event level, not the occurrence level (i.e., there are no occurrences with occurrenceStatus = absent) - the guide says otherwise #Q: if targetTaxaPresent is NA (ie. there was no sampling done, should this be NA or false?) #Q: I think I'm getting lost on if this is an attribute of the survey event visit or the protocol
                                    absentTaxa = ifelse(
                                      NEON_plot_visit_metadata$targetTaxaPresent == "N", "?Ixodidae", NA #Q: Since we have pathogens in occurrences, I'm not sure how to designate those here - the ones tested don't have a mca - most are Bacteria but one is a piroplasm  #Q: If there are no absences (i.e., there were ticks found) should this be something besides NA? Should this be a list of species?
                                    ),
                                    isAbundanceReported = ifelse(
                                      is.na(NEON_plot_visit_metadata$targetTaxaPresent),
                                      "false",
                                      "true"
                                    ), #Q: I think I'm getting lost on if this is an attribute of the survey event visit or the protocol #Q: If no survey occurred should this be something besides NA?
                                    isAbundanceCapReported = "false",
                                    abundanceCap = NA,
                                    hasMaterialSamples = "true", #Q: Since we're not explicitly designating these as material samples, should this be false?
                                    materialSampleTypes = "wholeOrganism",
                                    hasVouchers = "true",
                                    voucherInstitutions = "US National Tick Collection, Georgia Southern University",
                                    isLeastSpecificTargetCategoryQuantityInclusive = "false",
                                    dataGeneralizations = NA,
                                    informationWithheld = NA, #TODO: double check this - I don't think any tick taxa are withheld but I'm not 100% certain
                                    fieldNotes = NA,
                                    eventRemarks = NEON_plot_visit_metadata$remarks,
                                    verbatimTargetScope = "ticks", # Recommended best practice is to populate scope terms every Event to which they apply.
                                    targetTaxonomicScope = "?Ixodidae", #Q: Since we have pathogens in occurrences, I'm not sure how to designate those here - the ones tested don't have a mca - most are Bacteria but one is a piroplasm
                                    excludedTaxonomicScope = NA,
                                    isTaxonomicScopeFullyReported = "true",
                                    taxonCompletenessReported = "reportedComplete", #Q: I'm not 100% sure I did these two correctly
                                    taxonCompletenessProtocols = "Tick drag/flag methods are standardized to comprehensively detect all life stages of Ixodidae during the active season.", #Q: I'm not 100% sure I did these two correctly
                                    hasNonTargetTaxa = "false",
                                    areNonTargetTaxaFullyReported = NA,
                                    nonTargetTaxa = NA,
                                    identifiedBy = plot_visit_identified_by,
                                    targetLifeStageScope = "adult | nymph | larvae",
                                    excludedLifeStageScope = "egg",
                                    isLifeStageScopeFullyReported = "true",
                                    targetDegreeOfEstablishmentScope = NA,
                                    excludedDegreeOfEstablishmentScope = NA,
                                    isDegreeOfEstablishmentScopeFullyReported = NA,
                                    targetGrowthFormScope = NA,
                                    excludedGrowthFormScope = NA,
                                    isGrowthFormScopeFullyReported = NA,
                                    hasNonTargetOrganisms = NA,
                                    targetHabitatScope = NA,
                                    excludedHabitatScope = NA,
                                    samplingEffort = "160 m perimeter sampled per bout",
                                    isSamplingEffortReported = "true",
                                    samplingEffortProtocol = "Ticks are sampled at six plots per site, with bouts every 3 or 6 weeks depending on intensity. Each bout consists of dragging (or flagging if needed) along the full 160 m perimeter of each 40 × 40 m plot. Ticks of all life stages are collected at intervals and preserved in ethanol.",
                                    samplingEffortValue = "6, 160",
                                    samplingEffortUnit = "plots per bout, meters per plot circuit",
                                    samplingPerformedBy = NEON_plot_visit_metadata$measuredBy
                                  )

Merge Event Tables

# Stack the event tables for every hierarchy level (project > domain > site >
# plot > plot visit) into one Event table. bind_rows() matches columns by name
# and fills columns missing from a table with NA.
event_data <- bind_rows(
  NEON_project_event,
  added_domain_data,
  added_site_data,
  added_plot_data,
  added_plot_visit_data
)

# The plot and plot-visit tables spell the elevation terms with a lowercase
# "in" (minimumElevationinMeters); the Darwin Core terms are
# minimumElevationInMeters / maximumElevationInMeters. Normalise the headers
# once here so the exported column names match the standard.
dwc_term_corrections <- c(
  minimumElevationinMeters = "minimumElevationInMeters",
  maximumElevationinMeters = "maximumElevationInMeters"
)
for (misspelled_term in names(dwc_term_corrections)) {
  if (!misspelled_term %in% names(event_data)) {
    next
  }
  corrected_term <- dwc_term_corrections[[misspelled_term]]
  if (corrected_term %in% names(event_data)) {
    # Both spellings are present (tables disagreed): merge them into the
    # correctly spelled column rather than creating a duplicate name.
    event_data[[corrected_term]] <- coalesce(
      event_data[[corrected_term]],
      event_data[[misspelled_term]]
    )
    event_data[[misspelled_term]] <- NULL
  } else {
    names(event_data)[names(event_data) == misspelled_term] <- corrected_term
  }
}

# Drop any row names carried over from the component tables.
rownames(event_data) <- NULL

Map Occurrence Data

Tick Occurrences

# Tick Occurrence rows: one row per tck_taxonomyProcessed record, linked to its
# plot-visit Event through eventID = sampleID.
NEON_tick_occurrence_data <- tick.occurrence$tck_taxonomyProcessed

# Common names keyed by scientificName; names not listed here index to NA.
tick_vernacular_names <- c("Dermacentor variabilis" = "American dog tick",
                           "Ixodes scapularis" = "Blacklegged tick / Deer tick",
                           "Haemaphysalis leporispalustris" = "Rabbit tick",
                           "Amblyomma americanum" = "Lone star tick",
                           "Haemaphysalis longicornis" = "Asian longhorned tick",
                           "Ixodes muris" = "Mouse tick",
                           "Ixodes dentatus" = "Rabbit tick",
                           "Amblyomma maculatum" = "Gulf Coast tick",
                           "Ixodes marxi" = "Squirrel tick",
                           "Ixodes brunneus" = "Bird tick",
                           "Dermacentor andersoni" = "Rocky Mountain wood tick",
                           "Dermacentor parumapertus" = "Desert wood tick",
                           "Ixodes pacificus" = "Western blacklegged tick",
                           "Ixodes angustus" = "Mouse tick",
                           "Dermacentor occidentalis" = "Pacific Coast tick"
                         )

# sexOrAge holds either a sex ("Male"/"Female") or a life stage; split it below.
tick_is_sexed <- NEON_tick_occurrence_data$sexOrAge %in% c("Male", "Female")

added_tick_occurrence_data <- data.frame(eventID = NEON_tick_occurrence_data$sampleID,
                                         occurrenceID = NEON_tick_occurrence_data$subsampleID,
                                         basisOfRecord = "PreservedSpecimen",
                                         scientificName = NEON_tick_occurrence_data$scientificName,
                                         scientificNameAuthorship = NEON_tick_occurrence_data$scientificNameAuthorship,
                                         taxonRank = NEON_tick_occurrence_data$taxonRank,
                                         kingdom = "Animalia",
                                         family = NEON_tick_occurrence_data$family,
                                         subfamily = NEON_tick_occurrence_data$subfamily,
                                         tribe = NEON_tick_occurrence_data$tribe,
                                         subtribe = NEON_tick_occurrence_data$subtribe,
                                         genus = NEON_tick_occurrence_data$genus,
                                         subgenus = NEON_tick_occurrence_data$subgenus,
                                         specificEpithet = NEON_tick_occurrence_data$specificEpithet,
                                         infraspecificEpithet = NEON_tick_occurrence_data$infraspecificEpithet,
                                         identificationQualifier = NEON_tick_occurrence_data$identificationQualifier,
                                         identificationReferences = NEON_tick_occurrence_data$identificationReferences,
                                         identificationRemarks = NEON_tick_occurrence_data$identificationProtocolVersion,
                                         occurrenceStatus = "present",
                                         organismQuantity = NA, #individualCount is used instead
                                         organismQuantityType = NA, #individualCount is used instead
                                         individualCount = NEON_tick_occurrence_data$individualCount,
                                         recordedByID = NA, #This information is in the plot visit event
                                         vernacularName = tick_vernacular_names[NEON_tick_occurrence_data$scientificName],
                                         # Sexed records are adults; everything else keeps sexOrAge as its life stage.
                                         sex = ifelse(tick_is_sexed,
                                                       NEON_tick_occurrence_data$sexOrAge,
                                                       NA),
                                         lifeStage = ifelse(tick_is_sexed,
                                                             "Adult",
                                                             NEON_tick_occurrence_data$sexOrAge),
                                         establishmentMeans = NA,
                                         degreeOfEstablishment = NA,
                                         pathway = NA,
                                         vitality = "alive",
                                         preparations = NEON_tick_occurrence_data$archiveMedium,
                                         institutionID = NEON_tick_occurrence_data$archiveFacilityID,
                                         # "<identifier>, <laboratory>"; a missing part is dropped rather than
                                         # being pasted in as the literal text "NA".
                                         identifiedBy = ifelse(
                                           is.na(NEON_tick_occurrence_data$identifiedBy),
                                           NEON_tick_occurrence_data$laboratoryName,
                                           ifelse(
                                             is.na(NEON_tick_occurrence_data$laboratoryName),
                                             NEON_tick_occurrence_data$identifiedBy,
                                             paste(NEON_tick_occurrence_data$identifiedBy, NEON_tick_occurrence_data$laboratoryName, sep = ", ")
                                           )
                                         ),
                                         occurrenceRemarks = NEON_tick_occurrence_data$remarks
                               )

Tick Pathogen Occurrences

# Keep only genuine pathogen test records: rows need both a result and a
# pathogen name, and must not be a DNA-quality check or a molecular tick
# identification (names starting with a tick genus).
NEON_pathogen_occurrence_data <- tick.pathogen$tck_pathogen %>%
  filter(
    !is.na(testResult),
    !is.na(testPathogenName),
    testPathogenName != "HardTick DNA Quality", #Remove DNA Quality test
    !grepl("^(Ixodes|Dermacentor|Amblyomma|Haemaphysalis)", testPathogenName) #Remove molecular identification tests
  )

# Pathogen Occurrence rows: one row per retained pathogen test, with a
# negative test mapped to occurrenceStatus "absent" and a positive to "present".

# Kingdom keyed by tested pathogen name; names not listed here index to NA.
pathogen_kingdoms <- c("Borrelia burgdorferi" = "Bacteria",
                       "Borrelia miyamotoi" = "Bacteria",
                       "Anaplasma phagocytophilum" = "Bacteria",
                       "Rickettsia rickettsii" = "Bacteria",
                       "Ehrlichia chaffeensis" = "Bacteria",
                       "Borrelia lonestari" = "Bacteria",
                       "Babesia microti" = "Protista",
                       "Ehrlichia ewingii" = "Bacteria",
                       "Borrelia burgdorferi sensu lato" = "Bacteria",
                       "Ehrlichia muris-like" = "Bacteria",
                       "Borrelia mayonii" = "Bacteria",
                       "Francisella tularensis" = "Bacteria",
                       "Borrelia sp." = "Bacteria",
                       "Rickettsia parkeri" = "Bacteria",
                       "Rickettsia philipii" = "Bacteria"
                       )

# Disease-agent labels keyed by tested pathogen name.
pathogen_vernacular_names <- c("Borrelia burgdorferi" = "Lyme disease agent",
                               "Borrelia miyamotoi" = "Borrelia miyamotoi disease (BMD) agent",
                               "Anaplasma phagocytophilum" = "Human granulocytic anaplasmosis (HGA) agent",
                               "Rickettsia rickettsii" = "Rocky Mountain spotted fever agent",
                               "Ehrlichia chaffeensis" = "Human monocytic ehrlichiosis agent",
                               "Borrelia lonestari" = "Southern tick-associated rash illness (STARI) agent",
                               "Babesia microti" = "Babesiosis parasite",
                               "Ehrlichia ewingii"  = "Human ehrlichiosis agent",
                               "Borrelia burgdorferi sensu lato" = "Lyme disease agent",
                               "Ehrlichia muris-like" = "Human ehrlichiosis agent",
                               "Borrelia mayonii" = "Lyme disease agent",
                               "Francisella tularensis" = "Tularemia agent",
                               "Borrelia sp." = "Lyme disease or relapsing fever agent",
                               "Rickettsia parkeri" = "Rickettsia parkeri rickettsiosis agent",
                               "Rickettsia philipii" = "Pacific coast tick fever agent"
                              )

added_pathogen_occurrence_data <- data.frame(
                                             # eventID is the first two "."-separated parts of subsampleID.
                                             # sub() is vectorised and leaves a missing subsampleID as NA
                                             # (splitting and re-pasting would yield the text "NA.NA").
                                             eventID = sub("^([^.]*\\.[^.]*).*$", "\\1",
                                                           NEON_pathogen_occurrence_data$subsampleID),
                                             occurrenceID = NEON_pathogen_occurrence_data$uid,
                                             basisOfRecord = "LaboratoryObservation",  #Q: I'm not sure what to put here
                                             scientificName = NEON_pathogen_occurrence_data$testPathogenName,
                                             taxonRank = NA,
                                             kingdom = pathogen_kingdoms[NEON_pathogen_occurrence_data$testPathogenName],
                                             # Any result other than "Negative"/"Positive" is left as NA.
                                             occurrenceStatus = ifelse(NEON_pathogen_occurrence_data$testResult == "Negative", "absent",
                                                                       ifelse(NEON_pathogen_occurrence_data$testResult == "Positive", "present", NA)
                                                                      ),
                                             organismQuantity = NA,
                                             organismQuantityType = NA,
                                             individualCount = NA, #Recommend not sharing this until we find out specifically what this is actually reporting. Note: this is always 1.
                                             recordedByID = NA, #This information is in the plot visit event
                                             vernacularName = pathogen_vernacular_names[NEON_pathogen_occurrence_data$testPathogenName],
                                             sex = NA,
                                             lifeStage = NA,
                                             establishmentMeans = NA,
                                             degreeOfEstablishment = NA,
                                             pathway = NA,
                                             vitality = NA,
                                             # "<protocol version>; <remarks>". A missing part is dropped, so there is
                                             # no trailing space, no " ;  " separator and no literal "NA".
                                             identificationRemarks = ifelse(
                                               is.na(NEON_pathogen_occurrence_data$remarks),
                                               NEON_pathogen_occurrence_data$testProtocolVersion,
                                               ifelse(
                                                 is.na(NEON_pathogen_occurrence_data$testProtocolVersion),
                                                 NEON_pathogen_occurrence_data$remarks,
                                                 paste(NEON_pathogen_occurrence_data$testProtocolVersion, NEON_pathogen_occurrence_data$remarks, sep = "; ")
                                               )
                                             ),
                                             # "<tester>, <laboratory>"; a missing part is dropped rather than
                                             # being pasted in as the literal text "NA".
                                             identifiedBy = ifelse(
                                               is.na(NEON_pathogen_occurrence_data$testedBy),
                                               NEON_pathogen_occurrence_data$laboratoryName,
                                               ifelse(
                                                 is.na(NEON_pathogen_occurrence_data$laboratoryName),
                                                 NEON_pathogen_occurrence_data$testedBy,
                                                 paste(NEON_pathogen_occurrence_data$testedBy, NEON_pathogen_occurrence_data$laboratoryName, sep = ", ")
                                               )
                                             ),
                                             dateIdentified = NEON_pathogen_occurrence_data$testedDate,
                                             materialSampleID = NEON_pathogen_occurrence_data$testingID, #Q: Does this need to be remapped or removed since we aren't doing materialSample?
                                             materialEntityRemarks = "materialSampleID corresponds to the NEON testingID, which uniquely identifies the individual tick nymph(s) tested."
                                   )

Merge Occurrence Tables

# Stack tick and pathogen occurrences into one Occurrence table; columns
# present in only one of the two tables are filled with NA in the other.
occurrence_data <- list(
  added_tick_occurrence_data,
  added_pathogen_occurrence_data
) %>%
  bind_rows()

# Drop any row names carried over from the component tables.
rownames(occurrence_data) <- NULL

Map Resource Relationship Data

Map emof Data

Map Other Data

Save Data

# Export the merged tables to the outputs/ directory.
# na = "" writes missing values as empty cells instead of the literal text
# "NA", and fileEncoding = "UTF-8" keeps non-ASCII characters in the protocol
# text (e.g. the multiplication sign and en dash) intact on every platform.
write.csv(event_data, "outputs/event.csv",
          row.names = FALSE, na = "", fileEncoding = "UTF-8")
write.csv(occurrence_data, "outputs/occurrence.csv",
          row.names = FALSE, na = "", fileEncoding = "UTF-8")
# Not yet mapped (see the empty sections above); enable once the tables exist.
#write.csv(emof_data, "outputs/extendedMeasurementOrFact.csv", row.names = FALSE, na = "", fileEncoding = "UTF-8")
#write.csv(resourceRelationship_data, "outputs/resourceRelationship.csv", row.names = FALSE, na = "", fileEncoding = "UTF-8")