dlebauer · May 6, 2020 18:16
diff --git a/terraref_eml.R b/terraref_eml.R
 # Code used to generate metadata/eml_metadata.xml
 #
 # Builds an EML document for the TERRA REF dataset (title, creator/contact,
 # abstract, and temporal / geographic / taxonomic coverage), validates it,
 # and writes it to metadata/eml_metadata.xml. Site bounds and species names
 # are queried from the TERRA REF BETYdb instance, so network access is needed.
 library(EML)
 library(traits)
 library(sf)

 # Point the traits package's BETYdb client at the TERRA REF instance.
 options(betydb_url = "https://terraref.ncsa.illinois.edu/bety/",
         betydb_api_version = "v1")

 # Responsible party ----
 # The same party record is reused below as both creator and contact.
 dsl <- eml$creator(
   individualName = eml$individualName(givenName = "David",
                                       surName = "LeBauer"),
   organizationName = "University of Arizona",
   electronicMailAddress = "[email protected]",
   userId = list(directory = "https://orcid.org",
                 userId    = "https://orcid.org/0000-0001-7228-053X"))

 # Document skeleton ----
 doc <- list(
   packageId = "dataset-1",
   system = "Dryad",
   dataset = eml$dataset(
     title = "Data From: TERRA REF, An Open Reference Data Set From High Resolution Genomics, Phenomics, and Imaging Sensors",
     creator = dsl,
     contact = dsl))

 # Abstract ----
 # Parse the abstract text file and re-class the result as "abstract" before
 # attaching it to the dataset.
 abstract <- EML::set_TextType("metadata/abstract.md")
 class(abstract) <- "abstract"
 doc$dataset$abstract <- abstract

 # Coverage ----
 # The leading "~" in the sitename filter is passed through to BETYdb as-is.
 mac_field <- betydb_query(table = "sites", sitename = "~MAC Field Scanner Field")

 # Bounding box of the site geometry; supplies the four bounding coordinates.
 mac_bounds <- st_bbox(st_as_sfc(mac_field$geometry))

 sorghum <- betydb_query(table = "species", genus = "Sorghum")

 # One row per species record returned by the query.
 species <- data.frame(Genus = sorghum$genus,
                       Species = sorghum$scientificname)

 doc$dataset$coverage <- set_coverage(
   beginDate = "2017-04-13",
   endDate = "2018-08-02",
   geographicDescription   = "Maricopa Agricultural Center, Maricopa, AZ",
   sci_names = species,
   westBoundingCoordinate  = mac_bounds["xmin"],
   eastBoundingCoordinate  = mac_bounds["xmax"],
   northBoundingCoordinate = mac_bounds["ymax"],
   southBoundingCoordinate = mac_bounds["ymin"]
 )

 # Sections not enabled yet ----
 # doc$dataset$methods <- EML::set_methods(
 #   sampling_file = 'metadata/experiments.md',
 #   methods_file = 'metadata/methods.md')

 # (A trailing space was added to the first fragment so the two pieces of the
 # description do not run together when pasted.)
 #doc$dataset$dataTable <- eml$dataTable(
 #  entityName = 'metadata/germplasm.csv',
 #  entityDescription = paste0("List of Sorghum genotypes ('accessions') ",
 #                             "and experiments in which they were included"))

 # Validate and write ----
 # Do not write a document that fails validation. Per the EML package
 # documentation, eml_validate() returns a logical whose "errors" attribute
 # holds the validator messages; as.logical() strips that attribute so the
 # check is a plain TRUE/FALSE test.
 validation <- eml_validate(doc)
 if (!isTRUE(as.logical(validation))) {
   stop("EML validation failed; metadata/eml_metadata.xml was not written:\n",
        paste(attr(validation, "errors"), collapse = "\n"),
        call. = FALSE)
 }
 write_eml(doc, "metadata/eml_metadata.xml")
diff --git a/terraref_eml.xml b/terraref_eml.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <eml:eml xmlns:eml="https://eml.ecoinformatics.org/eml-2.2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:stmml="http://www.xml-cml.org/schema/stmml-1.2" packageId="dataset-1" system="Dryad" xsi:schemaLocation="https://eml.ecoinformatics.org/eml-2.2.0 https://eml.ecoinformatics.org/eml-2.2.0/eml.xsd">
  <dataset>
    <title>Data From: TERRA REF, An Open Reference Data Set From High Resolution Genomics, Phenomics, and Imaging Sensors</title>
    <creator>
      <individualName>
        <givenName>David</givenName>
        <surName>LeBauer</surName>
      </individualName>
      <organizationName>University of Arizona</organizationName>
      <electronicMailAddress>[email protected]</electronicMailAddress>
      <userId directory="https://orcid.org">https://orcid.org/0000-0001-7228-053X</userId>
    </creator>
    <abstract>
      <para>
  The ARPA-E funded TERRA REF project is generating open access reference
  datasets for the study of plant sensing, genomics, and phenomics.
  Sensor data are generated by a field scanner sensing platform that
  captures color, thermal, hyperspectral, and active fluorescence
  imagery as well as three dimensional structure and associated
  environmental measurements. These data are provided alongside
  traditional field methods to support calibration and validation of
  algorithms used to extract plot level phenotypes from these datasets.
 </para>
      <para>
  Data were collected at a field site in Maricopa, AZ that hosts a large
  field scanner with fifteen sensors, many of which are capable of
  capturing mm scale images and point clouds at daily to weekly
  intervals.
 </para>
      <para>
  These data are intended to be reused, and are accessible as a
  combination of files and databases linked by spatial, temporal, and
  genomic information. In addition to providing open access data, the
  entire computational pipeline is open source, and we enable users to
  access high performance computing environments.
 </para>
      <para>
  Data Types:
 </para>
      <para>
  The study has evaluated a sorghum diversity panel, biparental cross
  populations, and elite lines and hybrids from structured breeding
  populations as well as a durum wheat diversity panel. This reference
  dataset can be used to characterize phenotype-to-genotype
  associations, on a genomic scale, that will enable knowledge-driven
  breeding and the development of higher-yielding cultivars of sorghum
  and wheat. The data are also being used to develop new algorithms for
  machine learning, image analysis, genomics, and optical sensor
  engineering.
 </para>
    </abstract>
    <coverage>
      <geographicCoverage>
        <geographicDescription>Maricopa Agricultural Center, Maricopa, AZ</geographicDescription>
        <boundingCoordinates>
          <westBoundingCoordinate>-111.9751</westBoundingCoordinate>
          <eastBoundingCoordinate>-111.9748</eastBoundingCoordinate>
          <northBoundingCoordinate>33.0765</northBoundingCoordinate>
          <southBoundingCoordinate>33.0745</southBoundingCoordinate>
        </boundingCoordinates>
      </geographicCoverage>
      <temporalCoverage>
        <rangeOfDates>
          <beginDate>
            <calendarDate>2017-04-13</calendarDate>
          </beginDate>
          <endDate>
            <calendarDate>2018-08-02</calendarDate>
          </endDate>
        </rangeOfDates>
      </temporalCoverage>
      <taxonomicCoverage>
        <taxonomicClassification>
          <taxonRankName>Genus</taxonRankName>
          <taxonRankValue>Sorghum</taxonRankValue>
          <taxonomicClassification>
            <taxonRankName>Species</taxonRankName>
            <taxonRankValue>Sorghum bicolor</taxonRankValue>
          </taxonomicClassification>
        </taxonomicClassification>
      </taxonomicCoverage>
    </coverage>
    <contact>
      <individualName>
        <givenName>David</givenName>
        <surName>LeBauer</surName>
      </individualName>
      <organizationName>University of Arizona</organizationName>
      <electronicMailAddress>[email protected]</electronicMailAddress>
      <userId directory="https://orcid.org">https://orcid.org/0000-0001-7228-053X</userId>
    </contact>
  </dataset>
 </eml:eml>
	# Code used to generate metadata/eml_metadata.xml
	#
	# Builds an EML document for the TERRA REF dataset (title, creator/contact,
	# abstract, and temporal / geographic / taxonomic coverage), validates it,
	# and writes it to metadata/eml_metadata.xml. Site bounds and species names
	# are queried from the TERRA REF BETYdb instance, so network access is needed.
	library(EML)
	library(traits)
	library(sf)

	# Point the traits package's BETYdb client at the TERRA REF instance.
	options(betydb_url = "https://terraref.ncsa.illinois.edu/bety/",
	        betydb_api_version = "v1")

	# Responsible party ----
	# The same party record is reused below as both creator and contact.
	dsl <- eml$creator(
	  individualName = eml$individualName(givenName = "David",
	                                      surName = "LeBauer"),
	  organizationName = "University of Arizona",
	  electronicMailAddress = "[email protected]",
	  userId = list(directory = "https://orcid.org",
	                userId = "https://orcid.org/0000-0001-7228-053X"))

	# Document skeleton ----
	doc <- list(
	  packageId = "dataset-1",
	  system = "Dryad",
	  dataset = eml$dataset(
	    title = "Data From: TERRA REF, An Open Reference Data Set From High Resolution Genomics, Phenomics, and Imaging Sensors",
	    creator = dsl,
	    contact = dsl))

	# Abstract ----
	# Parse the abstract text file and re-class the result as "abstract" before
	# attaching it to the dataset.
	abstract <- EML::set_TextType("metadata/abstract.md")
	class(abstract) <- "abstract"
	doc$dataset$abstract <- abstract

	# Coverage ----
	# The leading "~" in the sitename filter is passed through to BETYdb as-is.
	mac_field <- betydb_query(table = "sites", sitename = "~MAC Field Scanner Field")

	# Bounding box of the site geometry; supplies the four bounding coordinates.
	mac_bounds <- st_bbox(st_as_sfc(mac_field$geometry))

	sorghum <- betydb_query(table = "species", genus = "Sorghum")

	# One row per species record returned by the query.
	species <- data.frame(Genus = sorghum$genus,
	                      Species = sorghum$scientificname)

	doc$dataset$coverage <- set_coverage(
	  beginDate = "2017-04-13",
	  endDate = "2018-08-02",
	  geographicDescription = "Maricopa Agricultural Center, Maricopa, AZ",
	  sci_names = species,
	  westBoundingCoordinate = mac_bounds["xmin"],
	  eastBoundingCoordinate = mac_bounds["xmax"],
	  northBoundingCoordinate = mac_bounds["ymax"],
	  southBoundingCoordinate = mac_bounds["ymin"]
	)

	# Sections not enabled yet ----
	# doc$dataset$methods <- EML::set_methods(
	# sampling_file = 'metadata/experiments.md',
	# methods_file = 'metadata/methods.md')

	# (A trailing space was added to the first fragment so the two pieces of the
	# description do not run together when pasted.)
	#doc$dataset$dataTable <- eml$dataTable(
	# entityName = 'metadata/germplasm.csv',
	# entityDescription = paste0("List of Sorghum genotypes ('accessions') ",
	# "and experiments in which they were included"))

	# Validate and write ----
	# Do not write a document that fails validation. Per the EML package
	# documentation, eml_validate() returns a logical whose "errors" attribute
	# holds the validator messages; as.logical() strips that attribute so the
	# check is a plain TRUE/FALSE test.
	validation <- eml_validate(doc)
	if (!isTRUE(as.logical(validation))) {
	  stop("EML validation failed; metadata/eml_metadata.xml was not written:\n",
	       paste(attr(validation, "errors"), collapse = "\n"),
	       call. = FALSE)
	}
	write_eml(doc, "metadata/eml_metadata.xml")
	<?xml version="1.0" encoding="UTF-8"?>
	<eml:eml xmlns:eml="https://eml.ecoinformatics.org/eml-2.2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:stmml="http://www.xml-cml.org/schema/stmml-1.2" packageId="dataset-1" system="Dryad" xsi:schemaLocation="https://eml.ecoinformatics.org/eml-2.2.0 https://eml.ecoinformatics.org/eml-2.2.0/eml.xsd">
	<dataset>
	<title>Data From: TERRA REF, An Open Reference Data Set From High Resolution Genomics, Phenomics, and Imaging Sensors</title>
	<creator>
	<individualName>
	<givenName>David</givenName>
	<surName>LeBauer</surName>
	</individualName>
	<organizationName>University of Arizona</organizationName>
	<electronicMailAddress>[email protected]</electronicMailAddress>
	<userId directory="https://orcid.org">https://orcid.org/0000-0001-7228-053X</userId>
	</creator>
	<abstract>
	<para>
	The ARPA-E funded TERRA REF project is generating open access reference
	datasets for the study of plant sensing, genomics, and phenomics.
	Sensor data are generated by a field scanner sensing platform that
	captures color, thermal, hyperspectral, and active fluorescence
	imagery as well as three dimensional structure and associated
	environmental measurements. These data are provided alongside
	traditional field methods to support calibration and validation of
	algorithms used to extract plot level phenotypes from these datasets.
	</para>
	<para>
	Data were collected at a field site in Maricopa, AZ that hosts a large
	field scanner with fifteen sensors, many of which are capable of
	capturing mm scale images and point clouds at daily to weekly
	intervals.
	</para>
	<para>
	These data are intended to be reused, and are accessible as a
	combination of files and databases linked by spatial, temporal, and
	genomic information. In addition to providing open access data, the
	entire computational pipeline is open source, and we enable users to
	access high performance computing environments.
	</para>
	<para>
	Data Types:
	</para>
	<para>
	The study has evaluated a sorghum diversity panel, biparental cross
	populations, and elite lines and hybrids from structured breeding
	populations as well as a durum wheat diversity panel. This reference
	dataset can be used to characterize phenotype-to-genotype
	associations, on a genomic scale, that will enable knowledge-driven
	breeding and the development of higher-yielding cultivars of sorghum
	and wheat. The data are also being used to develop new algorithms for
	machine learning, image analysis, genomics, and optical sensor
	engineering.
	</para>
	</abstract>
	<coverage>
	<geographicCoverage>
	<geographicDescription>Maricopa Agricultural Center, Maricopa, AZ</geographicDescription>
	<boundingCoordinates>
	<westBoundingCoordinate>-111.9751</westBoundingCoordinate>
	<eastBoundingCoordinate>-111.9748</eastBoundingCoordinate>
	<northBoundingCoordinate>33.0765</northBoundingCoordinate>
	<southBoundingCoordinate>33.0745</southBoundingCoordinate>
	</boundingCoordinates>
	</geographicCoverage>
	<temporalCoverage>
	<rangeOfDates>
	<beginDate>
	<calendarDate>2017-04-13</calendarDate>
	</beginDate>
	<endDate>
	<calendarDate>2018-08-02</calendarDate>
	</endDate>
	</rangeOfDates>
	</temporalCoverage>
	<taxonomicCoverage>
	<taxonomicClassification>
	<taxonRankName>Genus</taxonRankName>
	<taxonRankValue>Sorghum</taxonRankValue>
	<taxonomicClassification>
	<taxonRankName>Species</taxonRankName>
	<taxonRankValue>Sorghum bicolor</taxonRankValue>
	</taxonomicClassification>
	</taxonomicClassification>
	</taxonomicCoverage>
	</coverage>
	<contact>
	<individualName>
	<givenName>David</givenName>
	<surName>LeBauer</surName>
	</individualName>
	<organizationName>University of Arizona</organizationName>
	<electronicMailAddress>[email protected]</electronicMailAddress>
	<userId directory="https://orcid.org">https://orcid.org/0000-0001-7228-053X</userId>
	</contact>
	</dataset>
	</eml:eml>