Last active
July 5, 2016 13:54
-
-
Save rmflight/5a3d23aaef168b54770d6505e39c0b10 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "mzML": { | |
| "schemaLocation": "http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd", | |
| "id": "exampleData", | |
| "version": "1.1.0" | |
| }, | |
| "cvList": { | |
| "cv": [ | |
| { | |
| "id": "MS", | |
| "fullName": "Proteomics Standards Initiative Mass Spectrometry Ontology", | |
| "version": "3.79.0", | |
| "URI": "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" | |
| } | |
| ], | |
| "cv.1": [ | |
| { | |
| "id": "UO", | |
| "fullName": "Unit Ontology", | |
| "version": "12:10:2011", | |
| "URI": "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" | |
| } | |
| ], | |
| "count": "2" | |
| }, | |
| "fileDescription": { | |
| "fileContent": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000579", | |
| "name": "MS1 spectrum", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.1": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000580", | |
| "name": "MSn spectrum", | |
| "value": "" | |
| } | |
| ] | |
| }, | |
| "sourceFileList": { | |
| "sourceFile": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000768", | |
| "name": "Thermo nativeID format", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.1": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000563", | |
| "name": "Thermo RAW format", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.2": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000569", | |
| "name": "SHA-1", | |
| "value": "6679ba84f57e8f25a3b8ebecc806ecafc79492ec" | |
| } | |
| ], | |
| "id": "RAW1", | |
| "name": "UK001N1exoposb.raw", | |
| "location": "file:///" | |
| }, | |
| "count": "1" | |
| } | |
| }, | |
| "referenceableParamGroupList": { | |
| "referenceableParamGroup": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1002416", | |
| "name": "Orbitrap Fusion", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.1": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000529", | |
| "name": "instrument serial number", | |
| "value": "FSN10352" | |
| } | |
| ], | |
| "id": "CommonInstrumentParams" | |
| }, | |
| "count": "1" | |
| }, | |
| "softwareList": { | |
| "software": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000532", | |
| "name": "Xcalibur", | |
| "value": "" | |
| } | |
| ], | |
| "id": "Xcalibur", | |
| "version": "1.1.982" | |
| }, | |
| "software.1": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000615", | |
| "name": "ProteoWizard software", | |
| "value": "" | |
| } | |
| ], | |
| "id": "pwiz", | |
| "version": "3.0.9205" | |
| }, | |
| "count": "2" | |
| }, | |
| "instrumentConfigurationList": { | |
| "instrumentConfiguration": { | |
| "referenceableParamGroupRef": [ | |
| { | |
| "ref": "CommonInstrumentParams" | |
| } | |
| ], | |
| "componentList": { | |
| "source": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000398", | |
| "name": "nanoelectrospray", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.1": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000485", | |
| "name": "nanospray inlet", | |
| "value": "" | |
| } | |
| ], | |
| "order": "1" | |
| }, | |
| "analyzer": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000081", | |
| "name": "quadrupole", | |
| "value": "" | |
| } | |
| ], | |
| "order": "2" | |
| }, | |
| "analyzer.1": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000484", | |
| "name": "orbitrap", | |
| "value": "" | |
| } | |
| ], | |
| "order": "3" | |
| }, | |
| "detector": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000624", | |
| "name": "inductive detector", | |
| "value": "" | |
| } | |
| ], | |
| "order": "4" | |
| }, | |
| "count": "4" | |
| }, | |
| "softwareRef": [ | |
| { | |
| "ref": "Xcalibur" | |
| } | |
| ], | |
| "id": "IC1" | |
| }, | |
| "instrumentConfiguration.1": { | |
| "referenceableParamGroupRef": [ | |
| { | |
| "ref": "CommonInstrumentParams" | |
| } | |
| ], | |
| "componentList": { | |
| "source": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000398", | |
| "name": "nanoelectrospray", | |
| "value": "" | |
| } | |
| ], | |
| "cvParam.1": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000485", | |
| "name": "nanospray inlet", | |
| "value": "" | |
| } | |
| ], | |
| "order": "1" | |
| }, | |
| "analyzer": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000081", | |
| "name": "quadrupole", | |
| "value": "" | |
| } | |
| ], | |
| "order": "2" | |
| }, | |
| "analyzer.1": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000083", | |
| "name": "radial ejection linear ion trap", | |
| "value": "" | |
| } | |
| ], | |
| "order": "3" | |
| }, | |
| "detector": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000253", | |
| "name": "electron multiplier", | |
| "value": "" | |
| } | |
| ], | |
| "order": "4" | |
| }, | |
| "count": "4" | |
| }, | |
| "softwareRef": [ | |
| { | |
| "ref": "Xcalibur" | |
| } | |
| ], | |
| "id": "IC2" | |
| }, | |
| "count": "2" | |
| }, | |
| "dataProcessingList": { | |
| "dataProcessing": { | |
| "processingMethod": { | |
| "cvParam": [ | |
| { | |
| "cvRef": "MS", | |
| "accession": "MS:1000544", | |
| "name": "Conversion to mzML", | |
| "value": "" | |
| } | |
| ], | |
| "order": "0", | |
| "softwareRef": "pwiz" | |
| }, | |
| "id": "pwiz_Reader_Thermo_conversion" | |
| }, | |
| "count": "1" | |
| }, | |
| "run": { | |
| "id": "exampleData", | |
| "defaultInstrumentConfigurationRef": "IC1", | |
| "startTimeStamp": "2015-07-29 12:49:35Z", | |
| "defaultSourceFileRef": "RAW1", | |
| "scanPolarity": "positive" | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="utf-8"?> | |
| <indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.2_idx.xsd"> | |
| <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="exampleData" version="1.1.0"> | |
| <cvList count="2"> | |
| <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.79.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/> | |
| <cv id="UO" fullName="Unit Ontology" version="12:10:2011" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/> | |
| </cvList> | |
| <fileDescription> | |
| <fileContent> | |
| <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/> | |
| </fileContent> | |
| <sourceFileList count="1"> | |
| <sourceFile id="RAW1" name="UK001N1exoposb.raw" location="file:///"> | |
| <cvParam cvRef="MS" accession="MS:1000768" name="Thermo nativeID format" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000563" name="Thermo RAW format" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="6679ba84f57e8f25a3b8ebecc806ecafc79492ec"/> | |
| </sourceFile> | |
| </sourceFileList> | |
| </fileDescription> | |
| <referenceableParamGroupList count="1"> | |
| <referenceableParamGroup id="CommonInstrumentParams"> | |
| <cvParam cvRef="MS" accession="MS:1002416" name="Orbitrap Fusion" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000529" name="instrument serial number" value="FSN10352"/> | |
| </referenceableParamGroup> | |
| </referenceableParamGroupList> | |
| <softwareList count="2"> | |
| <software id="Xcalibur" version="1.1.982"> | |
| <cvParam cvRef="MS" accession="MS:1000532" name="Xcalibur" value=""/> | |
| </software> | |
| <software id="pwiz" version="3.0.9205"> | |
| <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/> | |
| </software> | |
| </softwareList> | |
| <instrumentConfigurationList count="2"> | |
| <instrumentConfiguration id="IC1"> | |
| <referenceableParamGroupRef ref="CommonInstrumentParams"/> | |
| <componentList count="4"> | |
| <source order="1"> | |
| <cvParam cvRef="MS" accession="MS:1000398" name="nanoelectrospray" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000485" name="nanospray inlet" value=""/> | |
| </source> | |
| <analyzer order="2"> | |
| <cvParam cvRef="MS" accession="MS:1000081" name="quadrupole" value=""/> | |
| </analyzer> | |
| <analyzer order="3"> | |
| <cvParam cvRef="MS" accession="MS:1000484" name="orbitrap" value=""/> | |
| </analyzer> | |
| <detector order="4"> | |
| <cvParam cvRef="MS" accession="MS:1000624" name="inductive detector" value=""/> | |
| </detector> | |
| </componentList> | |
| <softwareRef ref="Xcalibur"/> | |
| </instrumentConfiguration> | |
| <instrumentConfiguration id="IC2"> | |
| <referenceableParamGroupRef ref="CommonInstrumentParams"/> | |
| <componentList count="4"> | |
| <source order="1"> | |
| <cvParam cvRef="MS" accession="MS:1000398" name="nanoelectrospray" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000485" name="nanospray inlet" value=""/> | |
| </source> | |
| <analyzer order="2"> | |
| <cvParam cvRef="MS" accession="MS:1000081" name="quadrupole" value=""/> | |
| </analyzer> | |
| <analyzer order="3"> | |
| <cvParam cvRef="MS" accession="MS:1000083" name="radial ejection linear ion trap" value=""/> | |
| </analyzer> | |
| <detector order="4"> | |
| <cvParam cvRef="MS" accession="MS:1000253" name="electron multiplier" value=""/> | |
| </detector> | |
| </componentList> | |
| <softwareRef ref="Xcalibur"/> | |
| </instrumentConfiguration> | |
| </instrumentConfigurationList> | |
| <dataProcessingList count="1"> | |
| <dataProcessing id="pwiz_Reader_Thermo_conversion"> | |
| <processingMethod order="0" softwareRef="pwiz"> | |
| <cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" value=""/> | |
| </processingMethod> | |
| </dataProcessing> | |
| </dataProcessingList> | |
| <run id="exampleData" defaultInstrumentConfigurationRef="IC1" startTimeStamp="2015-07-29T12:49:35Z" defaultSourceFileRef="RAW1"> | |
| <spectrumList count="36" defaultDataProcessingRef="pwiz_Reader_Thermo_conversion"> | |
| <spectrum index="0" id="controllerType=0 controllerNumber=1 scan=3" defaultArrayLength="34573"> | |
| <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="1"/> | |
| <cvParam cvRef="MS" accession="MS:1000130" name="positive scan" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000128" name="profile spectrum" value=""/> | |
| <cvParam cvRef="MS" accession="MS:1000504" name="base peak m/z" value="432.239837646484" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/> | |
| <cvParam cvRef="MS" accession="MS:1000505" name="base peak intensity" value="3.8174825e06" unitCvRef="MS" unitAccession="MS:1000131" unitName="number of detector counts"/> | |
| <cvParam cvRef="MS" accession="MS:1000285" name="total ion current" value="2.0373532e07"/> | |
| <cvParam cvRef="MS" accession="MS:1000528" name="lowest observed m/z" value="148.509396632102" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/> | |
| <cvParam cvRef="MS" accession="MS:1000527" name="highest observed m/z" value="1616.112961912861" unitCvRef="MS" unitAccession="MS:1000040" unitName="m/z"/> | |
| <scanList count="1"> | |
| <cvParam cvRef="MS" accession="MS:1000795" name="no combination" value=""/> | |
| </scanList> | |
| </spectrum> | |
| </spectrumList> | |
| </run> | |
| </mzML> | |
| </indexedmzML> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example driver: read the metadata of one mzML file and save it as JSON.
library(XML)
library(jsonlite)

# brings get_mzml_metadata() and meta_export_json() into scope
source("R/file_metadata.R")

input_file <- "input.mzML"

# nested list of metadata extracted from the mzML document
input_list <- get_mzml_metadata(mzml_file = input_file)

# serialise the metadata list to a JSON string
input_json <- meta_export_json(meta_list = input_list)

# write the JSON string to disk
cat(input_json, file = "input.json")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' get mzML metadata
#'
#' Parses an mzML file and collects the attributes of the `mzML` element, the
#' content of its descriptive child sections (`cvList`, `fileDescription`,
#' `referenceableParamGroupList`, `softwareList`,
#' `instrumentConfigurationList`, `dataProcessingList`) and the attributes of
#' the `run` element. XML attributes are promoted to regular list entries and
#' named character vectors are turned into one-row data frames. Both indexed
#' files (`indexedmzML` wrapping `mzML`) and plain files (`mzML` as the root
#' element) are accepted.
#'
#' @param mzml_file the mzML file to get metadata from
#'
#' @return a nested list of metadata. The `run` entry additionally carries
#'   `scanPolarity` ("positive", "negative" or "mixed") and, when the file
#'   provides one, a `startTimeStamp` in which "T" is replaced by a space.
#'
#' @import XML
#' @export
get_mzml_metadata <- function(mzml_file) {
  xml_doc <- xmlTreeParse(mzml_file, useInternalNodes = TRUE)
  ns <- xmlNamespaceDefinitions(xmlRoot(xml_doc), recursive = TRUE,
                                simplify = TRUE)
  # give the first namespace definition a prefix that XPath can refer to
  names(ns)[1] <- "d1"

  # the mzML element is either wrapped in indexedmzML or is the root itself
  mz_metanodes <- getNodeSet(xml_doc,
                             "/d1:indexedmzML/d1:mzML | /d1:mzML", ns)
  if (length(mz_metanodes) == 0) {
    stop("no mzML element found in ", mzml_file, call. = FALSE)
  }
  mzml_node <- mz_metanodes[[1]]

  # drop the class and namespace bookkeeping so only name = value pairs remain
  tmp_attr <- unclass(xmlAttrs(mzml_node))
  attr(tmp_attr, "namespaces") <- NULL
  mz_meta <- list(mzML = list(.attrs = tmp_attr))

  other_nodes_2_get <- c("cvList", "fileDescription",
                         "referenceableParamGroupList",
                         "softwareList",
                         "instrumentConfigurationList",
                         "dataProcessingList")
  other_nodes <- xmlChildren(mzml_node)
  other_list <- lapply(other_nodes, xmlToList)

  # only keep the sections that exist in this file; indexing by a missing
  # name would otherwise add NA-named NULL entries to the result
  present_nodes <- intersect(other_nodes_2_get, names(other_list))
  mz_meta <- c(mz_meta, other_list[present_nodes])

  # always a list, regardless of how many attributes the run element has
  mz_meta[["run"]] <- list(.attrs = xmlAttrs(mzml_node[["run"]]))

  mz_meta <- .remove_attrs(mz_meta)
  mz_meta_frame <- .to_data_frame(mz_meta)

  mz_meta_frame[["run"]][["scanPolarity"]] <-
    .get_scan_polarity(other_list[["run"]][["spectrumList"]])

  # only reformat the time stamp when the run element actually has one
  start_time <- mz_meta_frame[["run"]][["startTimeStamp"]]
  if (!is.null(start_time)) {
    mz_meta_frame[["run"]][["startTimeStamp"]] <- gsub("T", " ", start_time)
  }

  mz_meta_frame
}
#' export metadata to json
#'
#' Serialises a metadata list to a pretty-printed JSON string in which
#' length-one vectors are written as scalars instead of arrays.
#'
#' @param meta_list a list of metadata
#'
#' @return the JSON text produced by `toJSON()`
#'
#' @importFrom jsonlite toJSON
#' @export
meta_export_json <- function(meta_list) {
  json_text <- toJSON(meta_list, auto_unbox = TRUE, pretty = TRUE)
  json_text
}
#' transform to data frame
#'
#' Walks a nested list and replaces every named character vector with a
#' one-row data frame (one column per name). Lists are processed recursively;
#' anything else (unnamed character vectors, `NULL`, numbers, ...) is returned
#' unchanged.
#'
#' @param in_list the list of xml nodes to work on
#'
#' @return an object with the same nesting as `in_list`, with named character
#'   vectors converted to one-row data frames
#'
.to_data_frame <- function(in_list) {
  if (inherits(in_list, "list")) {
    return(lapply(in_list, .to_data_frame))
  }
  if (is.character(in_list) && !is.null(names(in_list))) {
    # t() turns the named vector into a 1 x n matrix so names become columns;
    # keep the values as character independent of the R version
    return(as.data.frame(t(as.matrix(in_list)), stringsAsFactors = FALSE))
  }
  # nothing to convert: hand the value back instead of failing
  in_list
}
#' remove attributes
#'
#' Recursively removes the list entry called ".attrs" from a list and makes
#' its named values first level partners of the other entries. When any
#' attribute name is already used by another entry of the same list, the
#' ".attrs" entry is left in place so that nothing is overwritten.
#'
#' @param in_list the list to work on
#'
#' @return `in_list` with ".attrs" entries promoted at every level; input that
#'   is not a list is returned unchanged
#'
.remove_attrs <- function(in_list) {
  if (!inherits(in_list, "list")) {
    return(in_list)
  }

  out_list <- in_list
  list_names <- names(out_list)

  if (".attrs" %in% list_names) {
    tmp_attrs <- out_list[[".attrs"]]
    name_attrs <- names(tmp_attrs)
    # only promote when no attribute would clobber an existing entry
    if (!any(name_attrs %in% list_names)) {
      for (i_name in name_attrs) {
        out_list[[i_name]] <- tmp_attrs[[i_name]]
      }
      out_list[[".attrs"]] <- NULL
    }
  }

  # descend exactly once into every entry; a second pass gives the same
  # result but doubles the work at each nesting level
  lapply(out_list, .remove_attrs)
}
#' get scan polarity
#'
#' Looks at the "cvParam" entries of every spectrum in a spectrum list (as
#' produced by xmlToList) and reports whether the scans are all positive, all
#' negative, or anything else ("mixed").
#'
#' @param spectrum_list the list of spectra
#'
#' @return one of "positive", "negative" or "mixed"
#'
.get_scan_polarity <- function(spectrum_list) {
  # the attributes of the spectrum list itself are not spectra
  spectrum_list[[".attrs"]] <- NULL

  # per spectrum: every cvParam value that mentions "scan"
  find_scan_terms <- function(one_spectrum) {
    is_cv_param <- names(one_spectrum) %in% "cvParam"
    param_values <- unlist(one_spectrum[which(is_cv_param)])
    grep("scan", param_values, value = TRUE)
  }
  terms_per_spectrum <- lapply(spectrum_list, find_scan_terms)

  distinct_terms <- as.character(unique(terms_per_spectrum))

  # more than one (or no) distinct description cannot be a single polarity
  if (length(distinct_terms) != 1) {
    return("mixed")
  }
  if (grepl("positive", distinct_terms)) {
    return("positive")
  }
  if (grepl("negative", distinct_terms)) {
    return("negative")
  }
  "mixed"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment