Created
October 1, 2012 08:58
-
-
Save ktym/3810432 to your computer and use it in GitHub Desktop.
Convert RefSeq genome entry into RDF/Turtle using FALDO (BH12) and URN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby-1.9
require 'rubygems'
require 'date'
require 'json'
require 'securerandom'
require 'uri'
require 'bio'
# [TODO] integrate this into BioRuby
module Bio
  # Convenience accessors for the DBLINK field, added to BioRuby's GenBank
  # entry class by reopening it.
  class GenBank
    # Returns whatever `fetch('DBLINK')` yields for this entry.
    def dblink
      fetch('DBLINK')
    end

    # Returns the first run of digits found in the DBLINK field as a String
    # (e.g. "148" for "BioProject: PRJNA148"), or nil when the field has no
    # digits or is missing.
    def bioproject
      # to_s guards against a nil DBLINK so a missing field yields nil
      # instead of raising NoMethodError.
      dblink.to_s[/\d+/]
    end
  end
end
###
### Utilities for RDF generation
###
# Helpers for writing RDF/Turtle as plain text lines.
module RDFSupport
  # Characters that are escaped inside a Turtle double-quoted string literal,
  # mapped to their escape sequences.
  TURTLE_ESCAPES = {
    "\\" => "\\\\",
    '"'  => '\\"',
    "\t" => "\\t",
    "\n" => "\\n",
    "\r" => "\\r",
  }.freeze

  # Returns a fresh random resource identifier of the form
  # "<urn:uuid:...>".
  #
  # prefix:: kept for backward compatibility; it is not used by the
  #          current urn:uuid scheme (see the commented alternatives).
  def new_uuid(prefix = "http://purl.jp/bio/10/genome/uuid/")
    #return "<#{prefix}#{SecureRandom.uuid}>"
    #return "genome:uuid-#{SecureRandom.uuid}"
    "<urn:uuid:#{SecureRandom.uuid}>"
  end

  # Returns +str+ as a Turtle double-quoted string literal, including the
  # surrounding quotes.
  #
  # Escaping is done in a single pass so that each special character is
  # escaped exactly once. (The previous implementation escaped by hand and
  # then called String#inspect, which escaped everything a second time and
  # could emit Ruby-only sequences that Turtle parsers reject.)
  def quote(str)
    %("#{str.to_s.gsub(/[\\"\t\n\r]/, TURTLE_ESCAPES)}")
  end

  # Returns one Turtle statement: subject, predicate and object joined by
  # tabs and terminated with " .".
  def triple(s, p, o)
    "#{[s, p, o].join("\t")} ."
  end

  # Returns the @prefix declarations shared by all generated documents,
  # as an Array of Turtle lines.
  def default_prefix
    [
      triple("@prefix", "rdf:", "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>"),
      triple("@prefix", "rdfs:", "<http://www.w3.org/2000/01/rdf-schema#>"),
      #triple("@prefix", "dcterms:", "<http://purl.org/dc/terms/>"),
      triple("@prefix", "xsd:", "<http://www.w3.org/2001/XMLSchema#>"),
      #triple("@prefix", "sio:", "<http://semanticscience.org/resource#>"),
      #triple("@prefix", "so:", "<http://purl.org/obo/owl/SO#>"),
      triple("@prefix", "obo:", "<http://purl.obolibrary.org/obo/>"),
      triple("@prefix", "faldo:", "<http://biohackathon.org/resource/faldo#>"),
    ]
  end

  # Parses a date string with Date.parse and returns it formatted as
  # "YYYY-MM-DD". Raises whatever Date.parse raises on unparsable input.
  def usdate2date(str)
    Date.parse(str).strftime("%Y-%m-%d")
  end
end
###
### Mapping RefSeq db_xref to Identifiers.org
###
# https://gist.github.com/3985701
# https://gist.github.com/4146256
# Lookup table from RefSeq db_xref database names to URI metadata,
# loaded from a JSON file.
class RS_ID
  include RDFSupport

  # path:: JSON file to load; defaults to "rs_id.json" in the current
  #        working directory. Raises if the file is missing or not JSON.
  def initialize(path = "rs_id.json")
    @rs_id = JSON.parse(File.read(path))
  end

  # Returns the entry stored for the database name +db+, or nil when the
  # database is not in the table.
  def fetch(db)
    @rs_id[db]
  end

  # Prints one rdfs:label triple per table entry, ordered by database name,
  # using each entry's 'class' and 'label' values.
  def labels
    # Sort on the key only, so entry hashes are never compared.
    @rs_id.sort_by { |db, _entry| db }.each do |_db, entry|
      puts triple("insdc:#{entry['class']}", "rdfs:label", quote(entry['label']))
    end
  end
end
###
### Mapping RefSeq feature table to Sequence Ontology
###
# https://gist.github.com/3650401
# Lookup table from feature-table keys to Sequence Ontology metadata,
# loaded from a JSON file.
#
#   ftso = FT_SO.new
#   puts ftso.so_id("-10_signal")  # => "SO:0000175"
class FT_SO
  # path:: JSON file to load; defaults to "ft_so.json" in the current
  #        working directory. Raises if the file is missing or not JSON.
  def initialize(path = "ft_so.json")
    @data = JSON.parse(File.read(path))
  end

  # Returns the "so_id" value recorded for +feature+, or nil if unknown.
  def so_id(feature)
    lookup(feature, "so_id")
  end

  # Returns the "so_term" value recorded for +feature+, or nil if unknown.
  def so_term(feature)
    lookup(feature, "so_term")
  end

  # Returns the "so_desc" value recorded for +feature+, or nil if unknown.
  def so_desc(feature)
    lookup(feature, "so_desc")
  end

  # Returns the "ft_desc" value recorded for +feature+, or nil if unknown.
  def ft_desc(feature)
    lookup(feature, "ft_desc")
  end

  private

  # Shared accessor: returns @data[feature][key], or nil when +feature+
  # has no entry in the table.
  def lookup(feature, key)
    entry = @data[feature]
    entry[key] if entry
  end
end
###
### Convert RefSeq (prokaryote) entries to RDF
###
# Reads RefSeq flat-file entries and prints them to standard output as
# RDF/Turtle, describing feature locations with FALDO terms and
# identifying resources with urn:uuid URIs.
#
# Constructing an instance performs the whole conversion: it prints the
# @prefix header and then every entry read from the given IO.
class RefSeq2RDF
  include RDFSupport

  # Entries whose InterPro db_xref values may contain free text instead
  # of an IPR accession (see the examples in #feature_xref).
  INTERPRO_FIXUP_ACCESSIONS = Regexp.union(%w[
    NC_010994.1 NC_014958.1 NC_015385.1 NC_015386.1
    NC_015387.1 NC_015388.1 NC_015389.1
  ])

  # Entries whose ASAP db_xref values may carry a trailing extra token.
  ASAP_FIXUP_ACCESSIONS = Regexp.union(%w[
    NC_017263.1 NC_017264.1 NC_017265.1 NC_017266.1
  ])

  # Entry whose TIGRFAM db_xref values may carry a trailing "; ..." part.
  TIGRFAM_FIXUP_ACCESSIONS = /NC_013418\.2/

  attr_accessor :prefix

  # io::      source of RefSeq entries (defaults to ARGF)
  # seqtype:: Sequence Ontology term or ID describing the sequences,
  #           passed to #sequence_type for every entry
  #
  # Loads the rs_id.json / ft_so.json mapping tables (via RS_ID and
  # FT_SO), prints the prefix header and converts all entries.
  def initialize(io = ARGF, seqtype = nil)
    set_prefixes
    @seqtype = seqtype
    @rs_id = RS_ID.new
    @ft_so = FT_SO.new
    @locus = {}       # locus_tag => gene URI, filled by parse_genes
    @xref_warn = {}   # keys for which a warning was already printed
    puts prefix
    puts
    parse_refseq(io)
  end

  # Builds the list of @prefix lines: the shared defaults plus insdc:.
  def set_prefixes
    @prefix = default_prefix + [
      #triple("@prefix", "genome:", "<http://purl.jp/bio/10/genome/>"),
      #triple("@prefix", "idorg:", "<http://ns.identifiers.org/>"),
      triple("@prefix", "insdc:", "<http://rdf.insdc.org/>"),
    ]
  end

  # Prints the triples linking +subject+ to the external record +id+ in
  # database +db+: rdfs:seeAlso, plus a label and a type for the target.
  #
  # The target URI is built from the 'prefix' of the rs_id table entry.
  # Databases missing from the table are reported once on stderr and
  # produce no triples. A nil or empty +id+ is skipped with a warning
  # rather than emitting a URI without an identifier.
  def xref(subject, db, id)
    if id.nil? || id.to_s.empty?
      warn_once([db, :empty_id], "Warning: Empty identifier for '#{db}' cross-reference; skipped.")
      return
    end

    case db
    when "HOMD"
      # Non-destructive: the caller's string must not be modified.
      id = id.sub(/\Atax_/, '')
    when "ECOCYC"
      #id = "ECOCYC:#{id}"
    when "GI", "ERIC", "HMP", "PSEUDO", "Pathema"
      warn_once(db, "Warning: Need to register '#{db}' in Identifiers.org")
    end

    if (entry = @rs_id.fetch(db))
      uri = "<#{entry['prefix']}#{id}>"
      puts triple(subject, "rdfs:seeAlso", uri)
      puts triple(uri, "rdfs:label", quote("#{db}:#{id}"))
      puts triple(uri, "rdf:type", "insdc:#{entry['class']}")
    else
      warn_once(db, "Error: New database '#{db}' found. Add it to the rs_id.json file and/or Identifiers.org.")
    end
  end

  ###
  ### FALDO http://biohackathon.org/faldo
  ###

  # Prints a faldo:Region for the location string +pos+ and returns
  # [region_uri, element_uris].
  #
  # The region spans the minimum to the maximum coordinate of the whole
  # location. When +elem_type+ is given (a Hash with :id and :term), each
  # sub-location additionally becomes its own region typed with
  # elem_type[:id], and element_uris lists them in location order;
  # otherwise element_uris is empty.
  def new_location(pos, elem_type = false)
    loc_id = new_uuid
    puts triple(loc_id, "insdc:location_string", quote(pos))

    locations = Bio::Locations.new(pos)
    pos_start = new_uuid
    pos_end = new_uuid
    puts triple(loc_id, "rdf:type", "faldo:Region")
    puts triple(loc_id, "faldo:begin", pos_start)
    puts triple(loc_id, "faldo:end", pos_end)
    new_position(pos_start, locations.range.min, locations.first.strand)
    new_position(pos_end, locations.range.max, locations.last.strand)

    elements = []
    if elem_type
      locations.each do |loc|
        elem_id = new_uuid
        elem_start = new_uuid
        elem_end = new_uuid
        puts triple(elem_id, "obo:so_part_of", loc_id)
        puts triple(elem_id, "rdf:type", elem_type[:id]) + " # #{elem_type[:term]}"
        puts triple(elem_id, "rdf:type", "faldo:Region")
        puts triple(elem_id, "faldo:begin", elem_start)
        puts triple(elem_id, "faldo:end", elem_end)
        new_position(elem_start, loc.from, loc.strand)
        new_position(elem_end, loc.to, loc.strand)
        elements << elem_id
      end
    end

    [loc_id, elements]
  end

  # Prints a faldo:ExactPosition at coordinate +pos+ on the current
  # sequence. A positive +strand+ is typed as forward strand, anything
  # else as reverse strand.
  def new_position(pos_id, pos, strand)
    puts triple(pos_id, "faldo:position", pos)
    puts triple(pos_id, "faldo:reference", @sequence_id)
    puts triple(pos_id, "rdf:type", "faldo:ExactPosition")
    strand_type = strand > 0 ? "faldo:ForwardStrandPosition" : "faldo:ReverseStrandPosition"
    puts triple(pos_id, "rdf:type", strand_type)
  end

  ###
  ### Main
  ###

  # Converts every entry readable from +io+. The first feature of each
  # entry is removed from the feature list and treated as the source
  # feature.
  def parse_refseq(io)
    Bio::FlatFile.auto(io).each do |entry|
      @entry = entry
      @features = entry.features
      @source = @features.shift
      parse_sequence
      parse_source
      parse_genes
      parse_cds
      parse_features
    end
  end

  ###
  ### Sequence
  ###

  # Prints the triples describing the sequence record itself and assigns
  # a new @sequence_id that all later triples of this entry refer to.
  #
  # [TODO]
  # * bind sequences by BioProject ID
  # * complete/draft?
  def parse_sequence
    @sequence_id = new_uuid
    # [TODO] How to identify the input is chromosome/plasmid/contig/...?
    sequence_type(@seqtype)
    # [TODO] Obtain rdfs:label from source /chromosome (eukaryotes) /plasmid (prokaryotes) -> see insdc:source_chromosome, insdc:source_plasmid
    sequence_label(@entry.definition)
    sequence_version(@entry.acc_version)
    sequence_length(@entry.nalen)
    # [TODO] provide REST API to retrieve genomic DNA sequence by <@sequence_id.fasta>
    sequence_seq(@entry.acc_version)
    sequence_form(@entry.circular)
    # [TODO] sequenced date, modified in the source db or in our RDF data?
    sequence_date(@entry.date)
    # [TODO] rdfs:seeAlso (like UniProt) or dc:relation, owl:sameAs
    sequence_link_gi(@entry.gi.to_s.sub('GI:', ''))
    sequence_link_accver(@entry.acc_version)
    sequence_link_bioproject(@entry.bioproject)
    # [TODO] how to deal with direct submissions (references without PMID)?
    sequence_ref(@entry.references)
  end

  # Prints the rdf:type of the sequence. +so+ may be an SO term name
  # ("SO:plasmid") or any string containing the numeric SO ID
  # ("SO:0000155"); anything unrecognized, including nil, is typed as
  # SO:sequence_assembly.
  def sequence_type(so = "SO:chromosome")
    so_id, label =
      case so
      when /0000340/, "SO:chromosome"
        ["SO_0000340", "SO:chromosome"]
      when /0000155/, "SO:plasmid"
        ["SO_0000155", "SO:plasmid"]
      when /0000736/, "SO:organelle_sequence"
        ["SO_0000736", "SO:organelle_sequence"]
      when /0000819/, "SO:mitochondrial_chromosome"
        ["SO_0000819", "SO:mitochondrial_chromosome"]
      when /0000740/, "SO:plastid_sequence"
        ["SO_0000740", "SO:plastid_sequence"]
      when /0000719/, "SO:ultracontig"
        ["SO_0000719", "SO:ultracontig"]
      when /0000148/, "SO:supercontig", "SO:scaffold"
        ["SO_0000148", "SO:supercontig/scaffold"]
      when /0000149/, "SO:contig"
        ["SO_0000149", "SO:contig"]
      else
        ["SO_0000353", "SO:sequence_assembly"]
      end
    puts triple(@sequence_id, "rdf:type", "obo:#{so_id}") + " # #{label}"
  end

  # Prints the rdfs:label of the sequence.
  def sequence_label(str)
    # Use "name:" key in the JSON representation
    puts triple(@sequence_id, "rdfs:label", quote(str))
  end

  # Prints the accession.version of the sequence as a string literal.
  def sequence_version(str)
    puts triple(@sequence_id, "insdc:sequence_version", quote(str))
  end

  # Prints the sequence length as a bare (numeric) literal.
  def sequence_length(int)
    puts triple(@sequence_id, "insdc:sequence_length", int)
  end

  # Prints a link to a FASTA resource for the accession.version +str+.
  def sequence_seq(str)
    # [TODO] Where to provide the actual DNA sequence?
    fasta_uri = "<http://togows.dbcls.jp/entry/nucleotide/#{str}.fasta>"
    #fasta_uri = "<http://www.ncbi.nlm.nih.gov/nuccore/#{str}?report=fasta>"
    puts triple(@sequence_id, "insdc:sequence_fasta", fasta_uri)
  end

  # Prints the topology type for "linear" or "circular"; any other value
  # prints nothing.
  def sequence_form(form)
    case form
    when "linear"
      puts triple(@sequence_id, "rdf:type", "obo:SO_0000987") + " # SO:linear"
    when "circular"
      puts triple(@sequence_id, "rdf:type", "obo:SO_0000988") + " # SO:circular"
    end
  end

  # Prints the entry date as an xsd:date literal.
  def sequence_date(date)
    puts triple(@sequence_id, "insdc:sequence_date", quote(usdate2date(date)) + "^^xsd:date")
  end

  # Links the sequence to its GI number.
  def sequence_link_gi(str)
    xref(@sequence_id, 'GI', str)
  end

  # Links the sequence to its RefSeq accession.version.
  def sequence_link_accver(str)
    xref(@sequence_id, 'RefSeq', str)
  end

  # Links the sequence to its BioProject ID.
  def sequence_link_bioproject(str)
    xref(@sequence_id, 'BioProject', str)
  end

  # Links the sequence to the PubMed record of every reference that has
  # a PubMed ID; references without one are skipped.
  def sequence_ref(refs)
    refs.each do |ref|
      pmid = ref.pubmed.to_s
      xref(@sequence_id, 'PubMed', pmid) unless pmid.empty?
    end
  end

  ###
  ### Source
  ###

  # Prints the triples for the source feature. The source shares its URI
  # with the sequence.
  def parse_source
    # Use @sequence_id for @source_id
    @source_id = @sequence_id
    hash = @source.to_hash
    source_location(@source.position)
    # db_xref values become links, not plain qualifier literals.
    source_link(hash.delete("db_xref"))
    source_qualifiers(hash)
  end

  # Prints the location of the source feature.
  def source_location(pos)
    loc_id, = new_location(pos)
    puts triple(@source_id, "faldo:location", loc_id)
  end

  # Prints one cross-reference per "DB:ID" string in +links+. A nil
  # +links+ (source feature without db_xref) prints nothing.
  def source_link(links)
    Array(links).each do |link|
      db, entry_id = link.split(':', 2)
      xref(@source_id, db, entry_id)
    end
  end

  # Prints every qualifier of the source feature as insdc:source_<name>.
  def source_qualifiers(hash)
    hash.each do |qual, vals|
      vals.each do |val|
        emit_qualifier(@source_id, "insdc:source_#{qual}", val)
      end
    end
  end

  ###
  ### genes
  ###

  # Prints every "gene" feature and records locus_tag => gene URI in
  # @locus so that parse_cds can attach CDSs to their genes.
  def parse_genes
    genes = @features.select { |feat| feat.feature == "gene" }
    genes.each.with_index(1) do |gene, count|
      gene_id = new_uuid
      hash = gene.to_hash
      puts triple(gene_id, "rdf:type", "obo:SO_0000704") + " # SO:gene"
      puts triple(gene_id, "obo:so_part_of", @sequence_id)
      loc_id, _ = new_location(gene.position)
      puts triple(gene_id, "faldo:location", loc_id)

      # Label preference: locus_tag, then gene name, then a running number.
      if hash["locus_tag"]
        locus_tag = hash["locus_tag"].first
        @locus[locus_tag] = gene_id
        puts triple(gene_id, "rdfs:label", quote(locus_tag))
      elsif hash["gene"]
        puts triple(gene_id, "rdfs:label", quote(hash["gene"].first))
      else
        # [TODO] Where else to find gene name?
        puts triple(gene_id, "rdfs:label", quote("gene#{count}"))
      end
      parse_qualifiers(gene_id, hash)
    end
  end

  ###
  ### CDS
  ###

  # Prints every "CDS" feature. A CDS is made part of the gene with the
  # same locus_tag when one was recorded by parse_genes, otherwise part
  # of the sequence. Each sub-location is emitted as an exon region and
  # listed, in order, under obo:so_has_part.
  def parse_cds
    cdss = @features.select { |feat| feat.feature == "CDS" }
    cdss.each.with_index(1) do |cds, count|
      cds_id = new_uuid
      hash = cds.to_hash
      puts triple(cds_id, "rdf:type", "obo:SO_0000316") + " # SO:CDS"

      locus_tag = hash["locus_tag"] && hash["locus_tag"].first
      gene_id = locus_tag && @locus[locus_tag]
      # [TODO] sure to fall back to the sequence when no gene is known?
      puts triple(cds_id, "obo:so_part_of", gene_id || @sequence_id)

      # Label preference: locus_tag, then gene name, then a running number.
      if locus_tag
        puts triple(cds_id, "rdfs:label", quote(locus_tag))
      elsif hash["gene"]
        puts triple(cds_id, "rdfs:label", quote(hash["gene"].first))
      else
        puts triple(cds_id, "rdfs:label", quote("CDS#{count}"))
      end

      elem_type = { :id => "obo:SO_0000147", :term => "SO:exon" }
      loc_id, exons = new_location(cds.position, elem_type)
      puts triple(cds_id, "faldo:location", loc_id)
      puts triple(cds_id, "obo:so_has_part", "(#{exons.join(' ')})") # rdf:List
      parse_qualifiers(cds_id, hash)
    end
  end

  ###
  ### Features
  ###

  # Prints every feature that is neither "gene" nor "CDS". The rdf:type
  # comes from the ft_so table: a mapped SO ID is used as is, the value
  # "undefined" falls back to SO:sequence_feature, and a feature key
  # missing from the table gets no rdf:type at all.
  def parse_features
    features = @features.reject { |feat| %w[gene CDS].include?(feat.feature) }
    features.each do |feat|
      feature = feat.feature
      feature_id = new_uuid
      hash = feat.to_hash
      puts triple(feature_id, "obo:so_part_of", @sequence_id)
      puts triple(feature_id, "rdfs:label", quote(feature))
      if (so_id = @ft_so.so_id(feature))
        if so_id != "undefined"
          so = so_id.sub(':', '_')
          puts triple(feature_id, "rdf:type", "obo:#{so}") + " # SO:#{@ft_so.so_term(feature)}"
        else
          puts triple(feature_id, "rdf:type", "obo:SO_0000110") + " # SO:sequence_feature"
        end
      end
      loc_id, _ = new_location(feat.position)
      puts triple(feature_id, "faldo:location", loc_id)
      parse_qualifiers(feature_id, hash)
    end
  end

  # Prints every qualifier of a feature. protein_id and db_xref values
  # become cross-references; all other qualifiers become
  # insdc:feature_<name> literals.
  def parse_qualifiers(feature_id, hash)
    hash.each do |qual, vals|
      vals.each do |val|
        if val == true
          # Value-less qualifier: always a plain boolean, whatever its name.
          emit_qualifier(feature_id, "insdc:feature_#{qual}", val)
          next
        end

        case qual
        when "protein_id"
          xref(feature_id, 'Protein', val)
        when "db_xref"
          feature_xref(feature_id, val)
        else
          emit_qualifier(feature_id, "insdc:feature_#{qual}", val)
        end
      end
    end
  end

  private

  # Prints +message+ on stderr the first time it is called for +key+ and
  # stays silent for that key afterwards.
  def warn_once(key, message)
    return if @xref_warn[key]
    $stderr.puts message
    @xref_warn[key] = true
  end

  # Prints one qualifier triple. A value of true (qualifier without a
  # value) is written as the bare literal true; otherwise whitespace runs
  # are collapsed to single spaces, all-digit values are written bare and
  # everything else as a quoted string.
  def emit_qualifier(subject, predicate, val)
    if val == true
      puts triple(subject, predicate, true)
    else
      data = val.to_s.gsub(/\s+/, ' ').strip
      puts triple(subject, predicate, data =~ /\A\d+\z/ ? data : quote(data))
    end
  end

  # Prints the cross-reference for one feature db_xref value ("DB:ID"),
  # applying ad hoc clean-ups for entries known to contain malformed
  # values. Values without an ID part are left to #xref, which skips them.
  def feature_xref(feature_id, val)
    db, id = val.to_s.split(':', 2)
    accession = @entry.acc_version.to_s

    # ad hoc
    if db == 'InterPro' && accession =~ INTERPRO_FIXUP_ACCESSIONS
      # PRJNA59115/NC_010994.1
      # PRJNA62225/NC_014958.1
      # PRJNA65781/NC_015385.1
      # PRJNA65781/plasmids/NC_015386.1
      # PRJNA65783/NC_015387.1
      # PRJNA65785/NC_015388.1
      # PRJNA65787/NC_015389.1
      #   /db_xref="InterPro:Chromosomal replication control,
      #             initiator (DnaA)/regulator (Hda"
      xref(feature_id, db, id) if id && id[/IPR\d+/]
    elsif db == "ASAP" && accession =~ ASAP_FIXUP_ACCESSIONS
      # PRJNA158537/plasmids/NC_017263.1
      # PRJNA158537/plasmids/NC_017264.1
      # PRJNA158537/NC_017265.1
      # PRJNA158537/plasmids/NC_017266.1
      #   /db_xref="ASAP:BBE-0004740"
      #   /db_xref="ASAP:BBE-0004740 ERIC"
      xref(feature_id, db, id) unless id && id[/\s/]
    elsif db == 'TIGRFAM' && accession =~ TIGRFAM_FIXUP_ACCESSIONS
      # PRJNA41287/NC_013418.2
      #   /db_xref="TIGRFAM:TIGR00197; TF"
      xref(feature_id, db, id && id.sub(/;.*/, ''))
    else
      xref(feature_id, db, id)
    end
  end
end
# Command-line entry point:
#
#   refseq2rdf.rb [--seqtype|-t SO_TERM_OR_ID] [file ...]
#
# Converts the RefSeq entries read from the given files (or standard
# input) to Turtle on standard output. The sequence type defaults to
# "SO:chromosome".
if __FILE__ == $0
  require 'getoptlong'

  args = GetoptLong.new(
    [ '--seqtype', '-t', GetoptLong::REQUIRED_ARGUMENT ]
  )

  opts = {
    :seqtype => "SO:chromosome",
  }

  # GetoptLong removes the options it recognizes from ARGV, so ARGF below
  # reads only the remaining file arguments.
  args.each_option do |name, value|
    case name
    when '--seqtype'
      opts[:seqtype] = value
    end
  end

  RefSeq2RDF.new(ARGF, opts[:seqtype])
end
Author
ktym
commented
Oct 1, 2012
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
# SO:chromosome, SO:linear
<urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6>
<http://genome.db/sw/feature_chromosome> 7 ;
<http://genome.db/sw/feature_isolate> "3D7" ;
<http://genome.db/sw/length> 1501717 ;
<http://genome.db/sw/location> "1..1501717" ;
<http://genome.db/sw/molecularType> "genomic DNA" ;
<http://genome.db/sw/organism> "Plasmodium falciparum 3D7" ;
<http://genome.db/sw/sequence> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6.fasta> ;
<http://genome.db/sw/start> 1 ;
<http://genome.db/sw/stop> 1501717 ;
<http://genome.db/sw/version> "NC_004328.2" ;
<http://genome.db/sw/xref> <urn:xref:bioproject:148>, <urn:xref:gi:296004920>, <urn:xref:refseq:NC_004328.2>, <urn:xref:taxon:36329> ;
<http://purl.org/dc/terms/modified> "2010-07-29"^^<http://www.w3.org/2001/XMLSchema#date> ;
a <http://purl.obolibrary.org/obo/SO_0000340>, <http://purl.obolibrary.org/obo/SO_0000987> ;
<http://www.w3.org/2000/01/rdf-schema#comment> "Plasmodium falciparum 3D7 chromosome 7." ;
<http://www.w3.org/2000/01/rdf-schema#label> "Chromosome 7" .
# SO:gene
<urn:uuid:5c3a336b-8d9c-4c88-a514-b390859d53e9>
<http://genome.db/sw/feature_gene> "PfCRT" ;
<http://genome.db/sw/feature_gene_synonym> "CRT; digestive vacuole transmembrane protein" ;
<http://genome.db/sw/feature_locus_tag> "MAL7P1.27" ;
<http://genome.db/sw/location> <urn:uuid:88af6bf4-1696-4c16-bd88-54ed8f32a77b> ;
<http://genome.db/sw/xref> <urn:xref:geneid:2655199> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://purl.obolibrary.org/obo/SO_0000704> ;
<http://www.w3.org/2000/01/rdf-schema#label> "MAL7P1.27" .
# SO:STS
<urn:uuid:e3841e58-1a11-4bd2-b67a-5736604fa065>
<http://genome.db/sw/feature_gene> "PfCRT" ;
<http://genome.db/sw/feature_gene_synonym> "CRT; digestive vacuole transmembrane protein" ;
<http://genome.db/sw/feature_locus_tag> "MAL7P1.27" ;
<http://genome.db/sw/feature_standard_name> "B5M47" ;
<http://genome.db/sw/location> <urn:uuid:a2de1405-719c-4c17-98a9-262b323bff1a> ;
<http://genome.db/sw/xref> <urn:xref:UniSTS:105285> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://purl.obolibrary.org/obo/SO_0000331> ;
<http://www.w3.org/2000/01/rdf-schema#label> "STS" .
# location of STS
<urn:uuid:a2de1405-719c-4c17-98a9-262b323bff1a>
<http://biohackathon.org/faldo/end> <urn:uuid:d1fb8f71-f7cc-47cf-a3a4-e2fca59f9968> ;
<http://biohackathon.org/faldo/start> <urn:uuid:fa2ce3b5-8114-4888-8169-61063312efcd> ;
<http://genome.db/sw/position> "458646..458807" .
<urn:uuid:fa2ce3b5-8114-4888-8169-61063312efcd>
<http://biohackathon.org/faldo/position> 458646 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
<urn:uuid:d1fb8f71-f7cc-47cf-a3a4-e2fca59f9968>
<http://biohackathon.org/faldo/position> 458807 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
# SO:mRNA
<urn:uuid:e4c305db-611c-478f-b3e7-9f0397a603f9>
<http://genome.db/sw/feature_gene> "PfCRT" ;
<http://genome.db/sw/feature_gene_synonym> "CRT; digestive vacuole transmembrane protein" ;
<http://genome.db/sw/feature_locus_tag> "MAL7P1.27" ;
<http://genome.db/sw/feature_transcript_id> "XM_001348968.1" ;
<http://genome.db/sw/location> <urn:uuid:6b155c2f-69a7-4405-8648-aceed0e4fb7e> ;
<http://genome.db/sw/xref> <urn:xref:geneid:2655199>, <urn:xref:gi:124511741> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://purl.obolibrary.org/obo/SO_0000234> ;
<http://www.w3.org/2000/01/rdf-schema#label> "mRNA" .
# SO:CDS (parent -> "gene"; should be changed to "mRNA"?; how to make it consistent with Prokaryote RefSeq records?)
<urn:uuid:e5d162dd-b4c0-4b82-94be-c252e49aa091>
<http://genome.db/sw/exons> (<urn:uuid:b404b2d3-3642-4803-8523-76570cf846be>
<urn:uuid:d925c5bf-ec45-4542-82c8-c59f6ea6ddf4>
<urn:uuid:57202a72-4b79-4901-9a15-21daeb467274>
<urn:uuid:7eb80305-e087-40cb-bf3f-afea6eedb127>
<urn:uuid:e3bfa90d-26fb-44cd-8989-b11aff549b1d>
<urn:uuid:78027e45-4223-4e14-a0d7-b8db2002bf30>
<urn:uuid:a64773a4-a26c-4a44-be58-3977dbeade0d>
<urn:uuid:4e683dc2-60b8-4647-8fbc-99c0755a8eb2>
<urn:uuid:155d3345-511d-41fa-b477-d67bcb8471c0>
<urn:uuid:0bb4fcf5-cbfc-404a-bb1c-b9dc94444665>
<urn:uuid:65be2ca5-e962-4a43-ad9e-66c783c6c382>
<urn:uuid:27eff893-726e-4bde-b9f6-56b37929610c>
<urn:uuid:03b70581-d98b-4c76-8cb3-e6cd32ef7700>
) ;
<http://genome.db/sw/feature_codon_start> 1 ;
<http://genome.db/sw/feature_gene> "PfCRT" ;
<http://genome.db/sw/feature_gene_synonym> "CRT; digestive vacuole transmembrane protein" ;
<http://genome.db/sw/feature_locus_tag> "MAL7P1.27" ;
<http://genome.db/sw/location> <urn:uuid:99d11132-6cab-44cd-b651-56549491325a> ;
<http://genome.db/sw/xref> <urn:xref:InterPro:IPR017258>, <urn:xref:UniProtKB%2FTrEMBL:Q8IBZ9>, <urn:xref:geneid:2655199>, <urn:xref:gi:124511742>, <urn:xref:protein:XP_001349004.1> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:5c3a336b-8d9c-4c88-a514-b390859d53e9> ;
a <http://purl.obolibrary.org/obo/SO_0000316> ;
<http://www.w3.org/2000/01/rdf-schema#label> "MAL7P1.27" .
# location of CDS
<urn:uuid:99d11132-6cab-44cd-b651-56549491325a>
<http://biohackathon.org/faldo/start> <urn:uuid:88bd57e0-960b-409d-a211-54f179a93884> ;
<http://biohackathon.org/faldo/end> <urn:uuid:dc966d60-5e01-421b-913a-8378f48a0b8b> ;
<http://genome.db/sw/position> "join(458600..458690,458868..459136,459316..459488,459661..459793,459947..460018,460142..460217,460314..460396,460524..460574,460712..460768,460917..461009,461203..461247,461395..461449,461619..461695)" .
# location start
<urn:uuid:88bd57e0-960b-409d-a211-54f179a93884>
<http://biohackathon.org/faldo/position> 458600 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
# location end
<urn:uuid:dc966d60-5e01-421b-913a-8378f48a0b8b>
<http://biohackathon.org/faldo/position> 461695 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
# exon1
<urn:uuid:b404b2d3-3642-4803-8523-76570cf846be>
<http://biohackathon.org/faldo/start> <urn:uuid:6e1fe409-7879-4a68-bf21-33398c79e50d> ;
<http://biohackathon.org/faldo/end> <urn:uuid:90256c52-22aa-4852-90eb-32809c3bbc68> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:99d11132-6cab-44cd-b651-56549491325a> ;
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.obolibrary.org/obo/SO_0000147> .
# exon1 start
<urn:uuid:6e1fe409-7879-4a68-bf21-33398c79e50d>
<http://biohackathon.org/faldo/position> 458600 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
# exon1 end
<urn:uuid:90256c52-22aa-4852-90eb-32809c3bbc68>
<http://biohackathon.org/faldo/position> 458690 ;
<http://biohackathon.org/faldo/reference> <urn:uuid:63a091c1-1409-4a91-9f46-db9b04bce8f6> ;
a <http://biohackathon.org/faldo/ExactlyKnownPosition>, <http://biohackathon.org/faldo/ForwardStrandPosition> .
# exon2
<urn:uuid:d925c5bf-ec45-4542-82c8-c59f6ea6ddf4>
<http://biohackathon.org/faldo/start> <urn:uuid:2f3b1a61-0a1e-4c4b-b2e9-5604780de5ed> ;
<http://biohackathon.org/faldo/end> <urn:uuid:fdb47c15-f585-4393-a94d-1dcd42d5d60b> ;
<http://purl.org/dc/terms/isPartOf> <urn:uuid:99d11132-6cab-44cd-b651-56549491325a> ;
<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.obolibrary.org/obo/SO_0000147> .
:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment