Thanks to skwsm-san!
Original FastQC data used for modeling is embedded here.
<Quanto Record> a :SequenceStatisticsReport .
<Quanto Record>
:filename "ERR055260.fastq";
:fileType "Conventional base calls";
:encoding "Sanger / Illumina 1.9";
:totalSequences [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 33692804;
];
:filteredSequences [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 0;
];
:sequenceLength [
a :SequenceReadLength;
rdf:value 36;
];
:percentGC [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 40;
] .
<Quanto Record> :hasMatrix [
a :PerBaseSequenceQuality;
:hasRow [
a :Row;
a :ExactBaseStatistics;
:rowIndex 0;
:basePosition "1";
:meanBaseCallQuality [
a :PhredQualityScore;
rdf:value 36.0;
];
:medianBaseCallQuality [
a :PhredQualityScore;
rdf:value 36.0;
];
:baseCallQualityLowerQuartile [
a :PhredQualityScore;
rdf:value 36.0;
];
:baseCallQualityUpperQuartile [
a :PhredQualityScore;
rdf:value 36.0;
];
:baseCallQuality10thPercentile [
a :PhredQualityScore;
rdf:value 36.0;
];
:baseCallQuality90thPercentile [
a :PhredQualityScore;
rdf:value 36.0;
];
] ;
] .
<Quanto Record> :hasMatrix [
a :PerSequenceQualityScores;
:hasRow [
a :Row;
:rowIndex 0;
:baseCallQuality [
a :PhredQualityScore;
rdf:value 2;
];
:sequenceReadCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 50286.0;
];
];
] .
<Quanto Record> :hasMatrix [
a :PerBaseSequenceContent;
:hasRow [
a :Row;
a :ExactBaseStatistics;
:rowIndex 0;
:basePosition "1";
:percentGuanine [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 21.568317674005407;
];
:percentAdenine [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 27.723905080740685;
];
:percentThymine [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 28.783710017130065;
];
:percentCytosine [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 21.924067228123846;
];
];
] .
<Quanto Record> :hasMatrix [
a :PerBaseGCContent;
:hasRow [
a :Row;
a :ExactBaseStatistics;
:rowIndex 0;
:basePosition "1";
:percentGC [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 43.49238490212925;
];
];
] .
<Quanto Record> :hasMatrix [
a :PerSequenceGCContent;
:hasRow [
a :Row;
:rowIndex 0;
:percentGC [
a :NucleotideBaseContent;
:hasUnit uo:Percent;
rdf:value 0;
];
:sequenceReadCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 2030.0;
];
];
] .
<Quanto Record> :hasMatrix [
a :PerBaseNContent;
:hasRow [
a :Row;
a :ExactBaseStatistics;
:rowIndex 0;
:basePosition "1";
:nCount [
a :NContent;
:hasUnit uo:CountUnit;
rdf:value 0.0
];
];
] .
<Quanto Record> :hasMatrix [
a :SequenceLengthDistribution;
:hasRow [
a :Row;
:rowIndex 0;
:sequenceReadLength [
a :SequenceReadLength;
:hasUnit uo:CountUnit;
rdf:value 36;
];
:sequenceReadCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 3.3692804E7;
];
];
] .
<Quanto Record> :hasMatrix [
a :SequenceDuplicationLevels;
:hasRow [
a :Row;
:rowIndex 0;
:sequenceDuplicationLevel [
a :SequenceDuplicationLevel;
:hasUnit uo:CountUnit;
rdf:value 1;
];
:sequenceReadRelativeCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 100;
];
];
] .
<Quanto Record> :hasMatrix [
a :OverrepresentedSequences;
:hasRow [
a :Row;
:rowIndex 0;
:overrepresentedSequence "GATCGGAAGAGCGGTTCAGCAGGAATGCCGAGATCG";
:sequenceReadCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 66145;
];
:sequenceReadPercentage [
a :SequenceReadContent;
:hasUnit uo:Percent;
rdf:value 0.19631788437673514;
];
:possibleSourceOfSequence "Illumina Paired End PCR Primer 2 (97% over 36bp)";
];
] .
<Quanto Record> :hasMatrix [
a :KmerContent;
:hasRow [
a :Row;
:rowIndex 0;
:kmerSequence "CTATG";
:sequenceReadCount [
a :SequenceReadContent;
:hasUnit uo:CountUnit;
rdf:value 3682525;
];
:observedPerExpectedOverall [
a :SequenceReadContent;
:hasUnit uo:ratio;
rdf:value 3.1166635;
];
:observedPerExpectedMax [
a :SequenceReadContent;
:hasUnit uo:ratio;
rdf:value 3.6598775;
];
:observedPerExpectedMaxPosition "6";
];
] .
<Quanto Record>
:minSequenceLength 36;
:maxSequenceLength 36;
:meanSequenceLength 36;
:medianSequenceLength 36;
:overallMeanBaseCallQuality [
a :PhredQualityScore;
rdf:value 40;
];
:overallMedianBaseCallQuality [
a :PhredQualityScore;
rdf:value 40;
];
:overallNContent [
a :NContent;
:hasUnit uo:Percent;
rdf:value 0.1;
] .
- SequenceStatisticsReport
- SequenceStatisticsMatrix
- FastQCStatisticsMatrix
- Row
- ExactBaseStatistics
- BaseRangeStatistics
has_parent :FastQCStatisticsMatrix
- PerBaseSequenceQuality
- PerTileSequenceQuality
- PerSequenceQualityScores
- PerBaseSequenceContent
- PerSequenceGCContent
- PerBaseNContent
- SequenceLengthDistribution
- SequenceDuplicationLevels
- OverrepresentedSequences
- KmerContent
- PhredQualityScore
- NucleotideBaseContent
- SequenceReadContent
- SequenceReadLength
- SequenceDuplicationLevel
- object properties
- hasMatrix
- data properties
- filename
- fileType
- encoding
- totalSequences
- filteredSequences
- sequenceLength
- percentGC
- hasRow
- object properties
- baseCallQuality
- baseCallQuality10thPercentile
- baseCallQuality90thPercentile
- baseCallQualityLowerQuartile
- baseCallQualityUpperQuartile
- kmerSequence
- meanBaseCallQuality
- medianBaseCallQuality
- nCount
- observedPerExpectedMax
- observedPerExpectedMaxPosition
- observedPerExpectedOverall
- overrepresentedSequence
- percentAdenine
- percentCytosine
- percentGuanine
- percentThymine
- possibleSourceOfSequence
- sequenceDuplicationLevel
- sequenceReadCount
- sequenceReadLength
- sequenceReadPercentage
- sequenceReadRelativeCount
- data properties
- rowIndex
- basePosition
- percentGC
- object properties
- hasUnit
- object properties
- overallMeanBaseCallQuality
- overallMedianBaseCallQuality
- overallNContent
- data properties
- minSequenceLength
- maxSequenceLength
- meanSequenceLength
- medianSequenceLength