Skip to content

Instantly share code, notes, and snippets.

@seandavi
Created May 24, 2019 18:09
Show Gist options
  • Save seandavi/73d967fcd730b85fe573004af2fb99d8 to your computer and use it in GitHub Desktop.
Save seandavi/73d967fcd730b85fe573004af2fb99d8 to your computer and use it in GitHub Desktop.
// Workflow diagram with two pipelines that both feed the BigRNA API:
//   * cluster_0 - OmicIDX metadata pipeline
//   * cluster_2 - BigRNA pipeline, which contains the nested per-sample
//                 pipeline (cluster_1)
digraph Workflow {
    // Every node is drawn as a box unless it overrides `shape` itself.
    node [shape="box"];

    // ------------------------------------------------------------------
    // OmicIDX metadata pipeline
    // ------------------------------------------------------------------
    subgraph cluster_0 {
        // Inputs and intermediate artifacts. Record-shaped nodes list
        // their sub-entities in a nested field.
        SRA_XML [shape=record label="SRA XML|{Study|Sample|Experiment|Run}"]
        XML_TO_JSON [label="XML to JSON"]
        SRA_JSON [shape=record label="SRA JSON|{Study|Sample|Experiment|Run}"]
        BIOSAMPLE_XML [label="Biosample XML"]
        BIOSAMPLE_JSON [label="Biosample JSON"]
        SRA_TSV [shape=record label="SRA TSV|{file info|run info|livelist|accessions}"]

        // Processing steps and final output.
        LOAD_TO_POSTGRES [label="Load to staging\ndatabase"]
        ETL [label="Transform to\nPostgreSQL Schema"]
        GRAPHQL_API [label="Populate\nGraphQL API"]
        OMICIDX_OUTPUT [shape=parallelogram label="OmicIDX API"]

        // Keep the three raw inputs on the same rank.
        { rank=same; SRA_XML; BIOSAMPLE_XML; SRA_TSV }

        // XML inputs are converted to JSON; the TSV input goes straight
        // to the staging load together with the JSON outputs.
        {SRA_XML BIOSAMPLE_XML} -> XML_TO_JSON
        XML_TO_JSON -> {SRA_JSON BIOSAMPLE_JSON}
        {BIOSAMPLE_JSON SRA_JSON SRA_TSV} -> LOAD_TO_POSTGRES
        LOAD_TO_POSTGRES -> ETL -> GRAPHQL_API -> OMICIDX_OUTPUT

        // Cluster frame attributes.
        label="OmicIDX Metadata Pipeline";
        fontsize=30;
        color=black;
    }

    // ------------------------------------------------------------------
    // BigRNA pipeline
    // ------------------------------------------------------------------
    subgraph cluster_2 {
        // Data sources.
        SRA [shape=record label="{Human|Mouse|Others}"]
        GDC [shape=box]
        dbGaP [shape=box]

        // Transcript references used to build the Salmon index.
        HUMAN_TX [label="Human\nGencode v29 transcript reference"]
        // NOTE(review): GENCODE mouse releases are normally named with an
        // "M" prefix (e.g. M19) - confirm whether "v19" is the intended text.
        MOUSE_TX [label="Mouse\nGencode v19 transcript reference"]
        OTHER_TX [label="Other organism-specific\ntranscript references"]
        SALMON_IDX [label="Build Salmon index"]

        // SALMON_QUANT and FASTQ are referenced here before cluster_1
        // declares them; cluster_1 below supplies their labels and makes
        // them members of the per-sample cluster.
        SALMON_IDX -> SALMON_QUANT
        {rank=same HUMAN_TX MOUSE_TX OTHER_TX} -> SALMON_IDX

        // This edge is deliberately written at cluster_2 scope, not inside
        // cluster_1: an edge placed inside a cluster makes both of its
        // endpoints members of that cluster, which would draw the
        // SRA/GDC/dbGaP source nodes inside the per-sample box.
        {SRA GDC dbGaP} -> FASTQ

        // Per-sample pipeline; nodes outlined in red are its outputs.
        subgraph cluster_1 {
            FASTQ [label="Generate FASTQ"]
            SALMON_QUANT [label="Salmon Quantification"]
            SALMON_GENE [label="Gene counts" color=red]
            SALMON_TX [label="Transcript counts" color=red]
            SALMON_QC [label="Quality control metrics" color=red]
            SALMON_PARAMS [label="Run parameters" color=red]
            TRANSFER_TO_S3 [label="Automated transfer to s3"]

            FASTQ -> SALMON_QUANT
            // All four quantification outputs share one rank.
            SALMON_QUANT -> {rank=same SALMON_GENE SALMON_TX SALMON_PARAMS SALMON_QC}
            SALMON_GENE -> TRANSFER_TO_S3
            SALMON_TX -> TRANSFER_TO_S3
            SALMON_PARAMS -> TRANSFER_TO_S3
            SALMON_QC -> TRANSFER_TO_S3

            // Cluster frame attributes.
            label="Per-sample pipeline\n(Runs on NIH HPC resources)";
            color=black;
            fontsize=20
        }

        // Post-processing once results have been transferred.
        AWS_S3 [label="AWS s3 storage"]
        S3_TRIGGER [label="AWS s3 trigger"]
        UPDATE_POSTGRES [label="Update postgresql metadata"]
        TRANSFER_TO_S3 -> AWS_S3 -> S3_TRIGGER -> UPDATE_POSTGRES

        // Cluster frame attributes. These are set after cluster_1 is
        // closed, and cluster_1 sets its own label/fontsize/color anyway.
        fontsize=30;
        label="BigRNA Pipeline";
        color=black
    }

    // Shared endpoint fed by both pipelines; declared at the root so it is
    // drawn outside both clusters.
    BIGRNA_API [label="BigRNA API\n& Data Access" fontsize=20 shape=parallelogram]
    UPDATE_POSTGRES -> BIGRNA_API
    OMICIDX_OUTPUT -> BIGRNA_API
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment