Skip to content

Instantly share code, notes, and snippets.

@mpkocher
Last active October 2, 2019 14:59
Show Gist options
  • Save mpkocher/9de553f50d4b376a30959fb2c33ac9fe to your computer and use it in GitHub Desktop.
Save mpkocher/9de553f50d4b376a30959fb2c33ac9fe to your computer and use it in GitHub Desktop.
High-Level Overview of the SMRT Link Secondary Analysis System
digraph {
// Cluster 0: an "Import DataSet Job" whose entry point is the path to the
// alpha SubreadSet XML. The "cluster" name prefix makes Graphviz draw this
// subgraph inside its own bounding box.
subgraph cluster_0 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c0_Job [shape=hexagon, color=blue, label="Import DataSet Job"]
c0_store [shape=cylinder, label="DataStore"]
c0_ep1 [shape=diamond, label="Path /path/to/alpha_subreadset.xml"]
// Flow: entry point -> job -> datastore.
c0_ep1 -> c0_Job
c0_Job -> c0_store
// Files held by the DataStore (tab-shaped nodes).
c0_dsf_02 [shape=tab, label="DataStoreFile (SubreadSet)"]
c0_dsf_04 [shape=tab, label="DataStoreFile (Report_01)"]
c0_dsf_05 [shape=tab, label="DataStoreFile (Report_02)"]
c0_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c0_store -> c0_dsf_02
c0_store -> c0_dsf_04
c0_store -> c0_dsf_05
c0_store -> c0_dsf_03
// Dotted edges link the SubreadSet file to both reports.
// NOTE(review): presumably "report describes this dataset" -- confirm the intended meaning.
c0_dsf_02 -> c0_dsf_04 [style=dotted]
c0_dsf_02 -> c0_dsf_05 [style=dotted]
}
// Cluster 2: a second "Import DataSet Job", this one importing the beta
// SubreadSet XML. Same node/edge layout as the alpha import cluster.
subgraph cluster_2 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c2_Job [shape=hexagon, color=blue, label="Import DataSet Job"]
c2_store [shape=cylinder, label="DataStore"]
c2_ep1 [shape=diamond, label="Path /path/to/beta_subreadset.xml"]
// Flow: entry point -> job -> datastore.
c2_ep1 -> c2_Job
c2_Job -> c2_store
// Files held by the DataStore (tab-shaped nodes).
c2_dsf_02 [shape=tab, label="DataStoreFile (SubreadSet)"]
c2_dsf_04 [shape=tab, label="DataStoreFile (Report_01)"]
c2_dsf_05 [shape=tab, label="DataStoreFile (Report_02)"]
c2_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c2_store -> c2_dsf_02
c2_store -> c2_dsf_04
c2_store -> c2_dsf_05
c2_store -> c2_dsf_03
// Dotted edges link the SubreadSet file to both reports.
// NOTE(review): presumably "report describes this dataset" -- confirm the intended meaning.
c2_dsf_02 -> c2_dsf_04 [style=dotted]
c2_dsf_02 -> c2_dsf_05 [style=dotted]
}
// Cluster 3: a "Merge DataSets Job" that produces a merged SubreadSet plus
// reports and a log in its DataStore.
subgraph cluster_3 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c3_Job [shape=hexagon, color=blue, label="Merge DataSets Job"]
c3_store [shape=cylinder, label="DataStore"]
// NOTE(review): this entry point is labelled as a single file path, yet the
// edges at the end of the digraph feed it from two DataStoreFiles
// (c2_dsf_02 and c5_dsf_02) -- confirm the label is what was intended.
c3_ep1 [shape=diamond, label="Path /path/to/gamma_subreadset.xml"]
// Flow: entry point -> job -> datastore.
c3_ep1 -> c3_Job
c3_Job -> c3_store
// Files held by the DataStore (tab-shaped nodes).
c3_dsf_02 [shape=tab, label="DataStoreFile (Merged SubreadSet)"]
c3_dsf_04 [shape=tab, label="DataStoreFile (Report_01)"]
c3_dsf_05 [shape=tab, label="DataStoreFile (Report_02)"]
c3_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c3_store -> c3_dsf_02
c3_store -> c3_dsf_04
c3_store -> c3_dsf_05
c3_store -> c3_dsf_03
// Dotted edges link the merged SubreadSet file to both reports.
// NOTE(review): presumably "report describes this dataset" -- confirm the intended meaning.
c3_dsf_02 -> c3_dsf_04 [style=dotted]
c3_dsf_02 -> c3_dsf_05 [style=dotted]
}
// Cluster 4: an "Import DataSet Job" for a ReferenceSet XML. Unlike the
// SubreadSet import clusters, its DataStore holds only the dataset and a log
// (no report nodes, no dotted edges).
subgraph cluster_4 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c4_Job [shape=hexagon, color=blue, label="Import DataSet Job"]
c4_store [shape=cylinder, label="DataStore"]
c4_ep1 [shape=diamond, label="Path /path/to/referenceset.xml"]
// Flow: entry point -> job -> datastore.
c4_ep1 -> c4_Job
c4_Job -> c4_store
// Files held by the DataStore (tab-shaped nodes).
c4_dsf_02 [shape=tab, label="DataStoreFile (ReferenceSet)"]
c4_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c4_store -> c4_dsf_02
c4_store -> c4_dsf_03
}
// Cluster 5: a "Copy (and filter) DataSet" job. Its entry point is labelled
// with a dataset UUID and a filter expression (rq >= 0.7) rather than a path.
subgraph cluster_5 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c5_Job [shape=hexagon, color=blue, label="Copy (and filter) DataSet"]
c5_store [shape=cylinder, label="DataStore"]
c5_ep1 [shape=diamond, label="DataSet UUID=X,filter=rq >= 0.7"]
// Flow: entry point -> job -> datastore.
c5_ep1 -> c5_Job
c5_Job -> c5_store
// Files held by the DataStore (tab-shaped nodes).
c5_dsf_02 [shape=tab, label="DataStoreFile (SubreadSet)"]
c5_dsf_04 [shape=tab, label="DataStoreFile (Report_01)"]
c5_dsf_05 [shape=tab, label="DataStoreFile (Report_02)"]
c5_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c5_store -> c5_dsf_02
c5_store -> c5_dsf_04
c5_store -> c5_dsf_05
c5_store -> c5_dsf_03
// Dotted edges link the SubreadSet file to both reports.
// NOTE(review): presumably "report describes this dataset" -- confirm the intended meaning.
c5_dsf_02 -> c5_dsf_04 [style=dotted]
c5_dsf_02 -> c5_dsf_05 [style=dotted]
}
// Cluster 6: an "Export DataSet(s) Zip Job". Its entry point is labelled with
// a list of dataset UUIDs; its DataStore holds the zip output and a log.
subgraph cluster_6 {
// Job (blue hexagon), its output DataStore (cylinder) and its entry point (diamond).
c6_Job [shape=hexagon, color=blue, label="Export DataSet(s) Zip Job"]
c6_store [shape=cylinder, label="DataStore"]
c6_ep1 [shape=diamond, label="DataSet UUIDs=X,Y,Z"]
// Flow: entry point -> job -> datastore.
c6_ep1 -> c6_Job
c6_Job -> c6_store
// Files held by the DataStore (tab-shaped nodes).
// NOTE(review): this label omits the parentheses used by the other
// "DataStoreFile (...)" labels in the graph -- confirm whether that is intentional.
c6_dsf_02 [shape=tab, label="DataStoreFile DataSet XML(s) Zip"]
c6_dsf_03 [shape=tab, label="DataStoreFile (Log)"]
c6_store -> c6_dsf_02
c6_store -> c6_dsf_03
}
// Analysis Job cluster: the only job in the graph with two entry points
// (a SubreadSet and a ReferenceSet). Its DataStore holds six files.
// NOTE(review): the subgraph is named cluster_01 while its nodes use the c1_
// prefix; the other clusters pair cluster_N with cN_. Renaming would change
// the cluster title in rendered output, so it is only flagged here.
subgraph cluster_01 {
// Job (blue hexagon), its output DataStore (cylinder) and two entry points (diamonds).
c1_Job [shape=hexagon, color=blue, label="Analysis Job"]
c1_store [shape=cylinder, label="DataStore"]
c1_ep1 [shape=diamond, label="EntryPoint (SubreadSet)"]
c1_ep2 [shape=diamond, label="EntryPoint (ReferenceSet)"]
// Flow: both entry points -> job -> datastore.
c1_ep1 -> c1_Job
c1_ep2 -> c1_Job
c1_Job -> c1_store
// Files held by the DataStore (tab-shaped nodes).
c1_dsf_01 [shape=tab, label="DataStoreFile (Fasta)"]
c1_dsf_02 [shape=tab, label="DataStoreFile (AlignmentSet)"]
c1_dsf_03 [shape=tab, label="DataStoreFile (VCF)"]
c1_dsf_04 [shape=tab, label="DataStoreFile (Report_01)"]
c1_dsf_05 [shape=tab, label="DataStoreFile (Report_02)"]
// NOTE(review): "(LOG)" is upper-case here but "(Log)" in every other cluster.
c1_dsf_06 [shape=tab, label="DataStoreFile (LOG)"]
c1_store -> c1_dsf_01
c1_store -> c1_dsf_02
c1_store -> c1_dsf_03
c1_store -> c1_dsf_04
c1_store -> c1_dsf_05
c1_store -> c1_dsf_06
// Dotted edges: Report_01 is linked to the SubreadSet entry point; Report_02
// is linked to both the AlignmentSet file and the ReferenceSet entry point.
// NOTE(review): presumably "report describes these datasets" -- confirm the intended meaning.
c1_ep1 -> c1_dsf_04 [style=dotted]
c1_dsf_02 -> c1_dsf_05 [style=dotted]
c1_ep2 -> c1_dsf_05 [style=dotted]
}
// Cross-cluster edges: an output DataStoreFile of one job becomes the entry
// point of another job.
// Merged SubreadSet (merge job) -> Analysis Job SubreadSet entry point.
c3_dsf_02 -> c1_ep1
// Imported ReferenceSet -> Analysis Job ReferenceSet entry point.
c4_dsf_02 -> c1_ep2
// SubreadSet from the alpha import -> Copy (and filter) job entry point.
c0_dsf_02 -> c5_ep1
// SubreadSet from the beta import and the filtered SubreadSet both feed the
// Merge DataSets job entry point.
c2_dsf_02 -> c3_ep1
c5_dsf_02 -> c3_ep1
// Filtered SubreadSet -> Export DataSet(s) Zip job entry point.
c5_dsf_02 -> c6_ep1
}
Display the source blob
Display the rendered blob
Raw
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
-->
<!-- Title: %3 Pages: 1 -->
<svg width="2118pt" height="1228pt"
viewBox="0.00 0.00 2118.00 1228.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1224)">
<title>%3</title>
<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1224 2114,-1224 2114,4 -4,4"/>
<g id="clust1" class="cluster">
<title>cluster_0</title>
<polygon fill="none" stroke="#000000" points="926,-872 926,-1212 1322,-1212 1322,-872 926,-872"/>
</g>
<g id="clust2" class="cluster">
<title>cluster_2</title>
<polygon fill="none" stroke="#000000" points="508,-584 508,-924 892,-924 892,-584 508,-584"/>
</g>
<g id="clust3" class="cluster">
<title>cluster_3</title>
<polygon fill="none" stroke="#000000" points="900,-296 900,-636 1322,-636 1322,-296 900,-296"/>
</g>
<g id="clust4" class="cluster">
<title>cluster_4</title>
<polygon fill="none" stroke="#000000" points="110,-368 110,-636 455,-636 455,-368 110,-368"/>
</g>
<g id="clust6" class="cluster">
<title>cluster_6</title>
<polygon fill="none" stroke="#000000" points="1724,-368 1724,-636 2102,-636 2102,-368 1724,-368"/>
</g>
<g id="clust7" class="cluster">
<title>cluster_01</title>
<polygon fill="none" stroke="#000000" points="8,-8 8,-348 892,-348 892,-8 8,-8"/>
</g>
<g id="clust5" class="cluster">
<title>cluster_5</title>
<polygon fill="none" stroke="#000000" points="1330,-584 1330,-924 1716,-924 1716,-584 1330,-584"/>
</g>
<!-- c0_Job -->
<g id="node1" class="node">
<title>c0_Job</title>
<polygon fill="none" stroke="#0000ff" points="1215.1584,-1114 1169.5792,-1132 1078.4208,-1132 1032.8416,-1114 1078.4208,-1096 1169.5792,-1096 1215.1584,-1114"/>
<text text-anchor="middle" x="1124" y="-1109.8" font-family="Times,serif" font-size="14.00" fill="#000000">Import DataSet Job</text>
</g>
<!-- c0_store -->
<g id="node2" class="node">
<title>c0_store</title>
<path fill="none" stroke="#000000" d="M1159.9795,-1056.7273C1159.9795,-1058.5331 1143.8529,-1060 1124,-1060 1104.1471,-1060 1088.0205,-1058.5331 1088.0205,-1056.7273 1088.0205,-1056.7273 1088.0205,-1027.2727 1088.0205,-1027.2727 1088.0205,-1025.4669 1104.1471,-1024 1124,-1024 1143.8529,-1024 1159.9795,-1025.4669 1159.9795,-1027.2727 1159.9795,-1027.2727 1159.9795,-1056.7273 1159.9795,-1056.7273"/>
<path fill="none" stroke="#000000" d="M1159.9795,-1056.7273C1159.9795,-1054.9214 1143.8529,-1053.4545 1124,-1053.4545 1104.1471,-1053.4545 1088.0205,-1054.9214 1088.0205,-1056.7273"/>
<text text-anchor="middle" x="1124" y="-1037.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c0_Job&#45;&gt;c0_store -->
<g id="edge2" class="edge">
<title>c0_Job&#45;&gt;c0_store</title>
<path fill="none" stroke="#000000" d="M1124,-1095.8314C1124,-1088.131 1124,-1078.9743 1124,-1070.4166"/>
<polygon fill="#000000" stroke="#000000" points="1127.5001,-1070.4132 1124,-1060.4133 1120.5001,-1070.4133 1127.5001,-1070.4132"/>
</g>
<!-- c0_dsf_02 -->
<g id="node4" class="node">
<title>c0_dsf_02</title>
<polygon fill="none" stroke="#000000" points="1146.349,-988 987.651,-988 987.651,-992 975.651,-992 975.651,-952 1146.349,-952 1146.349,-988"/>
<polyline fill="none" stroke="#000000" points="975.651,-988 987.651,-988 "/>
<text text-anchor="middle" x="1061" y="-965.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (SubreadSet)</text>
</g>
<!-- c0_store&#45;&gt;c0_dsf_02 -->
<g id="edge3" class="edge">
<title>c0_store&#45;&gt;c0_dsf_02</title>
<path fill="none" stroke="#000000" d="M1108.1024,-1023.8314C1100.7722,-1015.454 1091.934,-1005.3531 1083.9031,-996.1749"/>
<polygon fill="#000000" stroke="#000000" points="1086.3307,-993.6343 1077.1116,-988.4133 1081.0627,-998.2438 1086.3307,-993.6343"/>
</g>
<!-- c0_dsf_04 -->
<g id="node5" class="node">
<title>c0_dsf_04</title>
<polygon fill="none" stroke="#000000" points="1099.6868,-916 946.3132,-916 946.3132,-920 934.3132,-920 934.3132,-880 1099.6868,-880 1099.6868,-916"/>
<polyline fill="none" stroke="#000000" points="934.3132,-916 946.3132,-916 "/>
<text text-anchor="middle" x="1017" y="-893.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_01)</text>
</g>
<!-- c0_store&#45;&gt;c0_dsf_04 -->
<g id="edge4" class="edge">
<title>c0_store&#45;&gt;c0_dsf_04</title>
<path fill="none" stroke="#000000" d="M1087.9361,-1034.6467C1046.1753,-1025.3142 981.2728,-1008.0025 967,-988 952.2436,-967.3196 969.2856,-941.8601 987.3653,-923.3031"/>
<polygon fill="#000000" stroke="#000000" points="989.8278,-925.7906 994.5482,-916.3054 984.943,-920.7766 989.8278,-925.7906"/>
</g>
<!-- c0_dsf_05 -->
<g id="node6" class="node">
<title>c0_dsf_05</title>
<polygon fill="none" stroke="#000000" points="1283.6868,-916 1130.3132,-916 1130.3132,-920 1118.3132,-920 1118.3132,-880 1283.6868,-880 1283.6868,-916"/>
<polyline fill="none" stroke="#000000" points="1118.3132,-916 1130.3132,-916 "/>
<text text-anchor="middle" x="1201" y="-893.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_02)</text>
</g>
<!-- c0_store&#45;&gt;c0_dsf_05 -->
<g id="edge5" class="edge">
<title>c0_store&#45;&gt;c0_dsf_05</title>
<path fill="none" stroke="#000000" d="M1134.8409,-1023.6305C1140.8969,-1013.2424 1148.508,-999.9739 1155,-988 1166.291,-967.1747 1178.4658,-943.2853 1187.4522,-925.3549"/>
<polygon fill="#000000" stroke="#000000" points="1190.7554,-926.5742 1192.0908,-916.0639 1184.4925,-923.4474 1190.7554,-926.5742"/>
</g>
<!-- c0_dsf_03 -->
<g id="node7" class="node">
<title>c0_dsf_03</title>
<polygon fill="none" stroke="#000000" points="1313.6348,-988 1196.3652,-988 1196.3652,-992 1184.3652,-992 1184.3652,-952 1313.6348,-952 1313.6348,-988"/>
<polyline fill="none" stroke="#000000" points="1184.3652,-988 1196.3652,-988 "/>
<text text-anchor="middle" x="1249" y="-965.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c0_store&#45;&gt;c0_dsf_03 -->
<g id="edge6" class="edge">
<title>c0_store&#45;&gt;c0_dsf_03</title>
<path fill="none" stroke="#000000" d="M1153.9406,-1024.7542C1170.3011,-1015.3306 1190.7913,-1003.5282 1208.555,-993.2963"/>
<polygon fill="#000000" stroke="#000000" points="1210.641,-996.1339 1217.5594,-988.1098 1207.1471,-990.0682 1210.641,-996.1339"/>
</g>
<!-- c0_ep1 -->
<g id="node3" class="node">
<title>c0_ep1</title>
<polygon fill="none" stroke="#000000" points="1124,-1204 933.9111,-1186 1124,-1168 1314.0889,-1186 1124,-1204"/>
<text text-anchor="middle" x="1124" y="-1181.8" font-family="Times,serif" font-size="14.00" fill="#000000">Path /path/to/alpha_subreadset.xml</text>
</g>
<!-- c0_ep1&#45;&gt;c0_Job -->
<g id="edge1" class="edge">
<title>c0_ep1&#45;&gt;c0_Job</title>
<path fill="none" stroke="#000000" d="M1124,-1167.8314C1124,-1160.131 1124,-1150.9743 1124,-1142.4166"/>
<polygon fill="#000000" stroke="#000000" points="1127.5001,-1142.4132 1124,-1132.4133 1120.5001,-1142.4133 1127.5001,-1142.4132"/>
</g>
<!-- c0_dsf_02&#45;&gt;c0_dsf_04 -->
<g id="edge7" class="edge">
<title>c0_dsf_02&#45;&gt;c0_dsf_04</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1049.8969,-951.8314C1044.9326,-943.7079 1038.9778,-933.9637 1033.5072,-925.0118"/>
<polygon fill="#000000" stroke="#000000" points="1036.4536,-923.121 1028.2526,-916.4133 1030.4806,-926.7712 1036.4536,-923.121"/>
</g>
<!-- c0_dsf_02&#45;&gt;c0_dsf_05 -->
<g id="edge8" class="edge">
<title>c0_dsf_02&#45;&gt;c0_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1096.3279,-951.8314C1114.6493,-942.4089 1137.2102,-930.8062 1156.6895,-920.7883"/>
<polygon fill="#000000" stroke="#000000" points="1158.3912,-923.8489 1165.6834,-916.1628 1155.1898,-917.6239 1158.3912,-923.8489"/>
</g>
<!-- c5_ep1 -->
<g id="node29" class="node">
<title>c5_ep1</title>
<polygon fill="none" stroke="#000000" points="1523,-916 1338.3666,-898 1523,-880 1707.6334,-898 1523,-916"/>
<text text-anchor="middle" x="1523" y="-893.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataSet UUID=X,filter=rq &gt;= 0.7</text>
</g>
<!-- c0_dsf_02&#45;&gt;c5_ep1 -->
<g id="edge55" class="edge">
<title>c0_dsf_02&#45;&gt;c5_ep1</title>
<path fill="none" stroke="#000000" d="M1146.2793,-956.5095C1155.9657,-954.9847 1165.696,-953.4564 1175,-952 1266.1088,-937.7384 1370.6627,-921.5367 1441.1906,-910.6318"/>
<polygon fill="#000000" stroke="#000000" points="1441.996,-914.0489 1451.3438,-909.0622 1440.9265,-907.1311 1441.996,-914.0489"/>
</g>
<!-- c2_Job -->
<g id="node8" class="node">
<title>c2_Job</title>
<polygon fill="none" stroke="#0000ff" points="791.1584,-826 745.5792,-844 654.4208,-844 608.8416,-826 654.4208,-808 745.5792,-808 791.1584,-826"/>
<text text-anchor="middle" x="700" y="-821.8" font-family="Times,serif" font-size="14.00" fill="#000000">Import DataSet Job</text>
</g>
<!-- c2_store -->
<g id="node9" class="node">
<title>c2_store</title>
<path fill="none" stroke="#000000" d="M735.9795,-768.7273C735.9795,-770.5331 719.8529,-772 700,-772 680.1471,-772 664.0205,-770.5331 664.0205,-768.7273 664.0205,-768.7273 664.0205,-739.2727 664.0205,-739.2727 664.0205,-737.4669 680.1471,-736 700,-736 719.8529,-736 735.9795,-737.4669 735.9795,-739.2727 735.9795,-739.2727 735.9795,-768.7273 735.9795,-768.7273"/>
<path fill="none" stroke="#000000" d="M735.9795,-768.7273C735.9795,-766.9214 719.8529,-765.4545 700,-765.4545 680.1471,-765.4545 664.0205,-766.9214 664.0205,-768.7273"/>
<text text-anchor="middle" x="700" y="-749.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c2_Job&#45;&gt;c2_store -->
<g id="edge10" class="edge">
<title>c2_Job&#45;&gt;c2_store</title>
<path fill="none" stroke="#000000" d="M700,-807.8314C700,-800.131 700,-790.9743 700,-782.4166"/>
<polygon fill="#000000" stroke="#000000" points="703.5001,-782.4132 700,-772.4133 696.5001,-782.4133 703.5001,-782.4132"/>
</g>
<!-- c2_dsf_02 -->
<g id="node11" class="node">
<title>c2_dsf_02</title>
<polygon fill="none" stroke="#000000" points="716.349,-700 557.651,-700 557.651,-704 545.651,-704 545.651,-664 716.349,-664 716.349,-700"/>
<polyline fill="none" stroke="#000000" points="545.651,-700 557.651,-700 "/>
<text text-anchor="middle" x="631" y="-677.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (SubreadSet)</text>
</g>
<!-- c2_store&#45;&gt;c2_dsf_02 -->
<g id="edge11" class="edge">
<title>c2_store&#45;&gt;c2_dsf_02</title>
<path fill="none" stroke="#000000" d="M682.5884,-735.8314C674.479,-727.3694 664.6844,-717.1489 655.818,-707.8971"/>
<polygon fill="#000000" stroke="#000000" points="658.0921,-705.2115 648.646,-700.4133 653.0382,-710.0549 658.0921,-705.2115"/>
</g>
<!-- c2_dsf_04 -->
<g id="node12" class="node">
<title>c2_dsf_04</title>
<polygon fill="none" stroke="#000000" points="681.6868,-628 528.3132,-628 528.3132,-632 516.3132,-632 516.3132,-592 681.6868,-592 681.6868,-628"/>
<polyline fill="none" stroke="#000000" points="516.3132,-628 528.3132,-628 "/>
<text text-anchor="middle" x="599" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_01)</text>
</g>
<!-- c2_store&#45;&gt;c2_dsf_04 -->
<g id="edge12" class="edge">
<title>c2_store&#45;&gt;c2_dsf_04</title>
<path fill="none" stroke="#000000" d="M663.7163,-747.0869C620.4309,-738.0163 552.0097,-720.7845 537,-700 527.6327,-687.0287 530.3719,-678.5626 537,-664 542.174,-652.6322 551.0543,-642.7268 560.5028,-634.6122"/>
<polygon fill="#000000" stroke="#000000" points="562.8729,-637.1986 568.5042,-628.2243 558.5055,-631.7281 562.8729,-637.1986"/>
</g>
<!-- c2_dsf_05 -->
<g id="node13" class="node">
<title>c2_dsf_05</title>
<polygon fill="none" stroke="#000000" points="865.6868,-628 712.3132,-628 712.3132,-632 700.3132,-632 700.3132,-592 865.6868,-592 865.6868,-628"/>
<polyline fill="none" stroke="#000000" points="700.3132,-628 712.3132,-628 "/>
<text text-anchor="middle" x="783" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_02)</text>
</g>
<!-- c2_store&#45;&gt;c2_dsf_05 -->
<g id="edge13" class="edge">
<title>c2_store&#45;&gt;c2_dsf_05</title>
<path fill="none" stroke="#000000" d="M708.0333,-735.9924C716.4973,-717.5181 730.5776,-688.1462 745,-664 750.5551,-654.6996 757.187,-644.9072 763.3438,-636.2534"/>
<polygon fill="#000000" stroke="#000000" points="766.2601,-638.1934 769.2847,-628.0395 760.5882,-634.091 766.2601,-638.1934"/>
</g>
<!-- c2_dsf_03 -->
<g id="node14" class="node">
<title>c2_dsf_03</title>
<polygon fill="none" stroke="#000000" points="883.6348,-700 766.3652,-700 766.3652,-704 754.3652,-704 754.3652,-664 883.6348,-664 883.6348,-700"/>
<polyline fill="none" stroke="#000000" points="754.3652,-700 766.3652,-700 "/>
<text text-anchor="middle" x="819" y="-677.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c2_store&#45;&gt;c2_dsf_03 -->
<g id="edge14" class="edge">
<title>c2_store&#45;&gt;c2_dsf_03</title>
<path fill="none" stroke="#000000" d="M728.5035,-736.7542C744.0787,-727.3306 763.5854,-715.5282 780.4963,-705.2963"/>
<polygon fill="#000000" stroke="#000000" points="782.3245,-708.2811 789.0685,-700.1098 778.7008,-702.292 782.3245,-708.2811"/>
</g>
<!-- c2_ep1 -->
<g id="node10" class="node">
<title>c2_ep1</title>
<polygon fill="none" stroke="#000000" points="700,-916 516.4175,-898 700,-880 883.5825,-898 700,-916"/>
<text text-anchor="middle" x="700" y="-893.8" font-family="Times,serif" font-size="14.00" fill="#000000">Path /path/to/beta_subreadset.xml</text>
</g>
<!-- c2_ep1&#45;&gt;c2_Job -->
<g id="edge9" class="edge">
<title>c2_ep1&#45;&gt;c2_Job</title>
<path fill="none" stroke="#000000" d="M700,-879.8314C700,-872.131 700,-862.9743 700,-854.4166"/>
<polygon fill="#000000" stroke="#000000" points="703.5001,-854.4132 700,-844.4133 696.5001,-854.4133 703.5001,-854.4132"/>
</g>
<!-- c2_dsf_02&#45;&gt;c2_dsf_04 -->
<g id="edge15" class="edge">
<title>c2_dsf_02&#45;&gt;c2_dsf_04</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M622.925,-663.8314C619.3898,-655.8771 615.164,-646.369 611.2544,-637.5723"/>
<polygon fill="#000000" stroke="#000000" points="614.4435,-636.1299 607.1837,-628.4133 608.0468,-638.9729 614.4435,-636.1299"/>
</g>
<!-- c2_dsf_02&#45;&gt;c2_dsf_05 -->
<g id="edge16" class="edge">
<title>c2_dsf_02&#45;&gt;c2_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M669.356,-663.8314C689.5044,-654.2874 714.3752,-642.5065 735.7083,-632.4013"/>
<polygon fill="#000000" stroke="#000000" points="737.3807,-635.482 744.9198,-628.038 734.3841,-629.1558 737.3807,-635.482"/>
</g>
<!-- c3_ep1 -->
<g id="node17" class="node">
<title>c3_ep1</title>
<polygon fill="none" stroke="#000000" points="1114,-628 914.4549,-610 1114,-592 1313.5451,-610 1114,-628"/>
<text text-anchor="middle" x="1114" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">Path /path/to/gamma_subreadset.xml</text>
</g>
<!-- c2_dsf_02&#45;&gt;c3_ep1 -->
<g id="edge56" class="edge">
<title>c2_dsf_02&#45;&gt;c3_ep1</title>
<path fill="none" stroke="#000000" d="M716.2669,-668.4292C725.9556,-666.9191 735.6898,-665.4162 745,-664 841.1319,-649.3771 951.47,-633.3262 1026.2597,-622.5557"/>
<polygon fill="#000000" stroke="#000000" points="1027.1722,-625.9605 1036.5717,-621.0716 1026.1751,-619.0319 1027.1722,-625.9605"/>
</g>
<!-- c3_Job -->
<g id="node15" class="node">
<title>c3_Job</title>
<polygon fill="none" stroke="#0000ff" points="1222.9641,-538 1175.9821,-556 1082.0179,-556 1035.0359,-538 1082.0179,-520 1175.9821,-520 1222.9641,-538"/>
<text text-anchor="middle" x="1129" y="-533.8" font-family="Times,serif" font-size="14.00" fill="#000000">Merge DataSets Job</text>
</g>
<!-- c3_store -->
<g id="node16" class="node">
<title>c3_store</title>
<path fill="none" stroke="#000000" d="M1180.9795,-480.7273C1180.9795,-482.5331 1164.8529,-484 1145,-484 1125.1471,-484 1109.0205,-482.5331 1109.0205,-480.7273 1109.0205,-480.7273 1109.0205,-451.2727 1109.0205,-451.2727 1109.0205,-449.4669 1125.1471,-448 1145,-448 1164.8529,-448 1180.9795,-449.4669 1180.9795,-451.2727 1180.9795,-451.2727 1180.9795,-480.7273 1180.9795,-480.7273"/>
<path fill="none" stroke="#000000" d="M1180.9795,-480.7273C1180.9795,-478.9214 1164.8529,-477.4545 1145,-477.4545 1125.1471,-477.4545 1109.0205,-478.9214 1109.0205,-480.7273"/>
<text text-anchor="middle" x="1145" y="-461.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c3_Job&#45;&gt;c3_store -->
<g id="edge18" class="edge">
<title>c3_Job&#45;&gt;c3_store</title>
<path fill="none" stroke="#000000" d="M1133.0375,-519.8314C1134.7487,-512.131 1136.7835,-502.9743 1138.6852,-494.4166"/>
<polygon fill="#000000" stroke="#000000" points="1142.1554,-494.9344 1140.9082,-484.4133 1135.3221,-493.4159 1142.1554,-494.9344"/>
</g>
<!-- c3_dsf_02 -->
<g id="node18" class="node">
<title>c3_dsf_02</title>
<polygon fill="none" stroke="#000000" points="1125.6407,-412 920.3593,-412 920.3593,-416 908.3593,-416 908.3593,-376 1125.6407,-376 1125.6407,-412"/>
<polyline fill="none" stroke="#000000" points="908.3593,-412 920.3593,-412 "/>
<text text-anchor="middle" x="1017" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Merged SubreadSet)</text>
</g>
<!-- c3_store&#45;&gt;c3_dsf_02 -->
<g id="edge19" class="edge">
<title>c3_store&#45;&gt;c3_dsf_02</title>
<path fill="none" stroke="#000000" d="M1114.6658,-448.937C1097.7662,-439.431 1076.4886,-427.4623 1058.111,-417.1249"/>
<polygon fill="#000000" stroke="#000000" points="1059.7858,-414.0513 1049.3541,-412.1992 1056.3539,-420.1523 1059.7858,-414.0513"/>
</g>
<!-- c3_dsf_04 -->
<g id="node19" class="node">
<title>c3_dsf_04</title>
<polygon fill="none" stroke="#000000" points="1086.6868,-340 933.3132,-340 933.3132,-344 921.3132,-344 921.3132,-304 1086.6868,-304 1086.6868,-340"/>
<polyline fill="none" stroke="#000000" points="921.3132,-340 933.3132,-340 "/>
<text text-anchor="middle" x="1004" y="-317.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_01)</text>
</g>
<!-- c3_store&#45;&gt;c3_dsf_04 -->
<g id="edge20" class="edge">
<title>c3_store&#45;&gt;c3_dsf_04</title>
<path fill="none" stroke="#000000" d="M1148.465,-447.9829C1151.5002,-426.5586 1152.995,-390.8356 1135,-368 1130.6604,-362.4931 1103.2067,-352.4903 1074.5452,-343.2156"/>
<polygon fill="#000000" stroke="#000000" points="1075.4014,-339.8149 1064.8105,-340.1058 1073.2711,-346.483 1075.4014,-339.8149"/>
</g>
<!-- c3_dsf_05 -->
<g id="node20" class="node">
<title>c3_dsf_05</title>
<polygon fill="none" stroke="#000000" points="1270.6868,-340 1117.3132,-340 1117.3132,-344 1105.3132,-344 1105.3132,-304 1270.6868,-304 1270.6868,-340"/>
<polyline fill="none" stroke="#000000" points="1105.3132,-340 1117.3132,-340 "/>
<text text-anchor="middle" x="1188" y="-317.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_02)</text>
</g>
<!-- c3_store&#45;&gt;c3_dsf_05 -->
<g id="edge21" class="edge">
<title>c3_store&#45;&gt;c3_dsf_05</title>
<path fill="none" stroke="#000000" d="M1150.446,-447.7623C1157.8126,-423.0928 1171.021,-378.8598 1179.6469,-349.9731"/>
<polygon fill="#000000" stroke="#000000" points="1183.0906,-350.673 1182.5982,-340.0896 1176.3832,-348.6701 1183.0906,-350.673"/>
</g>
<!-- c3_dsf_03 -->
<g id="node21" class="node">
<title>c3_dsf_03</title>
<polygon fill="none" stroke="#000000" points="1313.6348,-412 1196.3652,-412 1196.3652,-416 1184.3652,-416 1184.3652,-376 1313.6348,-376 1313.6348,-412"/>
<polyline fill="none" stroke="#000000" points="1184.3652,-412 1196.3652,-412 "/>
<text text-anchor="middle" x="1249" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c3_store&#45;&gt;c3_dsf_03 -->
<g id="edge22" class="edge">
<title>c3_store&#45;&gt;c3_dsf_03</title>
<path fill="none" stroke="#000000" d="M1170.4413,-448.3868C1183.5882,-439.2851 1199.8313,-428.0399 1214.1293,-418.1412"/>
<polygon fill="#000000" stroke="#000000" points="1216.5057,-420.753 1222.7354,-412.1832 1212.5212,-414.9976 1216.5057,-420.753"/>
</g>
<!-- c3_ep1&#45;&gt;c3_Job -->
<g id="edge17" class="edge">
<title>c3_ep1&#45;&gt;c3_Job</title>
<path fill="none" stroke="#000000" d="M1117.7079,-592.2022C1119.3541,-584.3005 1121.331,-574.811 1123.1675,-565.9959"/>
<polygon fill="#000000" stroke="#000000" points="1126.6174,-566.5968 1125.2306,-556.0931 1119.7645,-565.1691 1126.6174,-566.5968"/>
</g>
<!-- c3_dsf_02&#45;&gt;c3_dsf_04 -->
<g id="edge23" class="edge">
<title>c3_dsf_02&#45;&gt;c3_dsf_04</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1013.7196,-375.8314C1012.3292,-368.131 1010.6759,-358.9743 1009.1308,-350.4166"/>
<polygon fill="#000000" stroke="#000000" points="1012.5458,-349.6322 1007.3246,-340.4133 1005.6572,-350.8761 1012.5458,-349.6322"/>
</g>
<!-- c3_dsf_02&#45;&gt;c3_dsf_05 -->
<g id="edge24" class="edge">
<title>c3_dsf_02&#45;&gt;c3_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1060.1505,-375.8314C1083.1224,-366.159 1111.5509,-354.1891 1135.7632,-343.9944"/>
<polygon fill="#000000" stroke="#000000" points="1137.3016,-347.1443 1145.1598,-340.038 1134.5852,-340.6929 1137.3016,-347.1443"/>
</g>
<!-- c1_ep1 -->
<g id="node41" class="node">
<title>c1_ep1</title>
<polygon fill="none" stroke="#000000" points="747,-340 610.1801,-322 747,-304 883.8199,-322 747,-340"/>
<text text-anchor="middle" x="747" y="-317.8" font-family="Times,serif" font-size="14.00" fill="#000000">EntryPoint (SubreadSet)</text>
</g>
<!-- c3_dsf_02&#45;&gt;c1_ep1 -->
<g id="edge53" class="edge">
<title>c3_dsf_02&#45;&gt;c1_ep1</title>
<path fill="none" stroke="#000000" d="M949.2161,-375.9243C903.8068,-363.8151 844.7754,-348.0734 802.2282,-336.7275"/>
<polygon fill="#000000" stroke="#000000" points="802.8929,-333.2825 792.3287,-334.0876 801.0892,-340.0462 802.8929,-333.2825"/>
</g>
<!-- c4_Job -->
<g id="node22" class="node">
<title>c4_Job</title>
<polygon fill="none" stroke="#0000ff" points="373.1584,-538 327.5792,-556 236.4208,-556 190.8416,-538 236.4208,-520 327.5792,-520 373.1584,-538"/>
<text text-anchor="middle" x="282" y="-533.8" font-family="Times,serif" font-size="14.00" fill="#000000">Import DataSet Job</text>
</g>
<!-- c4_store -->
<g id="node23" class="node">
<title>c4_store</title>
<path fill="none" stroke="#000000" d="M317.9795,-480.7273C317.9795,-482.5331 301.8529,-484 282,-484 262.1471,-484 246.0205,-482.5331 246.0205,-480.7273 246.0205,-480.7273 246.0205,-451.2727 246.0205,-451.2727 246.0205,-449.4669 262.1471,-448 282,-448 301.8529,-448 317.9795,-449.4669 317.9795,-451.2727 317.9795,-451.2727 317.9795,-480.7273 317.9795,-480.7273"/>
<path fill="none" stroke="#000000" d="M317.9795,-480.7273C317.9795,-478.9214 301.8529,-477.4545 282,-477.4545 262.1471,-477.4545 246.0205,-478.9214 246.0205,-480.7273"/>
<text text-anchor="middle" x="282" y="-461.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c4_Job&#45;&gt;c4_store -->
<g id="edge26" class="edge">
<title>c4_Job&#45;&gt;c4_store</title>
<path fill="none" stroke="#000000" d="M282,-519.8314C282,-512.131 282,-502.9743 282,-494.4166"/>
<polygon fill="#000000" stroke="#000000" points="285.5001,-494.4132 282,-484.4133 278.5001,-494.4133 285.5001,-494.4132"/>
</g>
<!-- c4_dsf_02 -->
<g id="node25" class="node">
<title>c4_dsf_02</title>
<polygon fill="none" stroke="#000000" points="299.7042,-412 130.2958,-412 130.2958,-416 118.2958,-416 118.2958,-376 299.7042,-376 299.7042,-412"/>
<polyline fill="none" stroke="#000000" points="118.2958,-412 130.2958,-412 "/>
<text text-anchor="middle" x="209" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (ReferenceSet)</text>
</g>
<!-- c4_store&#45;&gt;c4_dsf_02 -->
<g id="edge27" class="edge">
<title>c4_store&#45;&gt;c4_dsf_02</title>
<path fill="none" stroke="#000000" d="M263.579,-447.8314C254.8471,-439.219 244.2683,-428.7851 234.7577,-419.4048"/>
<polygon fill="#000000" stroke="#000000" points="236.9925,-416.6931 227.4151,-412.1628 232.077,-421.6769 236.9925,-416.6931"/>
</g>
<!-- c4_dsf_03 -->
<g id="node26" class="node">
<title>c4_dsf_03</title>
<polygon fill="none" stroke="#000000" points="446.6348,-412 329.3652,-412 329.3652,-416 317.3652,-416 317.3652,-376 446.6348,-376 446.6348,-412"/>
<polyline fill="none" stroke="#000000" points="317.3652,-412 329.3652,-412 "/>
<text text-anchor="middle" x="382" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c4_store&#45;&gt;c4_dsf_03 -->
<g id="edge28" class="edge">
<title>c4_store&#45;&gt;c4_dsf_03</title>
<path fill="none" stroke="#000000" d="M306.4628,-448.3868C319.1041,-439.2851 334.7224,-428.0399 348.4705,-418.1412"/>
<polygon fill="#000000" stroke="#000000" points="350.6753,-420.8666 356.7456,-412.1832 346.5851,-415.1859 350.6753,-420.8666"/>
</g>
<!-- c4_ep1 -->
<g id="node24" class="node">
<title>c4_ep1</title>
<polygon fill="none" stroke="#000000" points="282,-628 117.8661,-610 282,-592 446.1339,-610 282,-628"/>
<text text-anchor="middle" x="282" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">Path /path/to/referenceset.xml</text>
</g>
<!-- c4_ep1&#45;&gt;c4_Job -->
<g id="edge25" class="edge">
<title>c4_ep1&#45;&gt;c4_Job</title>
<path fill="none" stroke="#000000" d="M282,-591.8314C282,-584.131 282,-574.9743 282,-566.4166"/>
<polygon fill="#000000" stroke="#000000" points="285.5001,-566.4132 282,-556.4133 278.5001,-566.4133 285.5001,-566.4132"/>
</g>
<!-- c1_ep2 -->
<g id="node42" class="node">
<title>c1_ep2</title>
<polygon fill="none" stroke="#000000" points="209,-340 62.2852,-322 209,-304 355.7148,-322 209,-340"/>
<text text-anchor="middle" x="209" y="-317.8" font-family="Times,serif" font-size="14.00" fill="#000000">EntryPoint (ReferenceSet)</text>
</g>
<!-- c4_dsf_02&#45;&gt;c1_ep2 -->
<g id="edge54" class="edge">
<title>c4_dsf_02&#45;&gt;c1_ep2</title>
<path fill="none" stroke="#000000" d="M209,-375.8314C209,-368.131 209,-358.9743 209,-350.4166"/>
<polygon fill="#000000" stroke="#000000" points="212.5001,-350.4132 209,-340.4133 205.5001,-350.4133 212.5001,-350.4132"/>
</g>
<!-- c5_Job -->
<g id="node27" class="node">
<title>c5_Job</title>
<polygon fill="none" stroke="#0000ff" points="1637.7273,-826 1580.3637,-844 1465.6363,-844 1408.2727,-826 1465.6363,-808 1580.3637,-808 1637.7273,-826"/>
<text text-anchor="middle" x="1523" y="-821.8" font-family="Times,serif" font-size="14.00" fill="#000000">Copy (and filter) DataSet</text>
</g>
<!-- c5_store -->
<g id="node28" class="node">
<title>c5_store</title>
<path fill="none" stroke="#000000" d="M1558.9795,-768.7273C1558.9795,-770.5331 1542.8529,-772 1523,-772 1503.1471,-772 1487.0205,-770.5331 1487.0205,-768.7273 1487.0205,-768.7273 1487.0205,-739.2727 1487.0205,-739.2727 1487.0205,-737.4669 1503.1471,-736 1523,-736 1542.8529,-736 1558.9795,-737.4669 1558.9795,-739.2727 1558.9795,-739.2727 1558.9795,-768.7273 1558.9795,-768.7273"/>
<path fill="none" stroke="#000000" d="M1558.9795,-768.7273C1558.9795,-766.9214 1542.8529,-765.4545 1523,-765.4545 1503.1471,-765.4545 1487.0205,-766.9214 1487.0205,-768.7273"/>
<text text-anchor="middle" x="1523" y="-749.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c5_Job&#45;&gt;c5_store -->
<g id="edge30" class="edge">
<title>c5_Job&#45;&gt;c5_store</title>
<path fill="none" stroke="#000000" d="M1523,-807.8314C1523,-800.131 1523,-790.9743 1523,-782.4166"/>
<polygon fill="#000000" stroke="#000000" points="1526.5001,-782.4132 1523,-772.4133 1519.5001,-782.4133 1526.5001,-782.4132"/>
</g>
<!-- c5_dsf_02 -->
<g id="node30" class="node">
<title>c5_dsf_02</title>
<polygon fill="none" stroke="#000000" points="1540.349,-700 1381.651,-700 1381.651,-704 1369.651,-704 1369.651,-664 1540.349,-664 1540.349,-700"/>
<polyline fill="none" stroke="#000000" points="1369.651,-700 1381.651,-700 "/>
<text text-anchor="middle" x="1455" y="-677.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (SubreadSet)</text>
</g>
<!-- c5_store&#45;&gt;c5_dsf_02 -->
<g id="edge31" class="edge">
<title>c5_store&#45;&gt;c5_dsf_02</title>
<path fill="none" stroke="#000000" d="M1505.8407,-735.8314C1497.8489,-727.3694 1488.1962,-717.1489 1479.4584,-707.8971"/>
<polygon fill="#000000" stroke="#000000" points="1481.8011,-705.2802 1472.3903,-700.4133 1476.712,-710.0866 1481.8011,-705.2802"/>
</g>
<!-- c5_dsf_04 -->
<g id="node31" class="node">
<title>c5_dsf_04</title>
<polygon fill="none" stroke="#000000" points="1503.6868,-628 1350.3132,-628 1350.3132,-632 1338.3132,-632 1338.3132,-592 1503.6868,-592 1503.6868,-628"/>
<polyline fill="none" stroke="#000000" points="1338.3132,-628 1350.3132,-628 "/>
<text text-anchor="middle" x="1421" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_01)</text>
</g>
<!-- c5_store&#45;&gt;c5_dsf_04 -->
<g id="edge32" class="edge">
<title>c5_store&#45;&gt;c5_dsf_04</title>
<path fill="none" stroke="#000000" d="M1486.9357,-747.0542C1443.9126,-737.9499 1375.9077,-720.6833 1361,-700 1351.6446,-687.0201 1354.4906,-678.616 1361,-664 1366.0334,-652.6983 1374.7305,-642.7696 1383.9659,-634.611"/>
<polygon fill="#000000" stroke="#000000" points="1386.2815,-637.2382 1391.7816,-628.1828 1381.835,-631.8319 1386.2815,-637.2382"/>
</g>
<!-- c5_dsf_05 -->
<g id="node32" class="node">
<title>c5_dsf_05</title>
<polygon fill="none" stroke="#000000" points="1687.6868,-628 1534.3132,-628 1534.3132,-632 1522.3132,-632 1522.3132,-592 1687.6868,-592 1687.6868,-628"/>
<polyline fill="none" stroke="#000000" points="1522.3132,-628 1534.3132,-628 "/>
<text text-anchor="middle" x="1605" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_02)</text>
</g>
<!-- c5_store&#45;&gt;c5_dsf_05 -->
<g id="edge33" class="edge">
<title>c5_store&#45;&gt;c5_dsf_05</title>
<path fill="none" stroke="#000000" d="M1531.5643,-735.7416C1540.3816,-717.3218 1554.8005,-688.2208 1569,-664 1574.3289,-654.9102 1580.5685,-645.2555 1586.3422,-636.6616"/>
<polygon fill="#000000" stroke="#000000" points="1589.3972,-638.3936 1592.1328,-628.158 1583.6113,-634.4536 1589.3972,-638.3936"/>
</g>
<!-- c5_dsf_03 -->
<g id="node33" class="node">
<title>c5_dsf_03</title>
<polygon fill="none" stroke="#000000" points="1707.6348,-700 1590.3652,-700 1590.3652,-704 1578.3652,-704 1578.3652,-664 1707.6348,-664 1707.6348,-700"/>
<polyline fill="none" stroke="#000000" points="1578.3652,-700 1590.3652,-700 "/>
<text text-anchor="middle" x="1643" y="-677.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c5_store&#45;&gt;c5_dsf_03 -->
<g id="edge34" class="edge">
<title>c5_store&#45;&gt;c5_dsf_03</title>
<path fill="none" stroke="#000000" d="M1551.743,-736.7542C1567.4491,-727.3306 1587.1197,-715.5282 1604.1728,-705.2963"/>
<polygon fill="#000000" stroke="#000000" points="1606.0428,-708.256 1612.817,-700.1098 1602.4413,-702.2536 1606.0428,-708.256"/>
</g>
<!-- c5_ep1&#45;&gt;c5_Job -->
<g id="edge29" class="edge">
<title>c5_ep1&#45;&gt;c5_Job</title>
<path fill="none" stroke="#000000" d="M1523,-879.8314C1523,-872.131 1523,-862.9743 1523,-854.4166"/>
<polygon fill="#000000" stroke="#000000" points="1526.5001,-854.4132 1523,-844.4133 1519.5001,-854.4133 1526.5001,-854.4132"/>
</g>
<!-- c5_dsf_02&#45;&gt;c3_ep1 -->
<g id="edge57" class="edge">
<title>c5_dsf_02&#45;&gt;c3_ep1</title>
<path fill="none" stroke="#000000" d="M1369.3915,-663.9243C1312.1209,-651.832 1237.694,-636.1172 1183.9755,-624.7749"/>
<polygon fill="#000000" stroke="#000000" points="1184.3647,-621.28 1173.8573,-622.6385 1182.9185,-628.129 1184.3647,-621.28"/>
</g>
<!-- c5_dsf_02&#45;&gt;c5_dsf_04 -->
<g id="edge35" class="edge">
<title>c5_dsf_02&#45;&gt;c5_dsf_04</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1446.4204,-663.8314C1442.6642,-655.8771 1438.1743,-646.369 1434.0202,-637.5723"/>
<polygon fill="#000000" stroke="#000000" points="1437.1301,-635.9612 1429.6952,-628.4133 1430.8004,-638.9503 1437.1301,-635.9612"/>
</g>
<!-- c5_dsf_02&#45;&gt;c5_dsf_05 -->
<g id="edge36" class="edge">
<title>c5_dsf_02&#45;&gt;c5_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M1492.8513,-663.8314C1512.7346,-654.2874 1537.2782,-642.5065 1558.3305,-632.4013"/>
<polygon fill="#000000" stroke="#000000" points="1559.9202,-635.5207 1567.4209,-628.038 1556.891,-629.21 1559.9202,-635.5207"/>
</g>
<!-- c6_ep1 -->
<g id="node36" class="node">
<title>c6_ep1</title>
<polygon fill="none" stroke="#000000" points="1863,-628 1732.431,-610 1863,-592 1993.569,-610 1863,-628"/>
<text text-anchor="middle" x="1863" y="-605.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataSet UUIDs=X,Y,Z</text>
</g>
<!-- c5_dsf_02&#45;&gt;c6_ep1 -->
<g id="edge58" class="edge">
<title>c5_dsf_02&#45;&gt;c6_ep1</title>
<path fill="none" stroke="#000000" d="M1540.3615,-667.3605C1592.3117,-658.4068 1660.003,-646.6614 1720,-636 1745.1082,-631.5383 1772.6784,-626.5473 1796.8537,-622.1403"/>
<polygon fill="#000000" stroke="#000000" points="1797.7804,-625.529 1806.9896,-620.2905 1796.5237,-618.6428 1797.7804,-625.529"/>
</g>
<!-- c6_Job -->
<g id="node34" class="node">
<title>c6_Job</title>
<polygon fill="none" stroke="#0000ff" points="1981.6751,-538 1922.3376,-556 1803.6624,-556 1744.3249,-538 1803.6624,-520 1922.3376,-520 1981.6751,-538"/>
<text text-anchor="middle" x="1863" y="-533.8" font-family="Times,serif" font-size="14.00" fill="#000000">Export DataSet(s) Zip Job</text>
</g>
<!-- c6_store -->
<g id="node35" class="node">
<title>c6_store</title>
<path fill="none" stroke="#000000" d="M1898.9795,-480.7273C1898.9795,-482.5331 1882.8529,-484 1863,-484 1843.1471,-484 1827.0205,-482.5331 1827.0205,-480.7273 1827.0205,-480.7273 1827.0205,-451.2727 1827.0205,-451.2727 1827.0205,-449.4669 1843.1471,-448 1863,-448 1882.8529,-448 1898.9795,-449.4669 1898.9795,-451.2727 1898.9795,-451.2727 1898.9795,-480.7273 1898.9795,-480.7273"/>
<path fill="none" stroke="#000000" d="M1898.9795,-480.7273C1898.9795,-478.9214 1882.8529,-477.4545 1863,-477.4545 1843.1471,-477.4545 1827.0205,-478.9214 1827.0205,-480.7273"/>
<text text-anchor="middle" x="1863" y="-461.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c6_Job&#45;&gt;c6_store -->
<g id="edge38" class="edge">
<title>c6_Job&#45;&gt;c6_store</title>
<path fill="none" stroke="#000000" d="M1863,-519.8314C1863,-512.131 1863,-502.9743 1863,-494.4166"/>
<polygon fill="#000000" stroke="#000000" points="1866.5001,-494.4132 1863,-484.4133 1859.5001,-494.4133 1866.5001,-494.4132"/>
</g>
<!-- c6_dsf_02 -->
<g id="node37" class="node">
<title>c6_dsf_02</title>
<polygon fill="none" stroke="#000000" points="1945.9004,-412 1744.0996,-412 1744.0996,-416 1732.0996,-416 1732.0996,-376 1945.9004,-376 1945.9004,-412"/>
<polyline fill="none" stroke="#000000" points="1732.0996,-412 1744.0996,-412 "/>
<text text-anchor="middle" x="1839" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile DataSet XML(s) Zip</text>
</g>
<!-- c6_store&#45;&gt;c6_dsf_02 -->
<g id="edge39" class="edge">
<title>c6_store&#45;&gt;c6_dsf_02</title>
<path fill="none" stroke="#000000" d="M1856.9438,-447.8314C1854.3488,-440.0463 1851.2576,-430.7729 1848.3782,-422.1347"/>
<polygon fill="#000000" stroke="#000000" points="1851.6205,-420.7933 1845.1378,-412.4133 1844.9797,-423.0069 1851.6205,-420.7933"/>
</g>
<!-- c6_dsf_03 -->
<g id="node38" class="node">
<title>c6_dsf_03</title>
<polygon fill="none" stroke="#000000" points="2093.6348,-412 1976.3652,-412 1976.3652,-416 1964.3652,-416 1964.3652,-376 2093.6348,-376 2093.6348,-412"/>
<polyline fill="none" stroke="#000000" points="1964.3652,-412 1976.3652,-412 "/>
<text text-anchor="middle" x="2029" y="-389.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Log)</text>
</g>
<!-- c6_store&#45;&gt;c6_dsf_03 -->
<g id="edge40" class="edge">
<title>c6_store&#45;&gt;c6_dsf_03</title>
<path fill="none" stroke="#000000" d="M1899.0195,-450.3771C1922.2043,-440.321 1952.6814,-427.102 1978.2964,-415.9919"/>
<polygon fill="#000000" stroke="#000000" points="1979.7095,-419.1941 1987.491,-412.0039 1976.924,-412.7722 1979.7095,-419.1941"/>
</g>
<!-- c6_ep1&#45;&gt;c6_Job -->
<g id="edge37" class="edge">
<title>c6_ep1&#45;&gt;c6_Job</title>
<path fill="none" stroke="#000000" d="M1863,-591.8314C1863,-584.131 1863,-574.9743 1863,-566.4166"/>
<polygon fill="#000000" stroke="#000000" points="1866.5001,-566.4132 1863,-556.4133 1859.5001,-566.4133 1866.5001,-566.4132"/>
</g>
<!-- c1_Job -->
<g id="node39" class="node">
<title>c1_Job</title>
<polygon fill="none" stroke="#0000ff" points="418.2987,-250 386.1493,-268 321.8507,-268 289.7013,-250 321.8507,-232 386.1493,-232 418.2987,-250"/>
<text text-anchor="middle" x="354" y="-245.8" font-family="Times,serif" font-size="14.00" fill="#000000">Analysis Job</text>
</g>
<!-- c1_store -->
<g id="node40" class="node">
<title>c1_store</title>
<path fill="none" stroke="#000000" d="M389.9795,-192.7273C389.9795,-194.5331 373.8529,-196 354,-196 334.1471,-196 318.0205,-194.5331 318.0205,-192.7273 318.0205,-192.7273 318.0205,-163.2727 318.0205,-163.2727 318.0205,-161.4669 334.1471,-160 354,-160 373.8529,-160 389.9795,-161.4669 389.9795,-163.2727 389.9795,-163.2727 389.9795,-192.7273 389.9795,-192.7273"/>
<path fill="none" stroke="#000000" d="M389.9795,-192.7273C389.9795,-190.9214 373.8529,-189.4545 354,-189.4545 334.1471,-189.4545 318.0205,-190.9214 318.0205,-192.7273"/>
<text text-anchor="middle" x="354" y="-173.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStore</text>
</g>
<!-- c1_Job&#45;&gt;c1_store -->
<g id="edge43" class="edge">
<title>c1_Job&#45;&gt;c1_store</title>
<path fill="none" stroke="#000000" d="M354,-231.8314C354,-224.131 354,-214.9743 354,-206.4166"/>
<polygon fill="#000000" stroke="#000000" points="357.5001,-206.4132 354,-196.4133 350.5001,-206.4133 357.5001,-206.4132"/>
</g>
<!-- c1_dsf_01 -->
<g id="node43" class="node">
<title>c1_dsf_01</title>
<polygon fill="none" stroke="#000000" points="172.1348,-124 47.8652,-124 47.8652,-128 35.8652,-128 35.8652,-88 172.1348,-88 172.1348,-124"/>
<polyline fill="none" stroke="#000000" points="35.8652,-124 47.8652,-124 "/>
<text text-anchor="middle" x="104" y="-101.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Fasta)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_01 -->
<g id="edge44" class="edge">
<title>c1_store&#45;&gt;c1_dsf_01</title>
<path fill="none" stroke="#000000" d="M317.9148,-167.6075C281.0526,-156.9911 222.801,-140.2147 176.3873,-126.8475"/>
<polygon fill="#000000" stroke="#000000" points="177.2528,-123.4546 166.6747,-124.0503 175.3155,-130.1812 177.2528,-123.4546"/>
</g>
<!-- c1_dsf_02 -->
<g id="node44" class="node">
<title>c1_dsf_02</title>
<polygon fill="none" stroke="#000000" points="700.3558,-124 527.6442,-124 527.6442,-128 515.6442,-128 515.6442,-88 700.3558,-88 700.3558,-124"/>
<polyline fill="none" stroke="#000000" points="515.6442,-124 527.6442,-124 "/>
<text text-anchor="middle" x="608" y="-101.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (AlignmentSet)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_02 -->
<g id="edge45" class="edge">
<title>c1_store&#45;&gt;c1_dsf_02</title>
<path fill="none" stroke="#000000" d="M390.1223,-167.7606C427.4596,-157.1768 486.8038,-140.3548 534.1354,-126.938"/>
<polygon fill="#000000" stroke="#000000" points="535.3745,-130.2247 544.0409,-124.1301 533.4654,-123.4901 535.3745,-130.2247"/>
</g>
<!-- c1_dsf_03 -->
<g id="node45" class="node">
<title>c1_dsf_03</title>
<polygon fill="none" stroke="#000000" points="497.8175,-124 376.1825,-124 376.1825,-128 364.1825,-128 364.1825,-88 497.8175,-88 497.8175,-124"/>
<polyline fill="none" stroke="#000000" points="364.1825,-124 376.1825,-124 "/>
<text text-anchor="middle" x="431" y="-101.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (VCF)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_03 -->
<g id="edge46" class="edge">
<title>c1_store&#45;&gt;c1_dsf_03</title>
<path fill="none" stroke="#000000" d="M373.4304,-159.8314C382.732,-151.1337 394.0204,-140.5783 404.1286,-131.1265"/>
<polygon fill="#000000" stroke="#000000" points="406.6621,-133.5493 411.5759,-124.1628 401.8811,-128.4363 406.6621,-133.5493"/>
</g>
<!-- c1_dsf_04 -->
<g id="node46" class="node">
<title>c1_dsf_04</title>
<polygon fill="none" stroke="#000000" points="883.6868,-124 730.3132,-124 730.3132,-128 718.3132,-128 718.3132,-88 883.6868,-88 883.6868,-124"/>
<polyline fill="none" stroke="#000000" points="718.3132,-124 730.3132,-124 "/>
<text text-anchor="middle" x="801" y="-101.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_01)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_04 -->
<g id="edge47" class="edge">
<title>c1_store&#45;&gt;c1_dsf_04</title>
<path fill="none" stroke="#000000" d="M390.1117,-173.0212C452.6812,-164.283 585.5162,-145.2413 708.3248,-124.0796"/>
<polygon fill="#000000" stroke="#000000" points="709.0224,-127.5109 718.2785,-122.3558 707.8279,-120.6136 709.0224,-127.5109"/>
</g>
<!-- c1_dsf_05 -->
<g id="node47" class="node">
<title>c1_dsf_05</title>
<polygon fill="none" stroke="#000000" points="273.6868,-52 120.3132,-52 120.3132,-56 108.3132,-56 108.3132,-16 273.6868,-16 273.6868,-52"/>
<polyline fill="none" stroke="#000000" points="108.3132,-52 120.3132,-52 "/>
<text text-anchor="middle" x="191" y="-29.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (Report_02)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_05 -->
<g id="edge48" class="edge">
<title>c1_store&#45;&gt;c1_dsf_05</title>
<path fill="none" stroke="#000000" d="M317.768,-169.9006C277.3724,-160.0995 215.7572,-142.5856 201,-124 187.3901,-106.8594 185.6255,-81.7447 186.8478,-62.3982"/>
<polygon fill="#000000" stroke="#000000" points="190.343,-62.6105 187.7778,-52.3309 183.3727,-61.9665 190.343,-62.6105"/>
</g>
<!-- c1_dsf_06 -->
<g id="node48" class="node">
<title>c1_dsf_06</title>
<polygon fill="none" stroke="#000000" points="345.8559,-124 222.1441,-124 222.1441,-128 210.1441,-128 210.1441,-88 345.8559,-88 345.8559,-124"/>
<polyline fill="none" stroke="#000000" points="210.1441,-124 222.1441,-124 "/>
<text text-anchor="middle" x="278" y="-101.8" font-family="Times,serif" font-size="14.00" fill="#000000">DataStoreFile (LOG)</text>
</g>
<!-- c1_store&#45;&gt;c1_dsf_06 -->
<g id="edge49" class="edge">
<title>c1_store&#45;&gt;c1_dsf_06</title>
<path fill="none" stroke="#000000" d="M334.822,-159.8314C325.6412,-151.1337 314.4994,-140.5783 304.5224,-131.1265"/>
<polygon fill="#000000" stroke="#000000" points="306.8385,-128.4995 297.1719,-124.1628 302.0243,-133.5811 306.8385,-128.4995"/>
</g>
<!-- c1_ep1&#45;&gt;c1_Job -->
<g id="edge41" class="edge">
<title>c1_ep1&#45;&gt;c1_Job</title>
<path fill="none" stroke="#000000" d="M689.4335,-311.4535C615.4159,-297.893 487.5519,-274.4675 412.896,-260.7901"/>
<polygon fill="#000000" stroke="#000000" points="413.1931,-257.2864 402.726,-258.9269 411.9316,-264.1718 413.1931,-257.2864"/>
</g>
<!-- c1_ep1&#45;&gt;c1_dsf_04 -->
<g id="edge50" class="edge">
<title>c1_ep1&#45;&gt;c1_dsf_04</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M751.419,-304.3238C760.7899,-266.8404 782.7194,-179.1223 794.0098,-133.9606"/>
<polygon fill="#000000" stroke="#000000" points="797.4137,-134.7759 796.4436,-124.2255 790.6227,-133.0781 797.4137,-134.7759"/>
</g>
<!-- c1_ep2&#45;&gt;c1_Job -->
<g id="edge42" class="edge">
<title>c1_ep2&#45;&gt;c1_Job</title>
<path fill="none" stroke="#000000" d="M238.3429,-307.4297C259.1636,-297.0912 287.4506,-283.0452 310.838,-271.4322"/>
<polygon fill="#000000" stroke="#000000" points="312.4636,-274.5328 319.8636,-266.9505 309.3504,-268.2631 312.4636,-274.5328"/>
</g>
<!-- c1_ep2&#45;&gt;c1_dsf_05 -->
<g id="edge52" class="edge">
<title>c1_ep2&#45;&gt;c1_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M186.6668,-306.7236C145.653,-277.1379 59.5561,-207.5257 27,-124 21.1894,-109.0924 17.6209,-100.9627 27,-88 36.3556,-75.0698 66.3032,-63.5534 98.0934,-54.5117"/>
<polygon fill="#000000" stroke="#000000" points="99.2847,-57.8141 107.9977,-51.7863 97.4275,-51.065 99.2847,-57.8141"/>
</g>
<!-- c1_dsf_02&#45;&gt;c1_dsf_05 -->
<g id="edge51" class="edge">
<title>c1_dsf_02&#45;&gt;c1_dsf_05</title>
<path fill="none" stroke="#000000" stroke-dasharray="1,5" d="M515.8063,-89.5416C512.8365,-89.0203 509.895,-88.5053 507,-88 431.9277,-74.8961 346.7615,-60.3616 283.7756,-49.6755"/>
<polygon fill="#000000" stroke="#000000" points="284.3066,-46.2157 273.8621,-47.9943 283.1361,-53.1172 284.3066,-46.2157"/>
</g>
</g>
</svg>

Arch Overview

Core Nouns of the PacBio System

  1. Run (often created/edited from SMRT Link RunDesign, stored as XML)
  2. CollectionMetadata a Run has a list of Collection (Primary Analysis will convert a CollectionMetadata to a SubreadSet)
  3. PacBio DataSets SubreadSet, ReferenceSet, etc... These are thin-ish XML files that have general metadata as well as pointers to 'external resources' (e.g., BAM, Fasta files) and their companion index files.
  4. SMRT Link Job A general (async) unit of work to perform operations on PacBio DataSets
  5. DataStoreFile a container for the output files of a SMRT Link Job; it holds metadata such as file type, size, and path. A list of DataStoreFiles is called a DataStore. This is the core output of a SMRT Link Job.
  6. Report a Report is a general model to capture Report metrics (also referred to as 'Attributes'), Report Tables and Report Plot Groups. A Report is a specific type of DataStoreFile and is used to communicate details of a SMRT Link Job to the SMRT Link UI (and webservices).

Second tier models, such as Report View Rules, or Pipeline View Rules are not discussed here.

General Workflow starting from PA

ICS/PA takes a Run XML with a list of Collections, converts each CollectionMeta into a SubreadSet. The SubreadSet is copied from ICS/PA file system into the customer storage on NFS (accessible by the companion SMRT Link instance) and the SubreadSet XML is imported into SMRT Link using the import-dataset Job type in SMRT Link. The Reports for the SubreadSet XML emitted from the import-dataset job show up in RunQC as well as in DataManagement in SMRT Link.

Shown below is a sketch of the dataflow.

ICS and Primary Analysis DataFlow to Generate a SubreadSets for a given Run

General SMRT Link Job Model

Simplified, the general interface of a SMRT Link Job, for type T, is:

A Job takes T as input and produces a DataStore (T -> Job -> DataStore)

List of EntryPoint PB DataSet -> Job -> DataStore

A DataStore is a list of DataStore files.

Each DataStoreFile can be a different file type (e.g., PB DataSet, VCF, ReportJSON, Fasta) and also contains the specific Job id and UUID that generated the DataStoreFile.

During and after SMRT Link Job execution, the DataStoreFiles will be imported into the database. For a specific subset of file types (PB DataSet types), additional metadata will be stored in the SMRT Link database. Each DataSet has metadata about the specific dataset type as well as metadata about a possible 'parent' DataSet. The DataSet 'parentage' can be a result of copying, merging, or analysis (the semantics are not consistent).

Report Details

Each ReportJson file type contains a list of PB DataSet UUIDs in the data model. This is used to communicate which DataSets are specific to the input(s) of a specific ReportJSON. Alternatively said, the EntryPoint PB DataSet(s) might not be directly used to compute the ReportJson datastore file.

Example Jobs

NOTE, the dotted arrow represents the relation between the Report and the source input for the task at the Report JSON level. This is NOT captured at the SMRT Link Server level.

Import DataSet Job

Import DataSet Job

Accessing the Reports and the source DataSet is clearly defined here by only depending on the Job Id.

I believe the Merge DataSet Job type is Similar.

Example Resequencing Job

Analysis Job

Example: Larger Picture of DataFlow in SMRT Link using SMRT Link Jobs

Simple Example

To perform a standard Resequencing Job, the user can run two different import-dataset SMRT Link Jobs, then a pbsmrtpipe (i.e., 'Analysis') SMRT Link Job can be performed.

Steps:

  1. Import SubreadSet
  2. Import ReferenceSet
  3. Run Analysis Job to run the Resequencing Analysis

Import DataSets and Perform Resequencing Analysis

(Each Job type is shown in its own box)

Advanced Example

To demonstrate a larger dataflow example, consider the following case. A user would like to import SubreadSet alpha and beta, perform filtering on beta, merge the datasets, perform a Resequencing analysis on the merged subreadset and export the filtered SubreadSet as a zip.

Steps:

  1. Import ReferenceSet, SubreadSet alpha and Beta
  2. Create a filtered SubreadSet from SubreadSet alpha
  3. Create a Merged SubreadSet from SubreadSet Beta and the output of #2
  4. Create an Analysis Job using #3 and ReferenceSet from #1
  5. Create a DataSet XML(s) ZIP from the output of #2

Advanced Dataflow Example

This demonstrates the graph nature of the design and the composability of different SMRT Link Job types. Note that data provenance comes for free in the model.

// Two import-dataset Jobs feeding one Analysis Job. Each Job is drawn in its
// own cluster; the two edges at the bottom wire an imported DataSet into the
// matching Analysis entry point.
digraph {
    // Import of the SubreadSet.
    subgraph cluster_0 {
        c0_Job   [label="Import DataSet", shape=hexagon, color=blue];
        c0_store [label="DataStore", shape=cylinder];
        c0_ep1   [label="Path /path/to/subreadset.xml", shape=diamond];
        c0_ep1 -> c0_Job;
        c0_Job -> c0_store;

        // Remaining nodes in this cluster are DataStoreFiles (tab shape).
        node [shape=tab];
        c0_dsf_02 [label="DataStoreFile (SubreadSet)"];
        c0_dsf_04 [label="DataStoreFile (Report_01)"];
        c0_dsf_05 [label="DataStoreFile (Report_02)"];
        c0_dsf_03 [label="DataStoreFile (Log)"];
        c0_store -> c0_dsf_02;
        c0_store -> c0_dsf_04;
        c0_store -> c0_dsf_05;
        c0_store -> c0_dsf_03;

        // Dotted edges: source DataSet -> Report relation.
        edge [style=dotted];
        c0_dsf_02 -> c0_dsf_04;
        c0_dsf_02 -> c0_dsf_05;
    }

    // Import of the ReferenceSet (no Reports are drawn for this Job).
    subgraph cluster_2 {
        c2_Job   [label="Import DataSet", shape=hexagon, color=blue];
        c2_store [label="DataStore", shape=cylinder];
        c2_ep1   [label="Path /path/to/referenceset.xml", shape=diamond];
        c2_ep1 -> c2_Job;
        c2_Job -> c2_store;

        node [shape=tab];
        c2_dsf_02 [label="DataStoreFile (ReferenceSet)"];
        c2_dsf_03 [label="DataStoreFile (Log)"];
        c2_store -> c2_dsf_02;
        c2_store -> c2_dsf_03;
    }

    // Analysis Job consuming both imported DataSets.
    subgraph cluster_02 {
        c1_Job   [label="Analysis Job", shape=hexagon, color=blue];
        c1_store [label="DataStore", shape=cylinder];
        c1_ep1   [label="EntryPoint (SubreadSet)", shape=diamond];
        c1_ep2   [label="EntryPoint (ReferenceSet)", shape=diamond];
        c1_ep1 -> c1_Job;
        c1_ep2 -> c1_Job;
        c1_Job -> c1_store;

        node [shape=tab];
        c1_dsf_01 [label="DataStoreFile (Fasta)"];
        c1_dsf_02 [label="DataStoreFile (AlignmentSet)"];
        c1_dsf_03 [label="DataStoreFile (VCF)"];
        c1_dsf_04 [label="DataStoreFile (Report_01)"];
        c1_dsf_05 [label="DataStoreFile (Report_02)"];
        c1_dsf_06 [label="DataStoreFile (LOG)"];
        c1_store -> c1_dsf_01;
        c1_store -> c1_dsf_02;
        c1_store -> c1_dsf_03;
        c1_store -> c1_dsf_04;
        c1_store -> c1_dsf_05;
        c1_store -> c1_dsf_06;

        // Dotted edges: which input each Report was computed from.
        edge [style=dotted];
        c1_ep1 -> c1_dsf_04;
        c1_dsf_02 -> c1_dsf_05;
        c1_ep2 -> c1_dsf_05;
    }

    // Cross-Job wiring: imported DataSets become Analysis entry points.
    c2_dsf_02 -> c1_ep2;
    c0_dsf_02 -> c1_ep1;
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Notes on SMRT Link DataSet, Job, DataStoreFile, Report models

Current Model for SMRT Link 'Job' model

Simplified, the general interface of a SMRT Link Job, for type T, is:

A Job takes T as input and produces a DataStore (T -> Job -> DataStore)

List of EntryPoint PB DataSet -> Job -> DataStore

A DataStore is a list of DataStore files.

Each DataStoreFile can be a different file type (e.g., PB DataSet, VCF, ReportJSON, Fasta) and also contains the specific Job id and UUID that generated the DataStoreFile.

During and after SMRT Link Job execution, the DataStoreFiles will be imported into the database. For a specific subset of file types (PB DataSet types), additional metadata will be stored in the SMRT Link database. Each DataSet has metadata about the specific dataset type as well as metadata about a possible 'parent' DataSet. The DataSet 'parentage' can be a result of copying, merging, or analysis (the semantics are not consistent).

Report Details

Each ReportJson file type contains a list of PB DataSet UUIDs in the data model. This is used to communicate which DataSets are specific to the input(s) of a specific ReportJSON. Alternatively said, the EntryPoint PB DataSet(s) might not be directly used to compute the ReportJson datastore file.

NOTE This is the core issue. Currently the system only communicates the DataSet Job Id

Example Jobs

NOTE, the dotted arrow represents the relation between the Report and the source input for the task at the Report JSON level. This is NOT captured at the SMRT Link Server level.

Import DataSet Job

Import DataSet Job

Accessing the Reports and the source DataSet is clearly defined here by only depending on the Job Id.

I believe the Merge DataSet Job type is Similar.

Example Resequencing Job

Analysis Job

The use case here is often to view all of the output Reports; links back to the source DataSet are not necessary.

However, viewing the AlignmentSet in DM will start to yield unexpected results. This is why SMRT Link has a workaround to filter all the Reports from the Job and only show the Reports that belong to the AlignmentSet of interest. This works for a small number of Reports, but DOES NOT work for a Job that outputs "many" reports (because of the explicit filtering necessary on the client side).

Example Demux Analysis Job

Demux Analysis Job

The model is more involved when N DataSets and N (or more) companion reports per DataSet are emitted.

The core issue is that a specific DataSet of the N will return

Job Output Access Points

From the file system access point, the DataStore is accessible in the datastore.json file in the SMRT Link Job directory (the path of this is not consistent, but it's often in the root directory of the Job).

From the webservices, the datastore files are accessible from DM, where DS-TYPE is the DataSet type 'short name' (e.g., subreads) and DS-IDABLE is the DataSet (local) integer id or (global) UUID.

smrt-link/datasets/<DS-TYPE>/<DS-IDABLE>/reports

NOTE The reports interface is the core issue because it assumes the Job Id link. I believe the SMRT Link UI is filtering to get around this, however, this is not scalable because the SMRT Link UI has to fetch the details of all the reports, then filter out based on the DataSet UUID in the report.

And from the Jobs context.

smrt-link/job-manager/jobs/<JOB-TYPE>/<JOB-IDABLE>/reports

The Job is not a problem and the interface does NOT need to be changed. Semantically, the interface captures exactly what is expected.

Possible Solution

Capture new Report -> DataSet(s) relation

  • Add new table to capture DataStoreFile -> Set(DataStoreFile) relation
  • On import parse Report and assign
  • Update smrt-link/datasets/<DS-TYPE>/<DS-IDABLE>/reports to also filter by 'parent' DataStore File UUID(s)

This is straightforward, but it now requires a join to get the reports for a specific DataSet.

Legacy Data

Need to handle legacy data, specifically for the smrt-link/datasets/<DS-TYPE>/<DS-IDABLE>/reports webservice endpoint.

Possible Solutions

  1. During db migration on "start/upgrade", parse the Report JSON files and extract the DataSet UUID from the Report JSON file on disk and update the database.

    • BAD. This is expensive (could be parsing 1000's of report JSON files)
    • BAD. The dataset_uuids field might not be populated consistently. Not clear when that was added in the SMRT Link version.
    • GOOD. Potentially parsing the raw data removes guesswork at the job level (See #3 and the above issue is resolved)
  2. Hide the details in the API and dispatch on lookup based on the Job Version. (i.e., if job > 6.0.0, do X else do Y to get reports)

    • BAD. Fundamentally has different semantic results.
    • This is probably very difficult to debug when it's not working as expected. The dispatch from different SL versions would require an extra join to the engine_jobs table.
    • GOOD. Potentially least amount of db migration machinery
  3. During db migration, attempt a thinner approach to migrate old data and assign Report -> DataSet relation based on the Job type

    • import-dataset (Look at the output DataStore file, get the (single) DataSet DataStoreFile, get the List of Reports, then update the DB)
    • merge-datasets (Similar to import-dataset)
    • analysis job (Use the Entry Point(s) to get the UUID(s) and assign to all output Reports) (Note, this is not correct. The SL UI would still have to keep the legacy filtering model in place)
    • Other job types (Don't support?)

GOOD. Thin-ish migration BAD. Edge cases on capturing the

// Demux Analysis Job: two entry points go in, and the DataStore holds three
// SubreadSets, three Reports and a log file.
strict digraph {
    // The Job, its DataStore, and the inputs.
    Job   [label="Demux Analysis Job", shape=hexagon, color=blue];
    store [label="DataStore", shape=cylinder];
    ep1   [label="EntryPoint (SubreadSet)", shape=diamond];
    ep2   [label="EntryPoint (BarcodeSet)", shape=diamond];
    ep1 -> Job;
    ep2 -> Job;
    Job -> store;

    // Every node declared below is a DataStoreFile, drawn with the tab shape.
    node [shape=tab];
    s1     [label="DataStoreFile (SubreadSet_01)"];
    s2     [label="DataStoreFile (SubreadSet_02)"];
    s3     [label="DataStoreFile (SubreadSet_03)"];
    r1     [label="DataStoreFile (Report_01)"];
    r2     [label="DataStoreFile (Report_02)"];
    r3     [label="DataStoreFile (Report_03)"];
    dsf_01 [label="DataStoreFile (Log)"];

    // DataStore membership.
    store -> dsf_01;
    store -> s1;
    store -> s2;
    store -> s3;
    store -> r1;
    store -> r2;
    store -> r3;

    // Dotted edges pair each output SubreadSet with its companion Report.
    edge [style=dotted];
    s1 -> r1;
    s2 -> r2;
    s3 -> r3;
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
// ICS / Primary Analysis dataflow for one Run: a Run owns several
// CollectionMetadata entries; one of them is followed through conversion to a
// SubreadSet, the copy to the customer file system, and the import into
// SMRT Link.
strict digraph {
// The Run and its four CollectionMetadata children.
Run [shape=component, label="PacBio Run (XML)"]
c1 [shape=box, label="CollectionMetadata 1"]
c2 [shape=box, label="CollectionMetadata 2"]
c3 [shape=box, label="CollectionMetadata 3"]
c4 [shape=box, label="CollectionMetadata 4"]
Run -> c1
Run -> c2
Run -> c3
Run -> c4
// Processing steps (parallelograms) and the files they produce (tabs).
// The label on `p` previously ended with an unmatched ")".
p [shape=parallelogram, color=blue, label="Primary Analysis: Convert CollectionMeta to SubreadSet"]
f1 [shape=tab, label="PA File (SubreadSet XML) on PA file system"]
copy_job [shape=parallelogram, color=blue, label="Primary Analysis: Copy to Customer FileSystem"]
customer_subreadset [shape=tab, label="SubreadSet XML on Customer FileSystem"]
import_job [shape=parallelogram, color=blue, label="Primary Analysis: Import SubreadSet XML into SMRT Link using import-dataset Job"]
// Only c1 is wired through the pipeline; c2-c4 have no outgoing edges.
c1 -> p
p -> f1
f1 -> copy_job
copy_job -> customer_subreadset
customer_subreadset -> import_job
}
// Import DataSet Job: one path goes in, and the DataStore holds the imported
// SubreadSet, two Reports and a log file.
strict digraph {
    // The Job, its DataStore, and the single input path.
    Job   [label="Import DataSet", shape=hexagon, color=blue];
    store [label="DataStore", shape=cylinder];
    ep1   [label="Path /path/to/subreadset.xml", shape=diamond];
    ep1 -> Job;
    Job -> store;

    // Every node declared below is a DataStoreFile, drawn with the tab shape.
    node [shape=tab];
    dsf_02 [label="DataStoreFile (SubreadSet)"];
    dsf_04 [label="DataStoreFile (Report_01)"];
    dsf_05 [label="DataStoreFile (Report_02)"];
    dsf_03 [label="DataStoreFile (Log)"];

    // DataStore membership.
    store -> dsf_02;
    store -> dsf_04;
    store -> dsf_05;
    store -> dsf_03;

    // Dotted edges link the SubreadSet to the Reports drawn for it.
    edge [style=dotted];
    dsf_02 -> dsf_04;
    dsf_02 -> dsf_05;
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Render every Graphviz .dot source in this directory to PNG and SVG.
# Requires the Graphviz `dot` executable on PATH.

# These targets name actions, not files; mark them phony so a stray file
# called "convert" or "clean" cannot make them appear up to date.
.PHONY: default convert clean

default: convert

# Each graph is rendered twice: once as PNG, once as SVG.
convert:
	dot -Tpng demux-pbsmrtpipe-job.dot -o demux-pbsmrtpipe-job.png
	dot -Tsvg demux-pbsmrtpipe-job.dot -o demux-pbsmrtpipe-job.svg
	dot -Tpng import-dataset-job.dot -o import-dataset-job.png
	dot -Tsvg import-dataset-job.dot -o import-dataset-job.svg
	dot -Tpng pbsmrtpipe-job.dot -o pbsmrtpipe-job.png
	dot -Tsvg pbsmrtpipe-job.dot -o pbsmrtpipe-job.svg
	dot -Tpng connected-jobs.dot -o connected-jobs.png
	dot -Tsvg connected-jobs.dot -o connected-jobs.svg
	dot -Tpng advanced-jobs.dot -o advanced-jobs.png
	dot -Tsvg advanced-jobs.dot -o advanced-jobs.svg
	dot -Tpng ics.dot -o ics.png
	dot -Tsvg ics.dot -o ics.svg
	dot -Tpng system-job-running.dot -o system-job-running.png
	dot -Tsvg system-job-running.dot -o system-job-running.svg

# Remove both kinds of rendered output. -f keeps `make clean` from failing
# when nothing has been rendered yet.
clean:
	rm -f *.png *.svg
// Analysis job: two entry points in, a DataStore of output files out.
strict digraph {
    Job   [label="Analysis Job", shape=hexagon, color=blue];
    store [label="DataStore", shape=cylinder];

    // Entry points consumed by the job.
    {
        node [shape=diamond];
        ep1 [label="EntryPoint (SubreadSet)"];
        ep2 [label="EntryPoint (ReferenceSet)"];
    }

    // Files held by the DataStore.
    {
        node [shape=tab];
        dsf_01 [label="DataStoreFile (Fasta)"];
        dsf_02 [label="DataStoreFile (AlignmentSet)"];
        dsf_03 [label="DataStoreFile (VCF)"];
        dsf_04 [label="DataStoreFile (Report_01)"];
        dsf_05 [label="DataStoreFile (Report_02)"];
        dsf_06 [label="DataStoreFile (LOG)"];
    }

    // Both entry points feed the job; the job writes into the DataStore.
    { ep1 ep2 } -> Job;
    Job -> store;

    // The DataStore contains every output file.
    store -> { dsf_01 dsf_02 dsf_03 dsf_04 dsf_05 dsf_06 };

    // Dotted edges tie each report to the inputs/outputs it is drawn from.
    ep1 -> dsf_04 [style=dotted];
    { dsf_02 ep2 } -> dsf_05 [style=dotted];
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
// Runtime hierarchy: database -> services -> workers -> jobs -> tasks -> SGE jobs.
digraph {
    postgres_db [shape=cylinder];

    // Three blue diamond workers, each driving one pbsmrtpipe job.
    {
        node [shape=diamond, color=blue];
        WorkerA; WorkerB; WorkerC;
    }

    // Two pentagon workers for the non-pbsmrtpipe jobs.
    WorkerD [shape=pentagon, color=green];
    WorkerE [shape=pentagon, color=orange];

    {
        node [shape=rectangle];
        pbsmrtpipeJobA; pbsmrtpipeJobB; pbsmrtpipeJobC;
    }

    // Services sit between the database and all workers.
    postgres_db -> SL_Services;
    SL_Services -> { WorkerA WorkerB WorkerC WorkerD WorkerE };

    // Each worker runs exactly one job.
    WorkerA -> pbsmrtpipeJobA;
    WorkerB -> pbsmrtpipeJobB;
    WorkerC -> pbsmrtpipeJobC;
    WorkerD -> backup_db;
    WorkerE -> import_dataset;

    // Job A: five tasks, each mapped to its own SGE sync job.
    pbsmrtpipeJobA -> { taskA_01 taskA_02 taskA_03 taskA_04 taskA_05 };
    taskA_01 -> sge_sync_job_A_01;
    taskA_02 -> sge_sync_job_A_02;
    taskA_03 -> sge_sync_job_A_03;
    taskA_04 -> sge_sync_job_A_04;
    taskA_05 -> sge_sync_job_A_05;

    // Job B: two tasks.
    pbsmrtpipeJobB -> { taskB_01 taskB_02 };
    taskB_01 -> sge_sync_job_B_01;
    taskB_02 -> sge_sync_job_B_02;

    // Job C: two tasks.
    pbsmrtpipeJobC -> { taskC_01 taskC_02 };
    taskC_01 -> sge_sync_job_C_01;
    taskC_02 -> sge_sync_job_C_02;
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment