Skip to content

Instantly share code, notes, and snippets.

@hden
Created November 22, 2012 02:49
Show Gist options
  • Select an option

  • Save hden/4129181 to your computer and use it in GitHub Desktop.

Select an option

Save hden/4129181 to your computer and use it in GitHub Desktop.
Extracts the flanking sequence of a SNP (chromosome in the 2nd column, position in the 4th column) and appends the sequence in a new column.
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "Extract Flanking Sequence (csv to tab)",
"steps": {
"0": {
"annotation": "",
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "Input Dataset"
}
],
"name": "Input dataset",
"outputs": [],
"position": {
"left": 200,
"top": 351
},
"tool_errors": null,
"tool_id": null,
"tool_state": "{\"name\": \"Input Dataset\"}",
"tool_version": null,
"type": "data_input",
"user_outputs": []
},
"1": {
"annotation": "",
"id": 1,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [],
"name": "Convert",
"outputs": [
{
"name": "out_file1",
"type": "tabular"
}
],
"position": {
"left": 420,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Convert characters1",
"tool_state": "{\"__page__\": 0, \"convert_from\": \"\\\"C\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"input\": \"null\"}",
"tool_version": "1.0.0",
"type": "tool",
"user_outputs": []
},
"2": {
"annotation": "",
"id": 2,
"input_connections": {
"input": {
"id": 1,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Select first",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 645,
"top": 200
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Show beginning1",
"tool_state": "{\"__page__\": 0, \"input\": \"null\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"lineNum\": \"\\\"1\\\"\"}",
"tool_version": "1.0.0",
"type": "tool",
"user_outputs": []
},
"3": {
"annotation": "",
"id": 3,
"input_connections": {
"input": {
"id": 1,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Cut",
"outputs": [
{
"name": "out_file1",
"type": "tabular"
}
],
"position": {
"left": 640,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Cut1",
"tool_state": "{\"columnList\": \"\\\"c2,c4\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"__page__\": 0}",
"tool_version": "1.0.1",
"type": "tool",
"user_outputs": []
},
"4": {
"annotation": "",
"id": 4,
"input_connections": {
"input": {
"id": 1,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Compute",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 641,
"top": 506
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Add_a_column1",
"tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c4+10\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"round\": \"\\\"yes\\\"\", \"input\": \"null\"}",
"tool_version": "1.1.0",
"type": "tool",
"user_outputs": []
},
"5": {
"annotation": "",
"id": 5,
"input_connections": {
"input": {
"id": 2,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Add column",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 903,
"top": 206
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "addValue",
"tool_state": "{\"__page__\": 0, \"input\": \"null\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"exp\": \"\\\"Flanking Sequence\\\"\", \"iterate\": \"\\\"no\\\"\"}",
"tool_version": "1.0.0",
"type": "tool",
"user_outputs": []
},
"6": {
"annotation": "",
"id": 6,
"input_connections": {
"input": {
"id": 3,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Compute",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 860,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Add_a_column1",
"tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c2-200\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"round\": \"\\\"yes\\\"\", \"input\": \"null\"}",
"tool_version": "1.1.0",
"type": "tool",
"user_outputs": []
},
"7": {
"annotation": "",
"id": 7,
"input_connections": {
"input": {
"id": 6,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Compute",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 1080,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Add_a_column1",
"tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c2+200\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"round\": \"\\\"yes\\\"\", \"input\": \"null\"}",
"tool_version": "1.1.0",
"type": "tool",
"user_outputs": []
},
"8": {
"annotation": "",
"id": 8,
"input_connections": {
"input": {
"id": 7,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Cut",
"outputs": [
{
"name": "out_file1",
"type": "tabular"
}
],
"position": {
"left": 1300,
"top": 351
},
"post_job_actions": {
"ChangeDatatypeActionout_file1": {
"action_arguments": {
"newtype": "interval"
},
"action_type": "ChangeDatatypeAction",
"output_name": "out_file1"
}
},
"tool_errors": null,
"tool_id": "Cut1",
"tool_state": "{\"columnList\": \"\\\"c1,c3,c4\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"__page__\": 0}",
"tool_version": "1.0.1",
"type": "tool",
"user_outputs": []
},
"9": {
"annotation": "",
"id": 9,
"input_connections": {
"input": {
"id": 8,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Extract Genomic DNA",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 1520,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Extract genomic DNA 1",
"tool_state": "{\"out_format\": \"\\\"fasta\\\"\", \"__page__\": 0, \"interpret_features\": \"\\\"yes\\\"\", \"seq_source\": \"{\\\"index_source\\\": \\\"cached\\\", \\\"__current_case__\\\": 0}\", \"input\": \"null\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\"}",
"tool_version": "2.2.2",
"type": "tool",
"user_outputs": []
},
"10": {
"annotation": "",
"id": 10,
"input_connections": {
"input": {
"id": 9,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "FASTA-to-Tabular",
"outputs": [
{
"name": "output",
"type": "tabular"
}
],
"position": {
"left": 1740,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "fasta2tab",
"tool_state": "{\"input\": \"null\", \"keep_first\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"__page__\": 0}",
"tool_version": "1.1.0",
"type": "tool",
"user_outputs": []
},
"11": {
"annotation": "",
"id": 11,
"input_connections": {
"input1": {
"id": 4,
"output_name": "out_file1"
},
"input2": {
"id": 10,
"output_name": "output"
}
},
"inputs": [],
"name": "Paste",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 1962,
"top": 464
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Paste1",
"tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"delimiter\": \"\\\"T\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\"}",
"tool_version": "1.0.0",
"type": "tool",
"user_outputs": []
},
"12": {
"annotation": "",
"id": 12,
"input_connections": {
"input": {
"id": 11,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Cut",
"outputs": [
{
"name": "out_file1",
"type": "tabular"
}
],
"position": {
"left": 2180,
"top": 351
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "Cut1",
"tool_state": "{\"columnList\": \"\\\"c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c19\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"__page__\": 0}",
"tool_version": "1.0.1",
"type": "tool",
"user_outputs": []
},
"13": {
"annotation": "",
"id": 13,
"input_connections": {
"input1": {
"id": 5,
"output_name": "out_file1"
},
"queries_0|input2": {
"id": 12,
"output_name": "out_file1"
}
},
"inputs": [],
"name": "Concatenate datasets",
"outputs": [
{
"name": "out_file1",
"type": "input"
}
],
"position": {
"left": 2391,
"top": 243
},
"post_job_actions": {},
"tool_errors": null,
"tool_id": "cat1",
"tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"chromInfo\": \"\\\"/galaxy/home/g2main/galaxy_main/tool-data/shared/ucsc/chrom/hg19.len\\\"\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}",
"tool_version": "1.0.0",
"type": "tool",
"user_outputs": []
}
}
}
@sanjeevksh
Copy link

Hi hden,
Can this script put the target SNP alleles in square brackets and also flag the SNPs in the flanking sequences as IUPAC codes?
Regards,
SS

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment