Skip to content

Instantly share code, notes, and snippets.

@Phlya
Created December 15, 2020 16:35
Show Gist options
  • Save Phlya/905932e821ba8c193e5920bbfddf175c to your computer and use it in GitHub Desktop.
Save Phlya/905932e821ba8c193e5920bbfddf175c to your computer and use it in GitHub Desktop.
Distiller project for Hi-C processing of Phanstiel et al. 2017 data
# Fastqs can be provided as:
# -- a pair of relative/absolute paths
# -- sra:<SRA_NUMBER>, optionally followed by the indices of the first and
# the last entry in the SRA in the form of "?start=<first>&end=<last>"
# [to implement] -- as a path to a folder with fastqs '<base_folder>', with the structure
# <base_folder>/<library_name>/<run_name>/, with each folder containing only
# two fastq.gz files
# python bin/geo2yaml.py PRJNA385337 --title_column library_name --title_sub '\.[^.]*$' '' --title_sub '\.' '_' --group_sub '_[^_]+$' ''
# Input data: read sources per library, library groupings, and the reference
# genome.
# NOTE(review): this file had lost all indentation (every key at column 0,
# which yields duplicate `lane1`..`lane6` keys). The nesting below is
# reconstructed from the key order and the distiller project.yml layout --
# confirm, in particular, that `genome` belongs under `input`.
input:
  # Layout: <library_name> -> <run/lane name> -> list of read sources.
  # Every lane here points at a single SRA run via `sra:<SRA_NUMBER>`.
  raw_reads_paths:
    THP1_Ctrl_1_1:
      lane1:
        - sra:SRR5519214
      lane2:
        - sra:SRR5519215
      lane3:
        - sra:SRR5519216
      lane4:
        - sra:SRR5519217
      lane5:
        - sra:SRR5519218
      lane6:
        - sra:SRR5519220
    THP1_Ctrl_1_2:
      lane1:
        - sra:SRR5519221
      lane2:
        - sra:SRR5519222
      lane3:
        - sra:SRR5519223
      lane4:
        - sra:SRR5519224
      lane5:
        - sra:SRR5519225
      lane6:
        - sra:SRR5519226
    THP1_Ctrl_1_3:
      lane1:
        - sra:SRR5519227
      lane2:
        - sra:SRR5519228
      lane3:
        - sra:SRR5519229
      lane4:
        - sra:SRR5519231
      lane5:
        - sra:SRR5519232
      lane6:
        - sra:SRR5519233
    THP1_Ctrl_2_1:
      lane1:
        - sra:SRR5519234
      lane2:
        - sra:SRR5519235
      lane3:
        - sra:SRR5519236
      lane4:
        - sra:SRR5519237
      lane5:
        - sra:SRR5519238
      lane6:
        - sra:SRR5519239
    THP1_Ctrl_2_2:
      lane1:
        - sra:SRR5519240
      lane2:
        - sra:SRR5519242
      lane3:
        - sra:SRR5519243
      lane4:
        - sra:SRR5519244
      lane5:
        - sra:SRR5519245
      lane6:
        - sra:SRR5519246
    THP1_Ctrl_2_3:
      lane1:
        - sra:SRR5519247
      lane2:
        - sra:SRR5519248
      lane3:
        - sra:SRR5519249
      lane4:
        - sra:SRR5519250
      lane5:
        - sra:SRR5519251
      lane6:
        - sra:SRR5519253
    THP1_PMA_1_1:
      lane1:
        - sra:SRR5519185
      lane2:
        - sra:SRR5519186
      lane3:
        - sra:SRR5519197
      lane4:
        - sra:SRR5519208
      lane5:
        - sra:SRR5519219
      lane6:
        - sra:SRR5519230
    # Only five runs are listed for this library (all others have six).
    THP1_PMA_1_2:
      lane1:
        - sra:SRR5519187
      lane2:
        - sra:SRR5519241
      lane3:
        - sra:SRR5519252
      lane4:
        - sra:SRR5519254
      lane5:
        - sra:SRR5519255
    THP1_PMA_1_3:
      lane1:
        - sra:SRR5519188
      lane2:
        - sra:SRR5519189
      lane3:
        - sra:SRR5519190
      lane4:
        - sra:SRR5519191
      lane5:
        - sra:SRR5519192
      lane6:
        - sra:SRR5519193
    THP1_PMA_2_1:
      lane1:
        - sra:SRR5519194
      lane2:
        - sra:SRR5519195
      lane3:
        - sra:SRR5519196
      lane4:
        - sra:SRR5519198
      lane5:
        - sra:SRR5519199
      lane6:
        - sra:SRR5519200
    THP1_PMA_2_2:
      lane1:
        - sra:SRR5519201
      lane2:
        - sra:SRR5519202
      lane3:
        - sra:SRR5519203
      lane4:
        - sra:SRR5519204
      lane5:
        - sra:SRR5519205
      lane6:
        - sra:SRR5519206
    THP1_PMA_2_3:
      lane1:
        - sra:SRR5519207
      lane2:
        - sra:SRR5519209
      lane3:
        - sra:SRR5519210
      lane4:
        - sra:SRR5519211
      lane5:
        - sra:SRR5519212
      lane6:
        - sra:SRR5519213

  # Layout: <group_name> -> list of library names from `raw_reads_paths`.
  library_groups:
    # Groups of the three libraries that share a `<sample>_<n>` prefix.
    THP1_Ctrl_1:
      - THP1_Ctrl_1_1
      - THP1_Ctrl_1_2
      - THP1_Ctrl_1_3
    THP1_Ctrl_2:
      - THP1_Ctrl_2_1
      - THP1_Ctrl_2_2
      - THP1_Ctrl_2_3
    THP1_PMA_1:
      - THP1_PMA_1_1
      - THP1_PMA_1_2
      - THP1_PMA_1_3
    THP1_PMA_2:
      - THP1_PMA_2_1
      - THP1_PMA_2_2
      - THP1_PMA_2_3
    # Groups pooling all six Ctrl / all six PMA libraries.
    THP1_Ctrl:
      - THP1_Ctrl_1_1
      - THP1_Ctrl_1_2
      - THP1_Ctrl_1_3
      - THP1_Ctrl_2_1
      - THP1_Ctrl_2_2
      - THP1_Ctrl_2_3
    THP1_PMA:
      - THP1_PMA_1_1
      - THP1_PMA_1_2
      - THP1_PMA_1_3
      - THP1_PMA_2_1
      - THP1_PMA_2_2
      - THP1_PMA_2_3

  # presumably 0 means "do not truncate the input fastqs" -- confirm.
  truncate_fastq_reads: 0

  # Reference genome: assembly label, wildcard matching the BWA index files,
  # and the chromosome-sizes file. Paths are site-specific absolute paths.
  genome:
    assembly_name: 'hg38'
    bwa_index_wildcard_path: '/exports/igmm/eddie/wendy-lab/ilia/genomes/hg38/bwaindex/hg38.fa*'
    chrom_sizes_path: '/exports/igmm/eddie/wendy-lab/ilia/genomes/hg38/chrfile.txt'
# FastQC of the input files is switched off.
# Written as canonical lowercase `false`; parses to the same boolean as `False`.
do_fastqc: false
# Read-mapping settings.
map:
  # presumably the number of reads per chunk when inputs are split for
  # mapping -- confirm the unit against the pipeline documentation.
  chunksize: 10000000
  # Empty string: no extra options are passed to the mapper.
  mapping_options: ''
  # NOTE(review): nested under `map` because it sits between
  # `mapping_options` and `parse` in the flattened original -- confirm.
  long_reads: false
# Settings for converting alignments into pairs.
parse:
  make_pairsam: false
  drop_readid: false
  drop_seq: true
  keep_unparsed_bams: false
  # Adds the mapq column(s) to the parsed output; the `mapq_30` filter under
  # `bin` refers to `mapq1`/`mapq2`.
  parsing_options: '--add-columns mapq'
# Duplicate-detection settings.
dedup:
  # 0: presumably only pairs with exactly matching positions count as
  # duplicates -- confirm.
  max_mismatch_bp: 0
# Binning of pairs into contact maps.
# NOTE(review): `balance` and `filters` are nested under `bin` based on their
# position in the flattened original -- confirm.
bin:
  # Bin sizes, listed from coarsest (10000000) to finest (1000);
  # presumably in base pairs.
  resolutions:
    - 10000000
    - 5000000
    - 2500000
    - 1000000
    - 500000
    - 250000
    - 100000
    - 50000
    - 25000
    - 10000
    - 5000
    - 2000
    - 1000
  balance: true
  # balance_options: '--cis-only'
  # Named pair filters; each value is a filter expression. Only `mapq_30`
  # is active, the unfiltered variant is commented out.
  filters:
    # no_filter: ''
    mapq_30: '(mapq1>=30) and (mapq2>=30)'
# Output folder layout; all paths are relative and live under ./project/.
output:
  dirs:
    processed_fastqs: './project/processed_fastqs/'
    mapped_parsed_sorted_chunks: './project/mapped_parsed_sorted_chunks'
    fastqc: './project/output/fastqc/'
    pairs_library: './project/output/pairs_library'
    coolers_library: './project/output/coolers_library/'
    coolers_library_group: './project/output/coolers_library_group/'
    stats_library_group: './project/output/stats_library_group/'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment