Created
December 15, 2020 16:35
-
-
Save Phlya/905932e821ba8c193e5920bbfddf175c to your computer and use it in GitHub Desktop.
Distiller project for Hi-C processing of Phanstiel et al. 2017 data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fastqs can be provided as: | |
# -- a pair of relative/absolute paths | |
# -- sra:<SRA_NUMBER>, optionally followed by the indices of the first and | |
# the last entry in the SRA in the form of "?start=<first>&end=<last>" | |
# [to implement] -- as a path to a folder with fastqs '<base_folder>', with the structure | |
# <base_folder>/<library_name>/<run_name>/, with each folder containing only | |
# two fastq.gz files | |
# python bin/geo2yaml.py PRJNA385337 --title_column library_name --title_sub '\.[^.]*$' '' --title_sub '\.' '_' --group_sub '_[^_]+$' '' | |
input: | |
raw_reads_paths: | |
THP1_Ctrl_1_1: | |
lane1: | |
- sra:SRR5519214 | |
lane2: | |
- sra:SRR5519215 | |
lane3: | |
- sra:SRR5519216 | |
lane4: | |
- sra:SRR5519217 | |
lane5: | |
- sra:SRR5519218 | |
lane6: | |
- sra:SRR5519220 | |
THP1_Ctrl_1_2: | |
lane1: | |
- sra:SRR5519221 | |
lane2: | |
- sra:SRR5519222 | |
lane3: | |
- sra:SRR5519223 | |
lane4: | |
- sra:SRR5519224 | |
lane5: | |
- sra:SRR5519225 | |
lane6: | |
- sra:SRR5519226 | |
THP1_Ctrl_1_3: | |
lane1: | |
- sra:SRR5519227 | |
lane2: | |
- sra:SRR5519228 | |
lane3: | |
- sra:SRR5519229 | |
lane4: | |
- sra:SRR5519231 | |
lane5: | |
- sra:SRR5519232 | |
lane6: | |
- sra:SRR5519233 | |
THP1_Ctrl_2_1: | |
lane1: | |
- sra:SRR5519234 | |
lane2: | |
- sra:SRR5519235 | |
lane3: | |
- sra:SRR5519236 | |
lane4: | |
- sra:SRR5519237 | |
lane5: | |
- sra:SRR5519238 | |
lane6: | |
- sra:SRR5519239 | |
THP1_Ctrl_2_2: | |
lane1: | |
- sra:SRR5519240 | |
lane2: | |
- sra:SRR5519242 | |
lane3: | |
- sra:SRR5519243 | |
lane4: | |
- sra:SRR5519244 | |
lane5: | |
- sra:SRR5519245 | |
lane6: | |
- sra:SRR5519246 | |
THP1_Ctrl_2_3: | |
lane1: | |
- sra:SRR5519247 | |
lane2: | |
- sra:SRR5519248 | |
lane3: | |
- sra:SRR5519249 | |
lane4: | |
- sra:SRR5519250 | |
lane5: | |
- sra:SRR5519251 | |
lane6: | |
- sra:SRR5519253 | |
THP1_PMA_1_1: | |
lane1: | |
- sra:SRR5519185 | |
lane2: | |
- sra:SRR5519186 | |
lane3: | |
- sra:SRR5519197 | |
lane4: | |
- sra:SRR5519208 | |
lane5: | |
- sra:SRR5519219 | |
lane6: | |
- sra:SRR5519230 | |
THP1_PMA_1_2: | |
lane1: | |
- sra:SRR5519187 | |
lane2: | |
- sra:SRR5519241 | |
lane3: | |
- sra:SRR5519252 | |
lane4: | |
- sra:SRR5519254 | |
lane5: | |
- sra:SRR5519255 | |
THP1_PMA_1_3: | |
lane1: | |
- sra:SRR5519188 | |
lane2: | |
- sra:SRR5519189 | |
lane3: | |
- sra:SRR5519190 | |
lane4: | |
- sra:SRR5519191 | |
lane5: | |
- sra:SRR5519192 | |
lane6: | |
- sra:SRR5519193 | |
THP1_PMA_2_1: | |
lane1: | |
- sra:SRR5519194 | |
lane2: | |
- sra:SRR5519195 | |
lane3: | |
- sra:SRR5519196 | |
lane4: | |
- sra:SRR5519198 | |
lane5: | |
- sra:SRR5519199 | |
lane6: | |
- sra:SRR5519200 | |
THP1_PMA_2_2: | |
lane1: | |
- sra:SRR5519201 | |
lane2: | |
- sra:SRR5519202 | |
lane3: | |
- sra:SRR5519203 | |
lane4: | |
- sra:SRR5519204 | |
lane5: | |
- sra:SRR5519205 | |
lane6: | |
- sra:SRR5519206 | |
THP1_PMA_2_3: | |
lane1: | |
- sra:SRR5519207 | |
lane2: | |
- sra:SRR5519209 | |
lane3: | |
- sra:SRR5519210 | |
lane4: | |
- sra:SRR5519211 | |
lane5: | |
- sra:SRR5519212 | |
lane6: | |
- sra:SRR5519213 | |
library_groups: | |
THP1_Ctrl_1: | |
- THP1_Ctrl_1_1 | |
- THP1_Ctrl_1_2 | |
- THP1_Ctrl_1_3 | |
THP1_Ctrl_2: | |
- THP1_Ctrl_2_1 | |
- THP1_Ctrl_2_2 | |
- THP1_Ctrl_2_3 | |
THP1_PMA_1: | |
- THP1_PMA_1_1 | |
- THP1_PMA_1_2 | |
- THP1_PMA_1_3 | |
THP1_PMA_2: | |
- THP1_PMA_2_1 | |
- THP1_PMA_2_2 | |
- THP1_PMA_2_3 | |
THP1_Ctrl: | |
- THP1_Ctrl_1_1 | |
- THP1_Ctrl_1_2 | |
- THP1_Ctrl_1_3 | |
- THP1_Ctrl_2_1 | |
- THP1_Ctrl_2_2 | |
- THP1_Ctrl_2_3 | |
THP1_PMA: | |
- THP1_PMA_1_1 | |
- THP1_PMA_1_2 | |
- THP1_PMA_1_3 | |
- THP1_PMA_2_1 | |
- THP1_PMA_2_2 | |
- THP1_PMA_2_3 | |
truncate_fastq_reads: 0 | |
genome: | |
assembly_name: 'hg38' | |
bwa_index_wildcard_path: '/exports/igmm/eddie/wendy-lab/ilia/genomes/hg38/bwaindex/hg38.fa*' | |
chrom_sizes_path: '/exports/igmm/eddie/wendy-lab/ilia/genomes/hg38/chrfile.txt' | |
do_fastqc: False | |
map: | |
chunksize: 10000000 | |
mapping_options: '' | |
long_reads: False | |
parse: | |
make_pairsam: False | |
drop_readid: False | |
drop_seq: True | |
keep_unparsed_bams: False | |
parsing_options: '--add-columns mapq' | |
dedup: | |
max_mismatch_bp: 0 | |
bin: | |
resolutions: | |
- 10000000 | |
- 5000000 | |
- 2500000 | |
- 1000000 | |
- 500000 | |
- 250000 | |
- 100000 | |
- 50000 | |
- 25000 | |
- 10000 | |
- 5000 | |
- 2000 | |
- 1000 | |
balance: True | |
# balance_options: '--cis-only' | |
filters: | |
# no_filter: '' | |
mapq_30: '(mapq1>=30) and (mapq2>=30)' | |
output: | |
dirs: | |
processed_fastqs: './project/processed_fastqs/' | |
mapped_parsed_sorted_chunks: './project/mapped_parsed_sorted_chunks' | |
fastqc: './project/output/fastqc/' | |
pairs_library: './project/output/pairs_library' | |
coolers_library: './project/output/coolers_library/' | |
coolers_library_group: './project/output/coolers_library_group/' | |
stats_library_group: './project/output/stats_library_group/' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment