Last active
September 22, 2022 16:25
-
-
Save IsmailM/df2245597767d7749c3678086e711b8d to your computer and use it in GitHub Desktop.
ChromImpute run on individual marks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Run the ChromImpute steps (Convert, ComputeGlobalDist, GenerateTrainData,
# Train, Apply) restricted to the histone marks named on the command line.
#
# Usage:
#   bash ./analysis.sh H3K27ac
#   bash ./analysis.sh H3K27ac H3K27me3 H3K36me3 H3K9me3
#
# Arguments:
#   MARK...  one or more mark names; each is used as an extended-regex
#            alternative when filtering marks.csv and is passed to
#            `chromimpute ... -m`.
#
# Read from the current working directory:
#   marks.csv, hg38.chrom.sizes, data/, wgbs_data/wgbs_data_files.tsv,
#   wgbs_data/header.tsv
#
# Written:
#   alt/individual/<marks joined by "_">/
#     marks.csv        rows of ./marks.csv matching any requested mark
#     marks_list.tsv   the requested marks, space-separated, on one line
#     01_converted_data/ 02_global_dist/ 03_traindata/
#     04_trained_predictors/ 05_apply_data/
set -euxo pipefail

if (( $# == 0 )); then
  printf 'Usage: %s MARK [MARK...]\n' "${0##*/}" >&2
  exit 2
fi

# Keep the marks as a real array so every mark stays a separate word.
MARKS=("$@")

# "${MARKS[*]}" joins on the first character of IFS; the subshell keeps the
# temporary IFS from leaking into the rest of the script.
PATTERN=$(IFS='|'; printf '%s' "${MARKS[*]}")
DIRPATTERN=$(IFS='_'; printf '%s' "${MARKS[*]}")

OUT_DIR="alt/individual/${DIRPATTERN}"
CONVERTED_DIR="${OUT_DIR}/01_converted_data"
GLOBAL_DIST_DIR="${OUT_DIR}/02_global_dist"
TRAINDATA_DIR="${OUT_DIR}/03_traindata"
TRAINED_PREDICTORS_DIR="${OUT_DIR}/04_trained_predictors"
APPLY_DATA_DIR="${OUT_DIR}/05_apply_data"

# -p everywhere so that re-running for the same marks does not abort under
# `set -e` just because the directories already exist.
mkdir -p -- \
  "${OUT_DIR}" \
  "${CONVERTED_DIR}" \
  "${GLOBAL_DIST_DIR}" \
  "${TRAINDATA_DIR}" \
  "${TRAINED_PREDICTORS_DIR}" \
  "${APPLY_DATA_DIR}"

# Keep only the rows of the master table that mention a requested mark.
# grep exits non-zero when nothing matches (or marks.csv cannot be read);
# stop here with a clear message rather than feeding an empty table onward.
if ! grep -E -- "${PATTERN}" marks.csv > "${OUT_DIR}/marks.csv"; then
  printf 'error: no rows of marks.csv matched: %s\n' "${PATTERN}" >&2
  exit 1
fi
printf '%s\n' "${MARKS[*]}" > "${OUT_DIR}/marks_list.tsv"

# Convert the DATA first...
for MARK in "${MARKS[@]}"; do
  chromimpute Convert -m "${MARK}" data "${OUT_DIR}/marks.csv" \
    hg38.chrom.sizes "${CONVERTED_DIR}"
done

# ComputeGlobalDist, once per mark, on the converted data.
for MARK in "${MARKS[@]}"; do
  chromimpute ComputeGlobalDist -m "${MARK}" "${CONVERTED_DIR}" \
    "${OUT_DIR}/marks.csv" hg38.chrom.sizes "${GLOBAL_DIST_DIR}"
done

# GenerateTrainData
# NOTE(review): the trailing `dna_methyl` argument looks like the target mark
# name and `-d 100` is passed through unchanged - confirm both against the
# ChromImpute manual.
chromimpute GenerateTrainData \
  -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
  -d 100 "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" "${OUT_DIR}/marks.csv" \
  hg38.chrom.sizes "${TRAINDATA_DIR}" dna_methyl

# Sample identifiers shared by the Train and Apply steps.
readonly SAMPLES=(D1 D2 D3 D4 D5 D6 D7 D8)

# Train
for sample in "${SAMPLES[@]}"; do
  chromimpute Train -dnamethyl wgbs_data/header.tsv "${TRAINDATA_DIR}" \
    "${OUT_DIR}/marks.csv" "${TRAINED_PREDICTORS_DIR}" "${sample}" dna_methyl
done

# Apply
for sample in "${SAMPLES[@]}"; do
  chromimpute Apply \
    -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
    "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" "${TRAINED_PREDICTORS_DIR}" \
    "${OUT_DIR}/marks.csv" hg38.chrom.sizes "${APPLY_DATA_DIR}" \
    "${sample}" dna_methyl
done
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example marks.csv generated by the above script | |
D1 H3K27ac IHECRE00000101.3.31b03180-586e-4b42-aa9e-2a124599ee9d.pval0.01.500K.narrowPeak.gz | |
D2 H3K27ac IHECRE00000027.3.97de962a-0cae-4248-ab76-3d3fe1777a34.pval0.01.500K.narrowPeak.gz | |
D3 H3K27ac IHECRE00000048.3.1896e4ef-d88b-439f-9ac2-0cee1d66c3f3.pval0.01.500K.narrowPeak.gz | |
D4 H3K27ac IHECRE00000155.3.8279b76b-57ad-4ec8-b5cb-d16fdac7512c.pval0.01.500K.narrowPeak.gz | |
D5 H3K27ac IHECRE00000774.3.0226d0bd-e448-4e8e-922e-bd3e7a7abd00.pval0.01.500K.narrowPeak.gz | |
D6 H3K27ac IHECRE00000866.3.0925b7f7-7c86-40ca-bdc0-1ca853709a23.pval0.01.500K.narrowPeak.gz | |
D7 H3K27ac IHECRE00000718.3.4124a800-a44d-4bfe-b76a-06de52b79d79.pval0.01.500K.narrowPeak.gz | |
D8 H3K27ac IHECRE00000828.1.45bbe382-78c1-4fac-99df-1349446e9df6.pval0.01.500K.narrowPeak.gz |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original marks.csv that the above script filters to create a new marks.csv (see the example above) containing only the requested marks | |
D1 H3K27ac IHECRE00000101.3.31b03180-586e-4b42-aa9e-2a124599ee9d.pval0.01.500K.narrowPeak.gz | |
D1 H3K27me3 IHECRE00000101.3.15b5016e-771a-4791-960d-6d40861428f9.pval0.01.500K.narrowPeak.gz | |
D1 H3K36me3 IHECRE00000101.3.06d0440d-166a-4bfb-8784-29eb25ba34d7.pval0.01.500K.narrowPeak.gz | |
D1 H3K4me1 IHECRE00000101.3.16a3040f-b17c-4d67-9177-3d83db7f277e.pval0.01.500K.narrowPeak.gz | |
D1 H3K4me3 IHECRE00000101.3.7aca552f-d7f4-4773-8438-94768d4b35a1.pval0.01.500K.narrowPeak.gz | |
D1 H3K9me3 IHECRE00000101.3.728f2424-573a-4743-bf7a-a87d17545a80.pval0.01.500K.narrowPeak.gz | |
D2 H3K27ac IHECRE00000027.3.97de962a-0cae-4248-ab76-3d3fe1777a34.pval0.01.500K.narrowPeak.gz | |
D2 H3K27me3 IHECRE00000027.3.0f1ed0f5-f1e0-4d74-b3bf-fb3d751dc985.pval0.01.500K.narrowPeak.gz | |
D2 H3K36me3 IHECRE00000027.3.69150add-aea1-4da8-8056-9d84554477f4.pval0.01.500K.narrowPeak.gz | |
D2 H3K4me1 IHECRE00000027.3.5192d960-f7e6-4c19-807d-86c5f08ffc22.pval0.01.500K.narrowPeak.gz | |
D2 H3K4me3 IHECRE00000027.3.93c45466-c37d-4584-ac62-84daaf3ab1fa.pval0.01.500K.narrowPeak.gz | |
D2 H3K9me3 IHECRE00000027.3.ee84a798-d432-4ddb-a16e-2e66142b00aa.pval0.01.500K.narrowPeak.gz | |
D3 H3K27ac IHECRE00000048.3.1896e4ef-d88b-439f-9ac2-0cee1d66c3f3.pval0.01.500K.narrowPeak.gz | |
D3 H3K27me3 IHECRE00000048.3.23af1bd1-f0c5-453e-8760-bf047a842cd8.pval0.01.500K.narrowPeak.gz | |
D3 H3K36me3 IHECRE00000048.3.a2079e84-2427-4d94-b528-ea4d0be6a729.pval0.01.500K.narrowPeak.gz | |
D3 H3K4me1 IHECRE00000048.3.5644ae9c-281d-4590-b1f4-0e20de6845e6.pval0.01.500K.narrowPeak.gz | |
D3 H3K4me3 IHECRE00000048.3.8c570f10-9831-4857-a68d-6de7246feef0.pval0.01.500K.narrowPeak.gz | |
D3 H3K9me3 IHECRE00000048.3.11a776de-361e-4edd-b4c1-1153e9a72498.pval0.01.500K.narrowPeak.gz | |
D4 H3K27ac IHECRE00000155.3.8279b76b-57ad-4ec8-b5cb-d16fdac7512c.pval0.01.500K.narrowPeak.gz | |
D4 H3K27me3 IHECRE00000155.3.65fa8cc1-284a-4261-8b82-5bca18ab1a7b.pval0.01.500K.narrowPeak.gz | |
D4 H3K36me3 IHECRE00000155.3.f96e1b9b-e27a-457e-8b0f-804f1e61ec6e.pval0.01.500K.narrowPeak.gz | |
D4 H3K4me1 IHECRE00000155.3.856b1d64-01ad-4d8f-893b-b027b47318ee.pval0.01.500K.narrowPeak.gz | |
D4 H3K4me3 IHECRE00000155.3.8aad1d9d-d4ae-4ccf-a70a-5db7693e9703.pval0.01.500K.narrowPeak.gz | |
D4 H3K9me3 IHECRE00000155.3.2e98c95e-7e9b-4407-af30-61730f99e499.pval0.01.500K.narrowPeak.gz | |
D5 H3K27ac IHECRE00000774.3.0226d0bd-e448-4e8e-922e-bd3e7a7abd00.pval0.01.500K.narrowPeak.gz | |
D5 H3K27me3 IHECRE00000774.3.2555c044-a169-434c-aa5a-cdb2619e5c02.pval0.01.500K.narrowPeak.gz | |
D5 H3K36me3 IHECRE00000774.3.c74b9473-7abb-43f6-8885-3c40e0552ac6.pval0.01.500K.narrowPeak.gz | |
D5 H3K4me1 IHECRE00000774.3.ec5f998a-c189-48df-8b3d-327b2930af23.pval0.01.500K.narrowPeak.gz | |
D5 H3K4me3 IHECRE00000774.3.22c5c812-e487-45a1-b100-255b7ac328af.pval0.01.500K.narrowPeak.gz | |
D5 H3K9me3 IHECRE00000774.3.01056fa5-2189-424c-aa80-7250e2ec9dc9.pval0.01.500K.narrowPeak.gz | |
D6 H3K27ac IHECRE00000866.3.0925b7f7-7c86-40ca-bdc0-1ca853709a23.pval0.01.500K.narrowPeak.gz | |
D6 H3K27me3 IHECRE00000866.3.1ccf14fb-5a5a-4c74-8157-04b7e1f2dcf1.pval0.01.500K.narrowPeak.gz | |
D6 H3K36me3 IHECRE00000866.3.8ab33063-999a-4988-84dd-a334b327781d.pval0.01.500K.narrowPeak.gz | |
D6 H3K4me1 IHECRE00000866.3.667978be-7c54-441e-83d2-7798acad8e7b.pval0.01.500K.narrowPeak.gz | |
D6 H3K4me3 IHECRE00000866.3.2a076e8e-9a2f-43b5-99c3-eddd13655e6b.pval0.01.500K.narrowPeak.gz | |
D6 H3K9me3 IHECRE00000866.3.0bf1ced9-73d3-4bb4-80e8-08f993f2b136.pval0.01.500K.narrowPeak.gz | |
D7 H3K27ac IHECRE00000718.3.4124a800-a44d-4bfe-b76a-06de52b79d79.pval0.01.500K.narrowPeak.gz | |
D7 H3K27me3 IHECRE00000718.3.5e75e9ad-e9d2-4508-9566-bf664ca87cc0.pval0.01.500K.narrowPeak.gz | |
D7 H3K36me3 IHECRE00000718.3.3d668648-a4ff-43ad-ae6f-bcf86ce49985.pval0.01.500K.narrowPeak.gz | |
D7 H3K4me1 IHECRE00000718.3.3cf6cab8-5a06-44c2-b745-b249948e89e2.pval0.01.500K.narrowPeak.gz | |
D7 H3K4me3 IHECRE00000718.3.9c867cb2-87c1-446d-a7eb-1986e48fd4c0.pval0.01.500K.narrowPeak.gz | |
D7 H3K9me3 IHECRE00000718.3.aa15ac7e-15a0-48e8-acc8-ec3bdca62dd4.pval0.01.500K.narrowPeak.gz | |
D8 H3K27ac IHECRE00000828.1.45bbe382-78c1-4fac-99df-1349446e9df6.pval0.01.500K.narrowPeak.gz | |
D8 H3K27me3 IHECRE00000828.1.30590c4c-78e4-4da6-9844-49fd785aa3ea.pval0.01.500K.narrowPeak.gz | |
D8 H3K36me3 IHECRE00000828.1.ad8aa122-d0d8-45b6-96f2-58aee53acfb7.pval0.01.500K.narrowPeak.gz | |
D8 H3K4me1 IHECRE00000828.1.cd2006fa-f12a-47e4-a8de-2ef03985dfb6.pval0.01.500K.narrowPeak.gz | |
D8 H3K4me3 IHECRE00000828.1.3dc2375f-9530-437e-9fe7-54e01b6a99a6.pval0.01.500K.narrowPeak.gz | |
D8 H3K9me3 IHECRE00000828.1.aa54d00c-3307-444e-b6a2-8d8127554fe8.pval0.01.500K.narrowPeak.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
start | D1 | D3 | D5 | D7 |
---|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chr1 | chr-1.tsv.gz | |
---|---|---|
chr2 | chr-2.tsv.gz | |
chr3 | chr-3.tsv.gz | |
chr4 | chr-4.tsv.gz | |
chr5 | chr-5.tsv.gz | |
chr6 | chr-6.tsv.gz | |
chr7 | chr-7.tsv.gz | |
chr8 | chr-8.tsv.gz | |
chr9 | chr-9.tsv.gz | |
chr10 | chr-10.tsv.gz | |
chr11 | chr-11.tsv.gz | |
chr12 | chr-12.tsv.gz | |
chr13 | chr-13.tsv.gz | |
chr14 | chr-14.tsv.gz | |
chr15 | chr-15.tsv.gz | |
chr16 | chr-16.tsv.gz | |
chr17 | chr-17.tsv.gz | |
chr18 | chr-18.tsv.gz | |
chr19 | chr-19.tsv.gz | |
chr20 | chr-20.tsv.gz | |
chr21 | chr-21.tsv.gz | |
chr22 | chr-22.tsv.gz | |
chrX | chr-X.tsv.gz | |
chrY | chr-Y.tsv.gz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment