Last active
September 22, 2022 16:31
-
-
Save IsmailM/5a41e69352c49ce165dbad013a0a57d3 to your computer and use it in GitHub Desktop.
chromimpute analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Runs the ChromImpute stages in order to impute DNA methylation
# (target mark name: dna_methyl):
#   Convert -> ComputeGlobalDist -> GenerateTrainData -> Train -> Apply
#
# Inputs are read relative to the current working directory:
#   data/                          directory passed to Convert
#   marks.csv                      sample / mark / file table
#   hg38.chrom.sizes               chromosome sizes file
#   wgbs_data/wgbs_data_files.tsv  chromosome -> methylation file table
#   wgbs_data/header.tsv           header naming the methylation samples
#
# Requires the `chromimpute` command on PATH.
set -eux

# One output directory per pipeline stage.
readonly CONVERTED_DIR=alt/v4/01_converted_data
readonly GLOBAL_DIST_DIR=alt/v4/02_global_dist
readonly TRAINDATA_DIR=alt/v4/03_traindata
readonly TRAINED_PREDICTORS_DIR=alt/v4/04_trained_predictors
readonly APPLY_DATA_DIR=alt/v4/05_apply_data

mkdir -p \
  "${CONVERTED_DIR}" \
  "${GLOBAL_DIST_DIR}" \
  "${TRAINDATA_DIR}" \
  "${TRAINED_PREDICTORS_DIR}" \
  "${APPLY_DATA_DIR}"

# Marks processed by Convert and ComputeGlobalDist, one invocation per mark.
marks=(H3K4me3 H3K36me3 H3K9me3 H3K4me1 H3K27me3 H3K27ac)

# Samples used for BOTH Train and Apply.
#
# Training was originally restricted to (D1 D3 D5 D7), while Apply ran on all
# eight samples. Per the original author's note, that made Apply fail with:
#
#   IllegalArgumentException: No previously trained classifiers for mark dna_methyl were found available to load!
#       at ernst.ChromImpute.ChromImpute.executeApply(ChromImpute.java:2285)
#       at ernst.ChromImpute.ChromImpute.<init>(ChromImpute.java:1145)
#       at ernst.ChromImpute.ChromImpute.main(ChromImpute.java:8107)
#
# and the run only worked once Train covered every sample (all other input
# files unchanged). Keeping a single list prevents the two stages from
# drifting apart again.
samples=(D1 D2 D3 D4 D5 D6 D7 D8)

# Convert the DATA first...
for mark in "${marks[@]}"; do
  chromimpute Convert -m "${mark}" \
    data marks.csv hg38.chrom.sizes "${CONVERTED_DIR}"
done

# Set up ComputeGlobalDist (all Convert runs finish before this starts).
for mark in "${marks[@]}"; do
  chromimpute ComputeGlobalDist -m "${mark}" \
    "${CONVERTED_DIR}" marks.csv hg38.chrom.sizes "${GLOBAL_DIST_DIR}"
done

# GenerateTrainData
chromimpute GenerateTrainData \
  -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
  -d 100 \
  "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" marks.csv hg38.chrom.sizes \
  "${TRAINDATA_DIR}" dna_methyl

# Train
for sample in "${samples[@]}"; do
  chromimpute Train -dnamethyl wgbs_data/header.tsv \
    "${TRAINDATA_DIR}" marks.csv "${TRAINED_PREDICTORS_DIR}" \
    "${sample}" dna_methyl
done

# Apply
for sample in "${samples[@]}"; do
  chromimpute Apply \
    -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
    "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" "${TRAINED_PREDICTORS_DIR}" \
    marks.csv hg38.chrom.sizes "${APPLY_DATA_DIR}" \
    "${sample}" dna_methyl
done
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
D1 H3K27ac IHECRE00000101.3.31b03180-586e-4b42-aa9e-2a124599ee9d.pval0.01.500K.narrowPeak.gz | |
D1 H3K27me3 IHECRE00000101.3.15b5016e-771a-4791-960d-6d40861428f9.pval0.01.500K.narrowPeak.gz | |
D1 H3K36me3 IHECRE00000101.3.06d0440d-166a-4bfb-8784-29eb25ba34d7.pval0.01.500K.narrowPeak.gz | |
D1 H3K4me1 IHECRE00000101.3.16a3040f-b17c-4d67-9177-3d83db7f277e.pval0.01.500K.narrowPeak.gz | |
D1 H3K4me3 IHECRE00000101.3.7aca552f-d7f4-4773-8438-94768d4b35a1.pval0.01.500K.narrowPeak.gz | |
D1 H3K9me3 IHECRE00000101.3.728f2424-573a-4743-bf7a-a87d17545a80.pval0.01.500K.narrowPeak.gz | |
D2 H3K27ac IHECRE00000027.3.97de962a-0cae-4248-ab76-3d3fe1777a34.pval0.01.500K.narrowPeak.gz | |
D2 H3K27me3 IHECRE00000027.3.0f1ed0f5-f1e0-4d74-b3bf-fb3d751dc985.pval0.01.500K.narrowPeak.gz | |
D2 H3K36me3 IHECRE00000027.3.69150add-aea1-4da8-8056-9d84554477f4.pval0.01.500K.narrowPeak.gz | |
D2 H3K4me1 IHECRE00000027.3.5192d960-f7e6-4c19-807d-86c5f08ffc22.pval0.01.500K.narrowPeak.gz | |
D2 H3K4me3 IHECRE00000027.3.93c45466-c37d-4584-ac62-84daaf3ab1fa.pval0.01.500K.narrowPeak.gz | |
D2 H3K9me3 IHECRE00000027.3.ee84a798-d432-4ddb-a16e-2e66142b00aa.pval0.01.500K.narrowPeak.gz | |
D3 H3K27ac IHECRE00000048.3.1896e4ef-d88b-439f-9ac2-0cee1d66c3f3.pval0.01.500K.narrowPeak.gz | |
D3 H3K27me3 IHECRE00000048.3.23af1bd1-f0c5-453e-8760-bf047a842cd8.pval0.01.500K.narrowPeak.gz | |
D3 H3K36me3 IHECRE00000048.3.a2079e84-2427-4d94-b528-ea4d0be6a729.pval0.01.500K.narrowPeak.gz | |
D3 H3K4me1 IHECRE00000048.3.5644ae9c-281d-4590-b1f4-0e20de6845e6.pval0.01.500K.narrowPeak.gz | |
D3 H3K4me3 IHECRE00000048.3.8c570f10-9831-4857-a68d-6de7246feef0.pval0.01.500K.narrowPeak.gz | |
D3 H3K9me3 IHECRE00000048.3.11a776de-361e-4edd-b4c1-1153e9a72498.pval0.01.500K.narrowPeak.gz | |
D4 H3K27ac IHECRE00000155.3.8279b76b-57ad-4ec8-b5cb-d16fdac7512c.pval0.01.500K.narrowPeak.gz | |
D4 H3K27me3 IHECRE00000155.3.65fa8cc1-284a-4261-8b82-5bca18ab1a7b.pval0.01.500K.narrowPeak.gz | |
D4 H3K36me3 IHECRE00000155.3.f96e1b9b-e27a-457e-8b0f-804f1e61ec6e.pval0.01.500K.narrowPeak.gz | |
D4 H3K4me1 IHECRE00000155.3.856b1d64-01ad-4d8f-893b-b027b47318ee.pval0.01.500K.narrowPeak.gz | |
D4 H3K4me3 IHECRE00000155.3.8aad1d9d-d4ae-4ccf-a70a-5db7693e9703.pval0.01.500K.narrowPeak.gz | |
D4 H3K9me3 IHECRE00000155.3.2e98c95e-7e9b-4407-af30-61730f99e499.pval0.01.500K.narrowPeak.gz | |
D5 H3K27ac IHECRE00000774.3.0226d0bd-e448-4e8e-922e-bd3e7a7abd00.pval0.01.500K.narrowPeak.gz | |
D5 H3K27me3 IHECRE00000774.3.2555c044-a169-434c-aa5a-cdb2619e5c02.pval0.01.500K.narrowPeak.gz | |
D5 H3K36me3 IHECRE00000774.3.c74b9473-7abb-43f6-8885-3c40e0552ac6.pval0.01.500K.narrowPeak.gz | |
D5 H3K4me1 IHECRE00000774.3.ec5f998a-c189-48df-8b3d-327b2930af23.pval0.01.500K.narrowPeak.gz | |
D5 H3K4me3 IHECRE00000774.3.22c5c812-e487-45a1-b100-255b7ac328af.pval0.01.500K.narrowPeak.gz | |
D5 H3K9me3 IHECRE00000774.3.01056fa5-2189-424c-aa80-7250e2ec9dc9.pval0.01.500K.narrowPeak.gz | |
D6 H3K27ac IHECRE00000866.3.0925b7f7-7c86-40ca-bdc0-1ca853709a23.pval0.01.500K.narrowPeak.gz | |
D6 H3K27me3 IHECRE00000866.3.1ccf14fb-5a5a-4c74-8157-04b7e1f2dcf1.pval0.01.500K.narrowPeak.gz | |
D6 H3K36me3 IHECRE00000866.3.8ab33063-999a-4988-84dd-a334b327781d.pval0.01.500K.narrowPeak.gz | |
D6 H3K4me1 IHECRE00000866.3.667978be-7c54-441e-83d2-7798acad8e7b.pval0.01.500K.narrowPeak.gz | |
D6 H3K4me3 IHECRE00000866.3.2a076e8e-9a2f-43b5-99c3-eddd13655e6b.pval0.01.500K.narrowPeak.gz | |
D6 H3K9me3 IHECRE00000866.3.0bf1ced9-73d3-4bb4-80e8-08f993f2b136.pval0.01.500K.narrowPeak.gz | |
D7 H3K27ac IHECRE00000718.3.4124a800-a44d-4bfe-b76a-06de52b79d79.pval0.01.500K.narrowPeak.gz | |
D7 H3K27me3 IHECRE00000718.3.5e75e9ad-e9d2-4508-9566-bf664ca87cc0.pval0.01.500K.narrowPeak.gz | |
D7 H3K36me3 IHECRE00000718.3.3d668648-a4ff-43ad-ae6f-bcf86ce49985.pval0.01.500K.narrowPeak.gz | |
D7 H3K4me1 IHECRE00000718.3.3cf6cab8-5a06-44c2-b745-b249948e89e2.pval0.01.500K.narrowPeak.gz | |
D7 H3K4me3 IHECRE00000718.3.9c867cb2-87c1-446d-a7eb-1986e48fd4c0.pval0.01.500K.narrowPeak.gz | |
D7 H3K9me3 IHECRE00000718.3.aa15ac7e-15a0-48e8-acc8-ec3bdca62dd4.pval0.01.500K.narrowPeak.gz | |
D8 H3K27ac IHECRE00000828.1.45bbe382-78c1-4fac-99df-1349446e9df6.pval0.01.500K.narrowPeak.gz | |
D8 H3K27me3 IHECRE00000828.1.30590c4c-78e4-4da6-9844-49fd785aa3ea.pval0.01.500K.narrowPeak.gz | |
D8 H3K36me3 IHECRE00000828.1.ad8aa122-d0d8-45b6-96f2-58aee53acfb7.pval0.01.500K.narrowPeak.gz | |
D8 H3K4me1 IHECRE00000828.1.cd2006fa-f12a-47e4-a8de-2ef03985dfb6.pval0.01.500K.narrowPeak.gz | |
D8 H3K4me3 IHECRE00000828.1.3dc2375f-9530-437e-9fe7-54e01b6a99a6.pval0.01.500K.narrowPeak.gz | |
D8 H3K9me3 IHECRE00000828.1.aa54d00c-3307-444e-b6a2-8d8127554fe8.pval0.01.500K.narrowPeak.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
start | D1 | D3 | D5 | D7 |
---|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chr1 | chr-1.tsv.gz | |
---|---|---|
chr2 | chr-2.tsv.gz | |
chr3 | chr-3.tsv.gz | |
chr4 | chr-4.tsv.gz | |
chr5 | chr-5.tsv.gz | |
chr6 | chr-6.tsv.gz | |
chr7 | chr-7.tsv.gz | |
chr8 | chr-8.tsv.gz | |
chr9 | chr-9.tsv.gz | |
chr10 | chr-10.tsv.gz | |
chr11 | chr-11.tsv.gz | |
chr12 | chr-12.tsv.gz | |
chr13 | chr-13.tsv.gz | |
chr14 | chr-14.tsv.gz | |
chr15 | chr-15.tsv.gz | |
chr16 | chr-16.tsv.gz | |
chr17 | chr-17.tsv.gz | |
chr18 | chr-18.tsv.gz | |
chr19 | chr-19.tsv.gz | |
chr20 | chr-20.tsv.gz | |
chr21 | chr-21.tsv.gz | |
chr22 | chr-22.tsv.gz | |
chrX | chr-X.tsv.gz | |
chrY | chr-Y.tsv.gz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment