Last active
October 10, 2018 00:27
-
-
Save conchoecia/cb27dc2ca94dcd8ccb89baa2fc4c01dc to your computer and use it in GitHub Desktop.
get info on linker content in HiC libaries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# this script makes plots of positions of linker sequences in HiC/Chicago libraries | |
#!/bin/bash | |
function processthis { | |
OUT1="${LIB}fpos.txt" | |
IN1="${LIB}_f.fastq.gz" | |
OUT2="${LIB}rpos.txt" | |
IN2="${LIB}_r.fastq.gz" | |
OUT3="${LIB}fpos_4.txt" | |
OUT4="${LIB}rpos_4.txt" | |
OUT5="${LIB}fpos_4_noAATTAATT.txt" | |
OUT6="${LIB}rpos_4_noAATTAATT.txt" | |
OUT7="${LIB}fpos_noAATT_start.txt" | |
OUT8="${LIB}rpos_noAATT_start.txt" | |
TABLE="${LIB}_table.txt" | |
PLOT="${LIB}_linkerpos.pdf" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{for(i=1; i<=75; i++){pos[i]=0}} {for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 8); if (thisseq==cutsite){pos[i] += 1} } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN1}" > "${OUT1}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 4); if (thisseq==foursite){pos[i] += 1} } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN1}" > "${OUT3}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {cont=1; for (i=1; i<=75-8+1; i++) {if (substr($seq, i, 8)==cutsite){ cont=0}}; if (cont==1){for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 4); if (thisseq==foursite){pos[i] += 1} } }} END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN1}" > "${OUT5}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {cont=1; if (substr($seq, 1, 4)==foursite){ cont=0}; if (cont==1){for (i=1; i<=75-8+1; i++) {if (substr($seq, i, 8)==cutsite){pos[i] += 1} } } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN1}" > "${OUT7}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{for(i=1; i<=75; i++){pos[i]=0}} {for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 8); if (thisseq==cutsite){pos[i] += 1} } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN2}" > "${OUT2}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 4); if (thisseq==foursite){pos[i] += 1} } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN2}" > "${OUT4}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {cont=1; for (i=1; i<=75-8+1; i++) {if (substr($seq, i, 8)==cutsite){ cont=0}}; if (cont==1){for (i=1; i<=75-8+1; i++) {thisseq=substr($seq, i, 4); if (thisseq==foursite){pos[i] += 1} } }} END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN2}" > "${OUT6}" | |
bioawk -v cutsite="${SITE}" -cfastx 'BEGIN{foursite=substr(cutsite,1,4); for(i=1; i<=75; i++){pos[i]=0}} {cont=1; if (substr($seq, 1, 4)==foursite){ cont=0}; if (cont==1){for (i=1; i<=75-8+1; i++) {if (substr($seq, i, 8)==cutsite){pos[i] += 1} } } } END{for (i=1; i<=75-8+1; i++) { printf("%d\t%d\n", i, pos[i])} }' "${IN2}" > "${OUT8}" | |
paste "${OUT1}" "${OUT2}" "${OUT3}" "${OUT4}" "${OUT5}" "${OUT6}" "${OUT7}" "${OUT8}"| awk '{printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", $1, $2, $4, $6, $8, $10, $12, $14, $16)}' > "${TABLE}" | |
Rscript plot_table.R "${TABLE}" "${PLOT}" "${TITLE}" | |
} | |
SITE="AATTAATT" | |
LIB="DS249" | |
TITLE="DS249 AATTAATT HiC - Freeze-dried sample - NEB FS kit - ~50,000 reads" | |
processthis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
args <- commandArgs(trailingOnly = TRUE) | |
print(args) | |
# trailingOnly=TRUE means that only your arguments are returned, check: | |
# print(commandArgs(trailingOnly=FALSE)) | |
graph = args[1] | |
output = args[2] | |
title = args[3] | |
rm(args) | |
pdf(file=output) | |
df = read.table(graph, | |
sep="\t", header = FALSE, row.names = NULL) | |
plot(c(df$V1, df$V1), c(df$V2, df$V3), | |
pch=c(rep.int(1, length(df$V1)),rep.int(3, length(df$V1))), | |
main = title, | |
cex.main=0.5, | |
xlab = "position in read", | |
ylab = "number of occurrences of linker") | |
lines(df$V1, df$V2, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V3, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V2), max(df$V3)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
plot(c(df$V1, df$V1), c(df$V4, df$V5), | |
pch=c(rep.int(1, length(df$V1)),rep.int(3, length(df$V1))), | |
main = title, | |
cex.main=0.5, | |
xlab = "position in read", | |
ylab = "# linker[0:4]") | |
lines(df$V1, df$V4, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V5, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V4), max(df$V5)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
par(mfrow=c(2,1), | |
oma = c(3, 3, 0, 0)+0.2, # two rows of text at the outer left and bottom margin | |
mar = c(1, 1, 1, 1)+0.2, # space for one row of text at ticks and to separate plots | |
mgp = c(2, 1, 0), # axis label at 2 rows distance, tick labels at 1 row | |
xpd = NA) | |
plot(c(df$V1, df$V1), c(df$V2, df$V3), | |
pch=c(rep.int(1, length(df$V1)),rep.int(3, length(df$V1))), | |
main = title, | |
cex.main=0.5, | |
xlab = "", | |
ylab = "number of occurrences of linker") | |
lines(df$V1, df$V2, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V3, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V2), max(df$V3)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
plot(c(df$V1, df$V1), c(df$V4, df$V5), | |
pch=c(rep.int(1, length(df$V1)),rep.int(3, length(df$V1))), | |
xlab = "position in read", | |
ylab = "# linker[0:4]") | |
lines(df$V1, df$V4, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V5, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V4), max(df$V5)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
par(mfrow=c(2,1), | |
oma = c(3, 3, 0, 0)+0.2, # two rows of text at the outer left and bottom margin | |
mar = c(1, 1, 1, 1)+0.2, # space for one row of text at ticks and to separate plots | |
mgp = c(2, 1, 0), # axis label at 2 rows distance, tick labels at 1 row | |
xpd = NA) | |
plot(c(df$V1, df$V1), c(df$V6, df$V7), | |
pch=c(rep.int(1, length(df$V6)),rep.int(3, length(df$V7))), | |
main = title, | |
xlab="", | |
cex.main=0.5, | |
ylab = "# linker[0:4] if no linker[0:8] in read") | |
lines(df$V1, df$V6, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V7, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V6), max(df$V7)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
plot(c(df$V1, df$V1), c(df$V8, df$V9), | |
pch=c(rep.int(1, length(df$V8)),rep.int(3, length(df$V9))), | |
xlab = "position in read", | |
ylab = "# linker[0:8] if no linker[0:4] at pos0") | |
lines(df$V1, df$V8, lwd=2, col=rgb(1, 0, 0, 0.5)) | |
lines(df$V1, df$V9, lwd=2, col=rgb(0, 0, 1, 0.5)) | |
legend(55, max(max(df$V8), max(df$V9)) , | |
legend=c("R1", "R2"), | |
col=c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.5)), lty=c(1,1), | |
lwd = c(2, 2), cex=1.2, pch = c(1, 3) ) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment