This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Compute the read-length distribution of a FASTQ file.
#
# Usage:  <script> [file.fastq]      (defaults to file.fastq)
# Output: one "length count" pair per line, sorted by increasing length.

# Print "length count" for every distinct sequence length found in the given
# FASTQ file(s); reads stdin when no file is given.
# Only the 2nd line of every 4-line group is measured, so multi-line
# (wrapped) FASTQ records are not supported.
read_length_distribution() {
  # FNR (per-file line number) keeps the "2nd line of 4" test correct when
  # several files are passed.
  awk 'FNR % 4 == 2 { lengths[length($0)]++ }
       END { for (l in lengths) print l, lengths[l] }' "$@" |
    sort -n # awk's "for (key in array)" order is unspecified; make it stable
}

read_length_distribution "${1:-file.fastq}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PBS job header template: replace memory_size and core_number with real
# values before submitting.
#PBS -q q1day
#PBS -j oe
#PBS -o $HOME/working_directory/task.log
#PBS -N task
#PBS -m n
#PBS -l mem=memory_sizegb,nodes=1:ppn=core_number
#PBS -S /bin/bash
# If more than 65 GB is needed, use this resource request instead:
# -l nodes=1:ppn=1:bigmem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Flatten a ".clstr" cluster file (">Cluster N" header lines followed by
# member lines) so that every member line carries its cluster number.
#
# Usage:  <script> [file.hit.clstr]      (defaults to file.hit.clstr)
# Output: "<cluster number><TAB><original member line>" for every member.

# Read the given cluster file(s) (stdin when none is given) and print each
# non-header line prefixed with the number of the cluster it belongs to.
prefix_members_with_cluster() {
  awk '
    # Header line: remember the cluster number, print nothing.
    $1 == ">Cluster" { cluster_number = $2; next }
    # Member line: emit it tagged with the current cluster number.
    { print cluster_number "\t" $0 }
  ' "$@"
}

prefix_members_with_cluster "${1:-file.hit.clstr}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Split a gzipped FASTQ file in two files for cases where the paired-end
# mates are concatenated in each read (not interleaved).
#
# Usage: <script> [input.fastq.gz [mate1.fastq [mate2.fastq]]]
# Defaults: intput.fastq.gz, intput-1.fastq and intput-2.fastq.

# Read 4-line FASTQ records on stdin and cut every sequence and quality line
# in the middle (for an odd length the extra base goes to the second half).
#   $1 - file receiving the first halves
#   $2 - file receiving the second halves
# Both files are appended to, never truncated.
# Returns non-zero if the input ends in the middle of a record.
split_concatenated_mates() {
  awk -v first="$1" -v second="$2" '
    BEGIN { OFS = "\n" }
    {
      name = $0
      if ((getline seq) <= 0 || (getline name2) <= 0 || (getline phred) <= 0) {
        print("truncated FASTQ record at input line " NR) > "/dev/stderr"
        exit 1
      }
      half = int(length(seq) / 2)
      # awk strings are 1-based: substr(s, 0, n) only yields n-1 characters
      # with gawk/mawk, which silently dropped the last base of the first mate.
      print name, substr(seq, 1, half), name2, substr(phred, 1, half) >> first
      print name, substr(seq, half + 1), name2, substr(phred, half + 1) >> second
    }
  '
}

gzip -dc -- "${1:-intput.fastq.gz}" |
  split_concatenated_mates "${2:-intput-1.fastq}" "${3:-intput-2.fastq}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the FASTQ reads whose sequence is at least 20 bases long.
#
# Usage: <script> [input.fastq [output.fastq [min_length]]]
# Defaults: input.fastq, output.fastq and 20.

# Copy 4-line FASTQ records from stdin to stdout, dropping every record whose
# sequence line is shorter than the minimum length.
#   $1 - minimum sequence length to keep (default 20)
# Returns non-zero if the input ends in the middle of a record.
filter_reads_by_min_length() {
  awk -v min_len="${1:-20}" '
    BEGIN { OFS = "\n" }
    {
      header = $0
      if ((getline seq) <= 0 || (getline qheader) <= 0 || (getline qseq) <= 0) {
        # Do not emit a record padded with stale lines from the previous one.
        print("truncated FASTQ record at input line " NR) > "/dev/stderr"
        exit 1
      }
      if (length(seq) >= min_len + 0)
        print header, seq, qheader, qseq
    }
  '
}

filter_reads_by_min_length "${3:-20}" < "${1:-input.fastq}" > "${2:-output.fastq}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Count the number of bases in a fastq.gz file.
#
# Usage:  <script> file.fastq.gz
# Output: the total length of all sequence lines, as a single number.

# Sum the lengths of the sequence lines (2nd line of every 4-line record)
# read from stdin and print the total; prints 0 for empty input.
sum_sequence_lengths() {
  awk 'NR % 4 == 2 { basenumber += length($0) } END { print basenumber + 0 }'
}

# Decompress the given .gz file(s) and print their total base count.
# Returns 2 (after a usage message on stderr) when called without a file.
count_bases() {
  if [ "$#" -eq 0 ]; then
    printf 'usage: %s file.fastq.gz\n' "${0##*/}" >&2
    return 2
  fi
  # Use the arguments, not $0: in a shell script $0 is the script's own path.
  gzip -dc -- "$@" | sum_sequence_lengths
}

count_bases "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Return a list of "sequence_id sequence_length" from a FASTA file.
#
# Usage:  <script> [file.fasta]      (defaults to file.fasta)
# Output: one line per record: the header line without its leading ">",
#         a space, then the summed length of the record's sequence lines.

# Print "id length" for every record of the given FASTA file(s); reads stdin
# when no file is given. Prints nothing for input without any header.
fasta_sequence_lengths() {
  awk '
    # Report the record collected so far. Before the first header there is
    # no record yet, so nothing is printed (this used to emit a stray " ").
    function flush_record() {
      if (have_record)
        print substr(sequence_id, 2) " " sequence_length
    }
    /^>/ {
      flush_record()
      have_record = 1
      sequence_id = $0
      sequence_length = 0
      next
    }
    { sequence_length += length($0) }
    END { flush_record() }
  ' "$@"
}

fasta_sequence_lengths "${1:-file.fasta}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Convert FASTA sequences to uppercase, useful for nhmmer.
#
# Usage: <script> [file.fasta [file_upper.fasta]]
# Defaults: file.fasta and file_upper.fasta.

# Copy FASTA from stdin to stdout: header lines (starting with ">") are left
# untouched, every other non-empty line is uppercased. Empty lines are
# dropped, as in the original one-liner.
fasta_to_upper() {
  awk '
    /^>/ { print; next }
    $0 != "" { print toupper($0) }
  '
}

fasta_to_upper < "${1:-file.fasta}" > "${2:-file_upper.fasta}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Remove all non-ASCII characters from file/folder names in a file tree
# (the tree rooted at the current directory).
#
# Pipeline, stage by stage:
#   1. convmv is run recursively on "." with stdout and stderr merged.
#   2. grep keeps only the lines containing "ascii".
#   3. perl strips the "ascii doesn't cover all needed characters for: "
#      prefix, leaving whatever convmv printed after it (presumably the
#      offending path - TODO confirm how convmv quotes it).
#   4. For each remaining line, awk runs through system():
#        mv <line> "$(echo <line> | uconv -t ASCII -x nfd -c )"
#      so the new name is whatever uconv outputs for the old one.
#
# NOTE(review): <line> is pasted unescaped into a shell command. A name
# containing quotes, "$" or backticks can break the command or be executed
# by the shell; do not run this on untrusted trees.
convmv -f utf8 -t ASCII -r . 2>&1 |
  grep ascii |
  perl -pe "s/ascii doesn't cover all needed characters for: //g" |
  awk '{system("mv "$0" \"$(echo "$0" | uconv -t ASCII -x nfd -c )\"")}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Alternative to renaming an old file with a .back suffix: the file is
# renamed with a timestamp suffix instead.
# usage : back.sh file_name

# Rename "$1" to "$1.<MM-DD-YY_HH:MM:SS>", using the current date and time.
#   $1 - file to rename (extra arguments are ignored)
# Returns 2 (after a usage message on stderr) when no file name is given,
# otherwise the exit status of mv.
timestamp_backup() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    printf 'usage: %s file_name\n' "${0##*/}" >&2
    return 2
  fi
  # "--" keeps a name starting with "-" from being parsed as an mv option.
  mv -- "$1" "$1$(date +".%m-%d-%y_%T")"
}

timestamp_backup "$@"
OlderNewer