This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Define the parameters.
# The stray " | |" that trailed every line has been removed: it corrupted the
# shebang and turned each assignment into a malformed pipeline.
# NOTE(review): sequence_lengths and error_rates are bash arrays; presumably
# every length/error-rate combination is simulated further down (not visible
# in this excerpt) -- confirm.
sequence_lengths=(600 12000 30000)
error_rates=(0.1 0.05 0.01)
num_sequences=1000
motif="TTAGGG"
simulation_dir="simulation"
tidk_output_dir="tidk_output"
summary_file="tidk_explore_summary_results.tsv"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run $trf_cmd (presumably Tandem Repeats Finder -- confirm) on a symlinked
# copy of the assembly, rename its .dat/.mask outputs, and flatten the .dat
# report into a tab-separated table with the sequence name as first column.
# NOTE(review): the "\$" escapes inside the awk program suggest this text is
# embedded in a templated script (e.g. a Nextflow script block) -- confirm.
# They are kept exactly as in the original.
ln -s "$assembly" "${strain}.fasta"
# $trf_cmd stays unquoted in case it holds a command plus arguments.
$trf_cmd "${strain}.fasta" 2 7 7 80 10 50 2000 -f -h -d -m
# Remove the temporary symlink exactly once. The original repeated this rm
# after the grep pipeline, where it fails because the file is already gone.
rm "${strain}.fasta"
mv "${strain}.fasta.2.7.7.80.10.50.2000.dat" "${strain}.trf.dat"
mv "${strain}.fasta.2.7.7.80.10.50.2000.mask" "${strain}.trf.mask"
# Keep "Sequence: <name>" headers and repeat rows (lines starting with S or a
# digit), select the wanted columns, and prefix each repeat row with the most
# recent sequence name. '[:blank:]' is quoted so the shell cannot glob it.
grep -P '^[S0-9]' "${strain}.trf.dat" | cut -d ' ' -f 1,2,3,4,6,7,8,13,14 | awk '{if(\$1 ~ /Sequence/){chr=\$2} else {print chr, \$0}}' | tr '[:blank:]' '\t' > "${strain}.trf.tsv"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Getopt::Long; | |
use File::Basename; | |
use Data::Dumper; | |
# globals |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from Bio import SeqIO | |
from Bio.SeqRecord import SeqRecord | |
from Bio.Seq import Seq | |
# Define the window size, step size and the distance between paired reads.
# The stray " | |" that trailed every line has been removed; it made each
# assignment a syntax error.
window_size = 150
step_size = 100
paired_distance = 300
phred_score = 41  # maximum score in Illumina 1.8+ fastq format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the run of digits that follows the last non-digit/digit boundary.
#
# sub() replaces the first match of ".+[^0-9]([0-9]+)" with the captured
# digits, so the leading text up to and including that non-digit is dropped
# (e.g. "scaffold_12" -> "12"). This is NOT a general "strip every non-digit"
# helper:
#   * text after the captured digits is kept;
#   * strings the pattern cannot match -- digits only ("123"), or a single
#     leading non-digit ("x5") -- are returned unchanged.
#
# x: character vector (sub() is vectorised over it).
# Returns a character vector of the same length; nothing is converted to
# numeric.
get_integers <- function(x) {
  sub(pattern = ".+[^0-9]([0-9]+)", replacement = "\\1", x = x)
}
# Print script usage and information | |
print_usage <- function() { | |
cat("------------------------------------------------------------------------\n") | |
cat("Script Usage:\n") | |
cat("This program processes a TSV file generated by the miltel program from the bleties package.\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the run of digits that follows the last non-digit/digit boundary.
#
# sub() replaces the first match of ".+[^0-9]([0-9]+)" with the captured
# digits, so the leading text up to and including that non-digit is dropped
# (e.g. "scaffold_12" -> "12"). This is NOT a general "strip every non-digit"
# helper:
#   * text after the captured digits is kept;
#   * strings the pattern cannot match -- digits only ("123"), or a single
#     leading non-digit ("x5") -- are returned unchanged.
#
# x: character vector (sub() is vectorised over it).
# Returns a character vector of the same length; nothing is converted to
# numeric.
get_integers <- function(x) {
  sub(pattern = ".+[^0-9]([0-9]+)", replacement = "\\1", x = x)
}
# Print script usage and information | |
print_usage <- function() { | |
cat("------------------------------------------------------------------------\n") | |
cat("Script Usage:\n") | |
cat("This program processes a TSV file generated by the miltel program from the bleties package.\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# requires(gtools) # for mixedsort function with roman numerals | |
### Functions ####
# Function for loading buscos | |
read_busco <- function(buscoFile){ | |
read_tsv(buscoFile, | |
col_names = c("Busco_id", "Status", "Sequence", | |
"start", "end", "strand", "Score", "Length", | |
"OrthoDB_url", "Description"), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# singularity pull docker://4dndcic/4dn-hic:v42.2
# Command prefix that runs the 4dn-hic image through singularity with /lustre
# bind-mounted. The stray " | |" that trailed every line has been removed; it
# made each command a malformed pipeline.
dn4="/software/singularity-v3.6.4/bin/singularity run -B /lustre:/lustre /lustre/scratch123/tol/teams/blaxter/projects/tol-nemotodes/sw/.singularity/4dn-hic_v42.sif"
# $dn4 is deliberately left unquoted so the shell splits it into the command
# and its arguments.
$dn4 run-bam2pairs.sh merge.mkdup.sorted.bam nxOscSper1.1
basename=nxOscSper1.1.4dn
# NOTE(review): $ref is not set anywhere in this excerpt -- confirm it is
# defined before this line. Quoted so a path containing spaces is not split.
cut -f1,2 "${ref}.fai" > "${ref}.fai_sizes" # vim adjust order of chr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# User input ####
# The stray " | |" that trailed every line has been removed; it made each
# assignment a syntax error.
x_species <- c("nxOscDolc1.1", "nxOscOnir1.2")
y_species <- c("nxOscSper1.1", "nxOscSpeu1.1")
# path to BUSCO table directory
# path must end in "/" (e.g. busco_dir/)
busco_dir <- "analyses/paper1/prelim_to_20220220/nemaChromQC/"
# Fail early if the documented trailing-slash requirement is not met.
stopifnot(endsWith(busco_dir, "/"))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scales) | |
library(gtools) | |
# User input. The stray " | |" that trailed every line has been removed; it
# made each assignment a syntax error.

# Sequences shorter than this will not be plotted (the size of the sequence
# is inferred from the maximum coordinate of a BUSCO).
min_seq_size <- 5e5
# Path to reference file.
ref_busco <- "~/Downloads/CABPSW02.yahs_scaffolds_final_nematoda_odb10_full_table.tsv"
# Path to query file.
query_busco <- "~/Downloads/APS7_sophie.v4_nopipe.fasta.yahs_scaffolds_final_nematoda_odb10_full_table.tsv"
ref_species <- "A. rhodensis" # text in x axis of plot
query_species <- "A. freiburgensis" # text in y axis of plot
NewerOlder