Created
March 30, 2016 05:01
-
-
Save jerowe/aae7a41b1fcac9f772be0c29ae2cd579 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # loading raw data: decompress the three gzipped lane 1 FASTQ files (R1, R2, R3) | |
| gunzip lane1_NoIndex_L001_R1_001.fastq.gz | |
| gunzip lane1_NoIndex_L001_R2_001.fastq.gz | |
| gunzip lane1_NoIndex_L001_R3_001.fastq.gz | |
| # fix barcode headers to match R1 headers: rewrite 2:N:<digits>: to 1:N:0: in the R2 file (applied to every line, not only header lines) | |
| cat lane1_NoIndex_L001_R2_001.fastq | sed 's/2:N:[0-9]\+:/1:N:0:/g' > barcodes.fixed.fastq | |
| # validate the mapping file; output is written to corrected_map | |
| validate_mapping_file.py -m CDC1_MAP.txt -o corrected_map | |
| # join forward (R1) and reverse (R3) reads, passing the fixed R2 barcodes as the index reads | |
| join_paired_ends.py --min_overlap 10 --perc_max_diff 20 --forward_reads_fp lane1_NoIndex_L001_R1_001.fastq --reverse_reads_fp lane1_NoIndex_L001_R3_001.fastq --index_reads_fp barcodes.fixed.fastq --output_dir 1_join_paired_ends | |
| # split libraries using the joined reads and joined barcodes from 1_join_paired_ends; NOTE(review): -m is the original CDC1_MAP.txt, not the output in corrected_map, and the intent of -s 1000000000 is unclear - confirm both | |
| split_libraries_fastq.py -i 1_join_paired_ends/fastqjoin.join.fastq --rev_comp_mapping_barcodes -o split_out_paired_cdc1/ -b 1_join_paired_ends/fastqjoin.join_barcodes.fastq -m CDC1_MAP.txt -s 1000000000 | |
| # if necessary, combine the seqs.fna files from multiple experiments into one file here | |
| cat cdc1/split_out_paired_cdc1/seqs.fna cdc2/split_out_paired_cdc2/seqs.fna HMAC/split_out_paired_hmac/seqs.fna > combined_hmac_cdc_seqs_071615.fna | |
| # pick de novo OTUs using the (combined) split_libraries output; results go to hmac_cdc_otus | |
| pick_de_novo_otus.py -i combined_hmac_cdc_seqs_071615.fna -o hmac_cdc_otus | |
| # now remove chimeras from the rep_set_aligned fasta file using a reference aligned fasta file (http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/) | |
| # (make sure all files used in this step are in one directory, otherwise ChimeraSlayer won't work) | |
| identify_chimeric_seqs.py -m ChimeraSlayer -i combined_hmac_cdc_seqs_071615_rep_set_aligned.fasta -a core_set_aligned.fasta.imputed -o chimeric_seqs_core.txt | |
| # remove the chimeric sequences from the alignment before filtering and tree building | |
| filter_fasta.py -f combined_hmac_cdc_seqs_071615_rep_set_aligned.fasta -o non_chimeric_rep_set_aligned.fasta -s chimeric_seqs_core.txt -n | |
| # filter the alignment (i.e. get rid of blank spaces - presumably gap-only columns, confirm) | |
| filter_alignment.py -i non_chimeric_rep_set_aligned.fasta -o non_chimeric_filtered_alignment/ | |
| # make the tree; it can later be added to a phyloseq object in R | |
| make_phylogeny.py -i non_chimeric_filtered_alignment/non_chimeric_rep_set_aligned_pfiltered.fasta -o CH_non_chimeric.tre | |
| # make the OTU table: the otu_map is the file created after the first step (in the uclust_picked_otus folder) and the taxonomy file is in the uclust_assigned_taxonomy folder; at the same time, remove sequences that failed alignment. NOTE: this command and the two filtering commands below are commented out | |
| #make_otu_table.py -i uclust_picked_otus/combined_hmac_cdc_seqs_071615_otus.txt -o otu_table_no_fails.biom -e pynast_aligned_seqs/combined_hmac_cdc_seqs_071615_rep_set_failures.fasta -t uclust_assigned_taxonomy/combined_hmac_cdc_seqs_071615_rep_set_tax_assignments.txt | |
| # filter the chimeras out of the OTU table | |
| #filter_otus_from_otu_table.py -i otu_table_no_fails.biom -o otu_table_no_fails_chims.biom -e pynast_aligned_seqs/chimeraSlayer/chimeric_seqs_core.txt | |
| # remove singletons from the BIOM file | |
| #filter_otus_from_otu_table.py -i otu_table_no_fails_chims.biom -o otu_table_no_fails_chims_singles.biom -n 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment