Created
August 6, 2015 15:59
-
-
Save wckdouglas/052bd7c986fd65b3673c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <string.h> | |
| #include <cstring> | |
| #include <iostream> | |
| #include <fstream> | |
| #include <gzstream.h> | |
| #include <sstream> | |
| using namespace std; | |
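| // Split a FASTQ file (read through igzstream) into numbered chunk files, | |
| // each holding a fixed number of records (4 lines per record). | |
| // The chunks are written either as plain text or gzip-compressed. | |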
// Format an output chunk index as text for use in a file name.
//
// Indices below 10 get a leading "0" so they come out two characters wide
// ("00".."09"); indices of 10 and above are printed unchanged ("10", "123").
// Note the boundary: 10 already has two digits and must not be padded.
//
// filenum: chunk index; callers in this file pass values counting up from 0.
// returns: the decimal text of filenum, prefixed with "0" when filenum < 10.
std::string fixfilenum(int filenum)
{
    std::ostringstream convert;
    if (filenum < 10)
    {
        convert << '0';
    }
    convert << filenum;
    return convert.str();
}
| void splitFastq(char *fqFile, string filePrefix, int recordNum) | |
| { | |
| // open fastq file for kseq parsing | |
| cerr << "From " << fqFile << "...." << endl; | |
| cerr << "Splitting " << recordNum << " records per file" << endl; | |
| int maxLine = recordNum * 4; | |
| int lineCount = 0, filenum = 0; | |
| string filename; | |
| igzstream in(fqFile); | |
| ofstream outFile; | |
| for (string line; getline(in,line);) | |
| { | |
| if (lineCount == 0) | |
| { | |
| filename = filePrefix + "_" + fixfilenum(filenum) + ".fastq"; | |
| outFile.open(filename.c_str()); | |
| outFile << line << '\n'; | |
| } | |
| else if (lineCount == maxLine) | |
| { | |
| outFile.close(); | |
| cerr << "written " << filename << endl; | |
| lineCount = 0; | |
| filenum ++; | |
| filename = filePrefix + "_" + fixfilenum(filenum) + ".fastq"; | |
| outFile.open(filename.c_str()); | |
| outFile << line << '\n'; | |
| } | |
| else | |
| { | |
| outFile << line << '\n'; | |
| } | |
| lineCount ++; | |
| } | |
| outFile.close(); | |
| cerr << "written " << filename << endl; | |
| } | |
| void splitFastqZip(char *fqFile, string filePrefix, int recordNum) | |
| { | |
| // open fastq file for kseq parsing | |
| cerr << "From " << fqFile << "...." << endl; | |
| cerr << "Splitting " << recordNum << " records per file" << endl; | |
| int maxLine = recordNum * 4; | |
| int lineCount = 0, filenum = 0; | |
| string filename; | |
| igzstream in(fqFile); | |
| ogzstream outFile; | |
| for (string line; getline(in,line);) | |
| { | |
| if (lineCount == 0) | |
| { | |
| filename = filePrefix + "_" + fixfilenum(filenum) + ".fastq.gz"; | |
| outFile.open(filename.c_str()); | |
| outFile << line << '\n'; | |
| } | |
| else if (lineCount == maxLine) | |
| { | |
| outFile.close(); | |
| cerr << "written " << filename << endl; | |
| lineCount = 0; | |
| filenum ++; | |
| filename = filePrefix + "_" + fixfilenum(filenum) + ".fastq.gz"; | |
| outFile.open(filename.c_str()); | |
| outFile << line << '\n'; | |
| } | |
| else | |
| { | |
| outFile << line << '\n'; | |
| } | |
| lineCount ++; | |
| } | |
| outFile.close(); | |
| cerr << "written " << filename << endl; | |
| } | |
| // print usage | |
// Write the command-line help text to stderr.
//
// programname: name shown in the "usage:" synopsis line (main passes argv[0]).
void usage(std::string programname)
{
    // Synopsis line first; it is the only line that embeds the program name.
    std::cerr << "usage: " << programname
              << " -i <fqfile> -n <# of record per file> -o <prefix> [-z]"
              << std::endl;

    // Fixed per-option help lines, printed in order.
    static const char *const helpLines[] = {
        "[options]",
        "-i <fastq file>",
        "-n <number of record in each splitted file> default: 10000000",
        "-o <prefix>",
        "-z optional: gzip output"
    };
    const int helpLineCount = sizeof(helpLines) / sizeof(helpLines[0]);
    for (int idx = 0; idx < helpLineCount; ++idx)
    {
        std::cerr << helpLines[idx] << std::endl;
    }
}
| // main function | |
| int main(int argc, char **argv){ | |
| char *fqFile; | |
| int c, recordNum = 10000000; | |
| int gz = 0; | |
| string programname = argv[0]; | |
| string filePrefix = ""; | |
| if (argc == 1){ | |
| usage(programname); | |
| return 1; | |
| } | |
| opterr = 0; | |
| // print usage if not enough argumnets | |
| while ((c = getopt(argc, argv, "i:n:o:z")) != -1){ | |
| switch (c){ | |
| case 'i': | |
| fqFile = optarg; | |
| break; | |
| case 'n': | |
| recordNum = atoi(optarg); | |
| break; | |
| case 'o': | |
| filePrefix = optarg; | |
| break; | |
| case 'z': | |
| gz = 1; | |
| break; | |
| case '?': | |
| if (optopt == 'n' || optopt == 'i' || optopt== 'o'){ | |
| cerr << "option n, i, p need arguments!" << endl; | |
| usage(programname); | |
| } | |
| else { | |
| usage(programname); | |
| } | |
| return 1; | |
| default: | |
| abort(); | |
| } | |
| } | |
| if (filePrefix == "" || strcmp(fqFile,"") == 0) | |
| { | |
| usage(programname); | |
| return 1; | |
| } | |
| // pass variable to fnuction | |
| if (gz == 0) | |
| { | |
| splitFastq(fqFile, filePrefix, recordNum); | |
| } | |
| else | |
| { | |
| splitFastqZip(fqFile, filePrefix, recordNum); | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment