/**
 * This module provides functions to parse delimiter-separated text formats similar to CSV.
 * Supported formats:
 *  - .mat matrix files
 *  - .bed UCSC files
 * To parse a plain .csv file, use std.csv directly.
 */
module bed;

import std.conv;
import std.stdio;
import std.csv;
import std.traits;
import std.file;
import std.array;
import std.algorithm;
import std.range;
import std.string;
import std.exception;

/**
 * matrixReader
 * Reads a delimiter-separated matrix from a text file, one row per line.
 * Empty fields (for instance the one found between two consecutive delimiters)
 * are dropped before conversion, so repeated delimiters do not add columns.
 * Params:
 *  filePath    = path of the file holding the matrix
 *  separator   = column delimiter used in the file; a tab by default
 * Returns:
 * A 2D array with one inner array per line read, every element converted to T
 */
T[][] matrixReader( T )( string filePath, string separator = "\t" ){
    T[][] rows;
    foreach( rawLine; File( filePath, "r" ).byLine() ){
        // split keeps the empty fields produced by consecutive delimiters, hence the filter
        auto fields     = rawLine.split( separator );
        auto nonEmpty   = fields.filter!( field => !field.empty );
        rows           ~= nonEmpty.map!( field => to!T( field ) ).array;
    }
    return rows;
}

/// Record made of the first 3 columns of a bed line; fields are declared in column order.
struct BedData3 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
}

/// Record made of the first 4 columns of a bed line; fields are declared in column order.
struct BedData4 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
}

/// Record made of the first 5 columns of a bed line; fields are declared in column order.
struct BedData5 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
}

/// Record made of the first 6 columns of a bed line; fields are declared in column order.
struct BedData6 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
}

/// Record made of the first 7 columns of a bed line; fields are declared in column order.
struct BedData7 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
    size_t thickStart;  /// column 6
}

/// Record made of the first 8 columns of a bed line; fields are declared in column order.
struct BedData8 {
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
    size_t thickStart;  /// column 6
    size_t thickEnd;    /// column 7
}

/// Record made of the first 9 columns of a bed line; fields are declared in column order.
struct BedData9 {
    string    chrom;        /// column 0
    size_t    chromStart;   /// column 1
    size_t    chromEnd;     /// column 2
    string    name;         /// column 3
    size_t    score;        /// column 4
    char      strand;       /// column 5
    size_t    thickStart;   /// column 6
    size_t    thickEnd;     /// column 7
    // NOTE(review): presumably written as "r,g,b" in the file; confirm that a
    // fixed-size array can be converted from that text by the csv parser.
    size_t[3] itemRgb;      /// column 8
}

/// Record made of the first 10 columns of a bed line; fields are declared in column order.
struct BedData10 {
    string    chrom;        /// column 0
    size_t    chromStart;   /// column 1
    size_t    chromEnd;     /// column 2
    string    name;         /// column 3
    size_t    score;        /// column 4
    char      strand;       /// column 5
    size_t    thickStart;   /// column 6
    size_t    thickEnd;     /// column 7
    // NOTE(review): presumably written as "r,g,b" in the file; confirm that a
    // fixed-size array can be converted from that text by the csv parser.
    size_t[3] itemRgb;      /// column 8
    size_t    blockCount;   /// column 9
}

/// Record made of the first 11 columns of a bed line; fields are declared in column order.
struct BedData11 {
    string    chrom;        /// column 0
    size_t    chromStart;   /// column 1
    size_t    chromEnd;     /// column 2
    string    name;         /// column 3
    size_t    score;        /// column 4
    char      strand;       /// column 5
    size_t    thickStart;   /// column 6
    size_t    thickEnd;     /// column 7
    // NOTE(review): presumably written as "r,g,b" in the file; confirm that a
    // fixed-size array can be converted from that text by the csv parser.
    size_t[3] itemRgb;      /// column 8
    size_t    blockCount;   /// column 9
    // NOTE(review): UCSC describes blockSizes as a comma-separated list; confirm
    // that a single size_t is the intended representation.
    size_t    blockSizes;   /// column 10
}

/// Record made of all 12 columns of a bed line; fields are declared in column order.
struct BedData12 {
    string    chrom;        /// column 0
    size_t    chromStart;   /// column 1
    size_t    chromEnd;     /// column 2
    string    name;         /// column 3
    size_t    score;        /// column 4
    char      strand;       /// column 5
    size_t    thickStart;   /// column 6
    size_t    thickEnd;     /// column 7
    // NOTE(review): presumably written as "r,g,b" in the file; confirm that a
    // fixed-size array can be converted from that text by the csv parser.
    size_t[3] itemRgb;      /// column 8
    size_t    blockCount;   /// column 9
    // NOTE(review): UCSC describes blockSizes and blockStarts as comma-separated
    // lists; confirm that a single size_t each is the intended representation.
    size_t    blockSizes;   /// column 10
    size_t    blockStarts;  /// column 11
}


/// Header information of a bed file: browser lines plus a few track attributes.
struct BedMetadata {
    string name;            /// written as name= in the track line
    string description;     /// written as description= in the track line
    size_t visibility;      /// written as visibility= in the track line
    string itemRgb;         /// written as itemRgb="..."
    size_t browserStart;    /// start of the "browser position" interval
    size_t browserEnd;      /// end of the "browser position" interval
    string chromosome;      /// chromosome of the "browser position" line
    string hide;            /// payload of the "browser hide" line

    /**
     * Renders the header lines that can be rebuilt from the stored fields.
     * A line is emitted only when every field it needs differs from its
     * default value (empty string or 0).
     * Returns:
     * The concatenated header text, empty when nothing is set
     */
    string toString(){
        immutable bool hasPosition  = chromosome != "" && browserStart != 0 && browserEnd != 0;
        immutable bool hasHide      = hide != "";
        immutable bool hasTrack     = name != "" && description != "" && visibility != 0;
        immutable bool hasItemRgb   = itemRgb != "";

        string text = "";
        if( hasPosition )
            text ~= format( "browser position %s:%d-%d\n", chromosome, browserStart, browserEnd );
        if( hasHide )
            text ~= format( "browser hide %s\n", hide );
        if( hasTrack )
            text ~= format( "track name=%s description=%s visibility=%d\n", name, description, visibility );
        if( hasItemRgb )
            text ~= format( "itemRgb=\"%s\"", itemRgb );
        return text;
    }
}


/// Attributes of a "track" line; every field keeps its default value until it is set.
struct TrackLine {
    string    name;             /// name attribute
    string    description;      /// description attribute
    string    type;             /// type attribute
    size_t    visibility;       /// visibility attribute
    size_t[3] color;            /// color attribute, three numeric components
    string    itemRgb;          /// itemRgb attribute
    size_t[3] colorByStrand;    /// colorByStrand attribute
    size_t    useScore;         /// useScore attribute
    string    group;            /// group attribute
    string    db;               /// db attribute
    size_t    offset;           /// offset attribute
    size_t    maxItems;         /// maxItems attribute
    string    url;              /// url attribute
    string    htmlUrl;          /// htmlUrl attribute
    string    bigDataUrl;       /// bigDataUrl attribute
}


/**
 * Content of one bed file.
 * T is the record type stored for each data line (for instance BedData3).
 */
struct Bed( T ) {
    BedMetadata metadata;       /// values read from the "browser" lines
    TrackLine   trackLine;      /// values read from the "track" line
    T[]         bedDataList;    /// one record per data line, in file order
}

/**
 * trackLineReader
 * Parses the key=value attributes of a "track" line into a TrackLine.
 * Values may be enclosed in double quotes (and may then contain blanks) or be
 * written bare, in which case they end at the next blank or at the end of the line.
 * Attributes that are absent leave the matching field at its default value;
 * attributes that are not part of TrackLine are ignored.
 * Params:
 *  trackLine   = the whole line, with or without its leading "track" word
 * Returns:
 * A TrackLine filled with every recognised attribute
 * Throws:
 * ConvException when a numeric attribute is not a number,
 * Exception when color does not hold exactly three components
 */
TrackLine trackLineReader( in char[] trackLine ){
    TrackLine result;
    string[string] attributes = trackLineAttributes( trackLine );

    if( auto value = "name" in attributes )
        result.name         = *value;
    if( auto value = "description" in attributes )
        result.description  = *value;
    if( auto value = "type" in attributes )
        result.type         = *value;
    if( auto value = "visibility" in attributes )
        result.visibility   = trackVisibility( *value );
    if( auto value = "color" in attributes )
        result.color        = rgbTriplet( *value );
    if( auto value = "itemRgb" in attributes )
        result.itemRgb      = *value;
    // NOTE(review): colorByStrand is not read from the line. UCSC documents it as
    // two RGB triplets, which would not fit the size_t[3] field - confirm the
    // intended representation before parsing it.
    if( auto value = "useScore" in attributes )
        result.useScore     = to!size_t( *value );
    if( auto value = "group" in attributes )
        result.group        = *value;
    if( auto value = "db" in attributes )
        result.db           = *value;
    if( auto value = "offset" in attributes )
        result.offset       = to!size_t( *value );
    if( auto value = "maxItems" in attributes )
        result.maxItems     = to!size_t( *value );
    if( auto value = "url" in attributes )
        result.url          = *value;
    if( auto value = "htmlUrl" in attributes )
        result.htmlUrl      = *value;
    if( auto value = "bigDataUrl" in attributes )
        result.bigDataUrl   = *value;

    return result;
}

/// Splits a track line into its key=value pairs; quotes around a value are removed.
private string[string] trackLineAttributes( in char[] trackLine ){
    static bool isBlank( char c ){
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    }

    string[string] attributes;
    size_t cursor = 0;
    while( cursor < trackLine.length ){
        // Skip the blanks separating two tokens
        while( cursor < trackLine.length && isBlank( trackLine[cursor] ) )
            cursor++;

        // A key runs up to '='; a token without '=' (the leading "track" word) is skipped
        const size_t keyStart = cursor;
        while( cursor < trackLine.length && trackLine[cursor] != '=' && !isBlank( trackLine[cursor] ) )
            cursor++;
        if( cursor >= trackLine.length || trackLine[cursor] != '=' )
            continue;
        string key = trackLine[keyStart .. cursor].idup;
        cursor++;   // step over '='

        size_t valueStart = cursor;
        size_t valueEnd;
        if( cursor < trackLine.length && trackLine[cursor] == '"' ){
            // Quoted value: everything up to the closing quote, blanks included
            valueStart = ++cursor;
            while( cursor < trackLine.length && trackLine[cursor] != '"' )
                cursor++;
            valueEnd = cursor;
            if( cursor < trackLine.length )
                cursor++;   // step over the closing quote
        }
        else{
            // Bare value: ends at the next blank or at the end of the line
            while( cursor < trackLine.length && !isBlank( trackLine[cursor] ) )
                cursor++;
            valueEnd = cursor;
        }
        attributes[key] = trackLine[valueStart .. valueEnd].idup;
    }
    return attributes;
}

/// Converts a visibility value, given as a number or as a UCSC display mode word.
private size_t trackVisibility( string value ){
    switch( value ){
        case "hide":    return 0;
        case "dense":   return 1;
        case "full":    return 2;
        case "pack":    return 3;
        case "squish":  return 4;
        default:        return to!size_t( value );
    }
}

/// Converts a "r,g,b" text into its three numeric components.
private size_t[3] rgbTriplet( string value ){
    auto components = value.split( "," ).map!( part => to!size_t( part.strip ) ).array;
    enforce( components.length == 3, "Malformed RGB triplet in track line: %s".format( value ) );
    size_t[3] rgb;
    rgb[] = components[];
    return rgb;
}

/**
 * bedReader
 * Loads a bed file: "browser" and "track" header lines are parsed into the
 * metadata, every other non-empty line is parsed as one delimited record of type T.
 * Lines starting with '#' and blank lines are skipped.
 * Params:
 *  filePath    = path of the bed file
 *  delimiter   = column delimiter of the data lines; a tab by default
 * Returns:
 * A Bed!T holding the metadata, the track line and the records in file order
 * Throws:
 * FileException when filePath does not exist or is not a regular file,
 * Exception when a "browser" line is malformed,
 * plus whatever the csv parser raises on a data line it cannot convert to T
 */
auto bedReader( T = BedData3 )(  in char[] filePath, char delimiter='\t' ){
    // The two-argument constructor keeps the message as is; the single-argument one
    // would treat it as a file name and append the text of the current errno.
    if( !filePath.exists )
        throw new FileException( filePath, "File %s does not exist".format( filePath ) );
    if( !filePath.isFile )
        throw new FileException( filePath, "File %s is not a file".format( filePath ) );

    File bedFile    = File( to!string(filePath), "r" );
    Bed!(T)     bedInstance;
    const string browserToken1  = "browser position";
    const string browserToken2  = "browser hide";
    const string trackToken     = "track ";

    foreach( char[] rawLine; bedFile.byLine() ){
        // byLine only removes '\n': drop the '\r' left by files using CRLF line endings
        char[] line = rawLine.chomp;

        if( line.strip.empty )              // empty or blank line
            continue;
        if( line.startsWith( '#' ) )        // comment
            continue;

        if( line.startsWith( browserToken1 ) ){
            // Expected payload: <chromosome>:<start>-<end>, anything after it is ignored
            auto position       = line[browserToken1.length .. $].strip;
            auto blankIndex     = position.countUntil!( c => c == ' ' || c == '\t' );
            if( blankIndex != -1 )
                position        = position[0 .. blankIndex];

            auto colonIndex     = position.countUntil( ':' );
            enforce( colonIndex > 0, "Malformed browser position line: " ~ line.idup );

            // Both indexes below are relative to the text following the colon
            auto interval       = position[colonIndex + 1 .. $];
            auto minusIndex     = interval.countUntil( '-' );
            enforce( minusIndex > 0, "Malformed browser position line: " ~ line.idup );
            auto endText        = interval[minusIndex + 1 .. $];
            enforce( !endText.empty, "Malformed browser position line: " ~ line.idup );

            bedInstance.metadata.chromosome     = position[0 .. colonIndex].idup;
            bedInstance.metadata.browserStart   = to!size_t( interval[0 .. minusIndex] );
            bedInstance.metadata.browserEnd     = to!size_t( endText );
        }
        else if( line.startsWith( browserToken2 ) ){
            enforce( line.length > browserToken2.length + 1, "Malformed metadata line" );
            bedInstance.metadata.hide = line[browserToken2.length + 1 .. $].idup;
        }
        else if( line.startsWith( trackToken ) ){
            bedInstance.trackLine = trackLineReader( line );
        }
        else{                               // data in csv format
            auto records = csvReader!T( line, delimiter );
            bedInstance.bedDataList ~= records.front;
        }
    }

    return bedInstance;
}