Created
September 6, 2019 19:22
-
-
Save aarani/27fb48f9ea167e17d89c60e55425a0cb to your computer and use it in GitHub Desktop.
ABI DNA Chromatogram File Parser for C#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
namespace ABIParser | |
{ | |
/// <summary>
/// Parser for ABI DNA chromatogram (.ab1) files.
/// The whole file is read into memory, the directory records needed by the
/// parser are located, and the called sequence, the per-base values of the
/// second PLOC and PCON records and the four trace channels are extracted.
/// </summary>
public class ABIParser
{
    // The file location of the Index pointer (the stored offset of the directory).
    private const int absIndexBase = 26;

    // Number of bytes stepped over per directory record while scanning the index.
    private const int recordSize = 28;

    // Position, inside a directory record, of the field that holds the data
    // offset (for FWO_ the four order characters are read from this field directly).
    private const int recordDataField = 20;

    // Length of the prefix that may precede the real file content (see macJunk).
    private const int macHeaderSize = 128;

    private readonly byte[] fileData;

    private string Sequence { get; set; }

    // Trace channels, one array per nucleotide, each TraceLength samples long.
    private int[] A, G, C, T;

    /// <summary>
    /// One 16-bit value per called base, read from the second PLOC record
    /// (presumably the trace position of each base's peak - confirm against the format spec).
    /// </summary>
    public int[] BaseCalls { get; private set; }

    /// <summary>
    /// One byte value per called base, read from the second PCON record
    /// (presumably the quality score of each call - confirm against the format spec).
    /// </summary>
    public int[] QCalls { get; private set; }

    /// <summary>Number of samples in each trace channel.</summary>
    public int TraceLength { get; private set; }

    /// <summary>Number of called bases in the sequence.</summary>
    public int SeqLength { get; private set; }

    /* Sometimes when Macintosh files are FTPed in binary form, they have
     * 128 bytes of invalid data pre-pended to them. This field holds the
     * size of that prefix (0 or 128) so that every stored offset can be
     * shifted by it, invisibly to the user.
     */
    private int macJunk = 0;

    // Until SetIndex finishes resolving them, these hold the location of the
    // data-offset field of the matching directory record (0 = record not found);
    // afterwards they hold the absolute location of the record's data.
    private int PLOC, PCON;
    private int DATA9, DATA10, DATA11, DATA12, PBAS2;

    // Location of the four base-order characters of the FWO_ record (0 = not found).
    private int FWO;

    /// <summary>
    /// Reads and parses the chromatogram file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the .ab1 file to read.</param>
    /// <exception cref="FormatException">
    /// The file does not carry the "ABI" signature, a required directory record
    /// is missing, or a stored offset/length points outside the file.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The FWO_ record does not name each of A, C, G and T exactly once.
    /// </exception>
    public ABIParser(string path)
    {
        fileData = File.ReadAllBytes(path);
        if (!IsABI())
        {
            throw new FormatException("File is not a valid .ab1 file");
        }

        SetIndex();
        SetBasecalls();
        SetQcalls();
        SetSeq();
        SetTraces();
    }

    /// <summary>Returns the called base sequence, one character per base.</summary>
    public string GetSequence() =>
        Sequence;

    /// <summary>
    /// Returns the trace channel of one nucleotide (case-insensitive).
    /// The returned array is the parser's own array, not a copy.
    /// </summary>
    /// <param name="nucleotide">One of 'A', 'C', 'G' or 'T'.</param>
    /// <exception cref="ArgumentException"><paramref name="nucleotide"/> is not A, C, G or T.</exception>
    public int[] GetTrace(char nucleotide)
    {
        switch (nucleotide)
        {
            case 'A':
            case 'a':
                return A;
            case 'C':
            case 'c':
                return C;
            case 'G':
            case 'g':
                return G;
            case 'T':
            case 't':
                return T;
            default:
                throw new ArgumentException("Nucleotide must be one of A, C, G or T.", nameof(nucleotide));
        }
    }

    /// <summary>
    /// Scans the directory, remembers where the records used by the parser
    /// live, reads the trace and sequence lengths and resolves every record
    /// to the absolute location of its data.
    /// </summary>
    private void SetIndex()
    {
        int dataCounter = 0;
        int pbasCounter = 0;
        int plocCounter = 0;
        int pconCounter = 0;

        // Every other stored offset in this method is shifted by macJunk; the
        // directory offset is a stored offset too, so it needs the same shift.
        long indexBase = (long)GetIntAt(absIndexBase + macJunk) + macJunk;
        int numRecords = GetIntAt(absIndexBase - 8 + macJunk);

        for (int record = 0; record < numRecords; record++)
        {
            long recordStart = indexBase + ((long)record * recordSize);
            EnsureInRange(recordStart, recordSize);

            int entry = (int)recordStart;
            int dataField = entry + recordDataField;

            // Records are counted in directory order: the 9th-12th DATA
            // records and the 2nd PBAS/PLOC/PCON records are the ones used.
            switch (Encoding.ASCII.GetString(fileData, entry, 4))
            {
                case "FWO_":
                    FWO = dataField;
                    break;

                case "DATA":
                    switch (++dataCounter)
                    {
                        case 9:
                            DATA9 = dataField;
                            break;
                        case 10:
                            DATA10 = dataField;
                            break;
                        case 11:
                            DATA11 = dataField;
                            break;
                        case 12:
                            DATA12 = dataField;
                            break;
                    }
                    break;

                case "PBAS":
                    if (++pbasCounter == 2)
                    {
                        PBAS2 = dataField;
                    }
                    break;

                case "PLOC":
                    if (++plocCounter == 2)
                    {
                        PLOC = dataField;
                    }
                    break;

                case "PCON":
                    if (++pconCounter == 2)
                    {
                        PCON = dataField;
                    }
                    break;
            }
        }

        RequireRecord(FWO, "FWO_");
        RequireRecord(DATA9, "DATA (9th)");
        RequireRecord(DATA10, "DATA (10th)");
        RequireRecord(DATA11, "DATA (11th)");
        RequireRecord(DATA12, "DATA (12th)");
        RequireRecord(PBAS2, "PBAS (2nd)");
        RequireRecord(PLOC, "PLOC (2nd)");
        RequireRecord(PCON, "PCON (2nd)");

        // The lengths sit 8 and 4 bytes before the data-offset field of their records.
        TraceLength = GetIntAt(DATA12 - 8);
        SeqLength = GetIntAt(PBAS2 - 4);
        if (TraceLength < 0 || SeqLength < 0)
        {
            throw new FormatException("File is corrupt: negative trace or sequence length.");
        }

        // Replace each record location by the absolute location of its data.
        PLOC = GetIntAt(PLOC) + macJunk;
        DATA9 = GetIntAt(DATA9) + macJunk;
        DATA10 = GetIntAt(DATA10) + macJunk;
        DATA11 = GetIntAt(DATA11) + macJunk;
        DATA12 = GetIntAt(DATA12) + macJunk;
        PBAS2 = GetIntAt(PBAS2) + macJunk;
        PCON = GetIntAt(PCON) + macJunk;
    }

    /// <summary>Reads SeqLength big-endian 16-bit values from the PLOC data into BaseCalls.</summary>
    private void SetBasecalls()
    {
        BaseCalls = ReadInt16Array(PLOC, SeqLength);
    }

    /// <summary>Reads SeqLength single-byte values from the PCON data into QCalls.</summary>
    private void SetQcalls()
    {
        EnsureInRange(PCON, SeqLength);

        int[] calls = new int[SeqLength];
        for (int i = 0; i < calls.Length; i++)
        {
            calls[i] = fileData[PCON + i];
        }

        QCalls = calls;
    }

    /// <summary>Builds the sequence string from SeqLength bytes of the PBAS data, one character per byte.</summary>
    private void SetSeq()
    {
        EnsureInRange(PBAS2, SeqLength);

        char[] bases = new char[SeqLength];
        for (int i = 0; i < bases.Length; i++)
        {
            bases[i] = (char)fileData[PBAS2 + i];
        }

        Sequence = new string(bases);
    }

    /// <summary>
    /// Uses the four FWO_ characters to decide which of the DATA 9-12 blocks
    /// belongs to which nucleotide, then reads the four trace channels.
    /// </summary>
    private void SetTraces()
    {
        EnsureInRange(FWO, 4);

        int[] datas = { DATA9, DATA10, DATA11, DATA12 };
        int[] pointers = new int[4]; // alphabetical, 0=A, 1=C, 2=G, 3=T
        bool[] assigned = new bool[4];

        for (int i = 0; i < 4; i++)
        {
            int slot;
            switch ((char)fileData[FWO + i])
            {
                case 'A':
                case 'a':
                    slot = 0;
                    break;
                case 'C':
                case 'c':
                    slot = 1;
                    break;
                case 'G':
                case 'g':
                    slot = 2;
                    break;
                case 'T':
                case 't':
                    slot = 3;
                    break;
                default:
                    throw new ArgumentException("Trace contains illegal values.");
            }

            pointers[slot] = datas[i];
            assigned[slot] = true;
        }

        // A repeated letter would leave one channel without a data block.
        foreach (bool hasChannel in assigned)
        {
            if (!hasChannel)
            {
                throw new ArgumentException("Trace contains illegal values.");
            }
        }

        A = ReadInt16Array(pointers[0], TraceLength);
        C = ReadInt16Array(pointers[1], TraceLength);
        G = ReadInt16Array(pointers[2], TraceLength);
        T = ReadInt16Array(pointers[3], TraceLength);
    }

    /// <summary>Reads a big-endian 32-bit signed integer located at <paramref name="pointer"/>.</summary>
    /// <exception cref="FormatException">The four bytes are not inside the file.</exception>
    private int GetIntAt(int pointer)
    {
        EnsureInRange(pointer, 4);

        // Assemble the bytes explicitly so the result does not depend on the
        // byte order of the machine running the parser.
        return (fileData[pointer] << 24)
            | (fileData[pointer + 1] << 16)
            | (fileData[pointer + 2] << 8)
            | fileData[pointer + 3];
    }

    /// <summary>
    /// Reads <paramref name="count"/> big-endian 16-bit signed integers
    /// starting at <paramref name="offset"/>, widening each to int.
    /// </summary>
    /// <exception cref="FormatException">The requested bytes are not inside the file.</exception>
    private int[] ReadInt16Array(int offset, int count)
    {
        EnsureInRange(offset, (long)count * 2);

        int[] values = new int[count];
        for (int i = 0; i < count; i++)
        {
            int position = offset + (i * 2);
            values[i] = (short)((fileData[position] << 8) | fileData[position + 1]);
        }

        return values;
    }

    /// <summary>Throws when <paramref name="count"/> bytes at <paramref name="offset"/> are not all inside the file.</summary>
    /// <exception cref="FormatException">The range is negative or runs past the end of the file.</exception>
    private void EnsureInRange(long offset, long count)
    {
        if (offset < 0 || count < 0 || offset + count > fileData.Length)
        {
            throw new FormatException(
                $"File is truncated or corrupt: cannot read {count} byte(s) at offset {offset}.");
        }
    }

    /// <summary>Throws when a directory record the parser depends on was never found.</summary>
    /// <exception cref="FormatException"><paramref name="pointer"/> is still 0.</exception>
    private static void RequireRecord(int pointer, string recordName)
    {
        if (pointer == 0)
        {
            throw new FormatException($"File is missing the required {recordName} record.");
        }
    }

    /// <summary>
    /// Checks for the "ABI" signature at the start of the file or, failing
    /// that, after a 128-byte prefix; in the latter case macJunk is set to 128.
    /// </summary>
    /// <returns>true when the signature was found at either location.</returns>
    private bool IsABI()
    {
        if (HasSignatureAt(0))
        {
            return true;
        }

        if (HasSignatureAt(macHeaderSize))
        {
            macJunk = macHeaderSize;
            return true;
        }

        return false;
    }

    /// <summary>Tells whether the three bytes at <paramref name="offset"/> exist and spell "ABI".</summary>
    private bool HasSignatureAt(int offset) =>
        fileData.Length >= offset + 3
        && fileData[offset] == 'A'
        && fileData[offset + 1] == 'B'
        && fileData[offset + 2] == 'I';
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment