Created
December 21, 2019 19:13
-
-
Save cpl/8d1d53e81657ee655f27717c858ef576 to your computer and use it in GitHub Desktop.
Parsing 23andMe genome data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
)
// SNP is a single-nucleotide polymorphism record: one data row of a
// tab-separated genome file, with the fields in column order.
type SNP struct {
	RSID       string // column 1, stored verbatim
	Chromosome string // column 2, stored verbatim as text
	Position   int    // column 3, parsed as a base-10 integer
	Genotype   string // column 4, stored verbatim
}
type Genome []SNP | |
// checkErr stops the program when err is non-nil: it writes the error to the
// standard logger and exits with status 1 (log.Fatal). A nil err is a no-op.
//
// log.Fatal is used instead of panic because the errors passed in here are
// ordinary runtime failures (for example a missing input file), for which a
// one-line message is more useful than a stack trace.
func checkErr(err error) {
	if err != nil {
		log.Fatal(err)
	}
}
func tsvLineToSNP(line []byte) (snp SNP, err error) { | |
data := bytes.Split(line, []byte("\t")) | |
if l := len(data); l != 4 { | |
return SNP{}, fmt.Errorf("invalid len for SNP TSV, %d", l) | |
} | |
snp.Position, err = strconv.Atoi(string(data[2])) | |
if err != nil { | |
return SNP{}, fmt.Errorf("invalid position for SNP, %w", err) | |
} | |
snp.RSID = string(data[0]) | |
snp.Chromosome = string(data[1]) | |
snp.Genotype = string(data[3]) | |
return snp, nil | |
} | |
func readGenome(reader io.Reader) (genome Genome, err error) { | |
scanner := bufio.NewScanner(reader) | |
scanner.Split(bufio.ScanLines) | |
for scanner.Scan() { | |
snp, err := tsvLineToSNP(scanner.Bytes()) | |
if err != nil { | |
return nil, fmt.Errorf("failed parsing tsv line, %w", err) | |
} | |
genome = append(genome, snp) | |
} | |
return genome, scanner.Err() | |
} | |
func main() { | |
fp, err := os.Open("genome.txt") | |
checkErr(err) | |
defer fp.Close() | |
genome, err := readGenome(fp) | |
if err != nil { | |
checkErr(err) | |
} | |
fmt.Println(len(genome)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment