Skip to content

Instantly share code, notes, and snippets.

@brantfaircloth
Created July 23, 2014 02:28
Show Gist options
  • Save brantfaircloth/c67b8f01f47cf0757de7 to your computer and use it in GitHub Desktop.
Save brantfaircloth/c67b8f01f47cf0757de7 to your computer and use it in GitHub Desktop.
check a nexus file's site patterns
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
(c) 2014 Brant Faircloth || http://faircloth-lab.org/
All rights reserved.
This code is distributed under a 3-clause BSD license. Please see
LICENSE.txt for more information.
Created on 16 July 2014 13:56 PDT (-0700)
"""
import re
import numpy
from collections import Counter
# Input file and row label prefix used by this analysis.
NEXUS_PATH = "Q30-QD2-MISS_0.75.recode.snapp.nexus"
TAXON_PREFIX = "wesj"
# Character that marks a missing call in the matrix.
MISSING = "?"


def parse_rows(lines, prefix=TAXON_PREFIX):
    """Collect the character rows of every line that starts with *prefix*.

    Each matching line is split on whitespace and its last field is taken
    as the row of site characters; that field is exploded into a list of
    single characters.

    Args:
        lines: Any iterable of text lines (an open file works).
        prefix: Only lines beginning with this string are used.

    Returns:
        A list of lists of one-character strings, one inner list per
        matching line, in input order.
    """
    rows = []
    for line in lines:
        if not line.startswith(prefix):
            continue
        # split() ignores trailing whitespace and any line terminator, so a
        # final line without "\n" or a CRLF file is parsed like any other.
        fields = line.split()
        if len(fields) < 2:
            # Label with no data after it: nothing to record.
            continue
        rows.append(list(fields[-1]))
    return rows


def summarize_sites(rows, missing=MISSING):
    """Tally per-column (site) statistics for a character matrix.

    Args:
        rows: List of equal-length lists of single characters, one per row.
        missing: The character that denotes missing data.

    Returns:
        A tuple ``(characters, with_missing, no_missing, patterns)`` where
        ``characters`` is the number of columns, ``with_missing`` /
        ``no_missing`` count the columns that do / do not contain
        *missing*, and ``patterns`` is a ``Counter`` keyed by the distinct
        characters of a column, sorted and joined with single spaces.

    Raises:
        ValueError: If the rows do not all have the same length.
    """
    if not rows:
        return 0, 0, 0, Counter()
    if len(set(len(row) for row in rows)) != 1:
        raise ValueError("rows have differing numbers of characters")

    matrix = numpy.asarray(rows)
    patterns = Counter()
    characters = 0
    with_missing = 0
    # Iterating the transpose walks the matrix one column at a time.
    for column in matrix.T:
        characters += 1
        states = set(column)
        if missing in states:
            with_missing += 1
        # Sort before joining: set iteration order is arbitrary, and an
        # unsorted join could file one pattern under several keys.
        patterns[" ".join(sorted(states))] += 1
    return characters, with_missing, characters - with_missing, patterns


def main(path=NEXUS_PATH):
    """Read *path* and print the site summary to standard output."""
    # Plain "r": the "U" flag was removed in Python 3.11, and parse_rows
    # does not depend on how lines are terminated.
    with open(path, "r") as infile:
        rows = parse_rows(infile)
    characters, with_missing, no_missing, patterns = summarize_sites(rows)
    # Single-argument print() calls produce the same text on Python 2 and 3.
    print("characters {0}".format(characters))
    print("sites with missing {0}".format(with_missing))
    print("sites with no missing {0}".format(no_missing))
    print(patterns)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment