Created
July 23, 2014 02:28
-
-
Save brantfaircloth/c67b8f01f47cf0757de7 to your computer and use it in GitHub Desktop.
check a nexus file's site patterns
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Check a NEXUS file's site patterns.

(c) 2014 Brant Faircloth || http://faircloth-lab.org/
All rights reserved.

This code is distributed under a 3-clause BSD license. Please see
LICENSE.txt for more information.

Created on 16 July 2014 13:56 PDT (-0700)

The script reads every matrix row whose taxon label starts with a given
prefix, stacks the sequences into a character matrix and reports, column by
column, how many sites contain the missing-data symbol and how often each
combination of character states occurs.
"""

import re
import numpy
from collections import Counter

# Input file and taxon-label prefix used when the script is run directly.
NEXUS_PATH = "Q30-QD2-MISS_0.75.recode.snapp.nexus"
TAXON_PREFIX = "wesj"

# Last whitespace-delimited token of a line.  Trailing whitespace and a
# missing final newline are both tolerated; a line that holds only a label
# (no whitespace-separated second token) does not match at all.
_LAST_TOKEN = re.compile(r"\s(\S+)\s*$")


def parse_sequences(lines, prefix=TAXON_PREFIX):
    """Return the sequence string of every matrix row starting with *prefix*.

    Args:
        lines: iterable of text lines (an open file works).
        prefix: only lines beginning with this string are considered.

    Returns:
        A list of strings, one per matching line, each being the last
        whitespace-delimited token of that line.  Matching lines that carry
        no sequence token are skipped.
    """
    sequences = []
    for line in lines:
        if not line.startswith(prefix):
            continue
        match = _LAST_TOKEN.search(line)
        if match is not None:
            sequences.append(match.group(1))
    return sequences


def summarize_site_patterns(sequences, missing="?"):
    """Tally per-column (per-site) state patterns across *sequences*.

    Args:
        sequences: sequence of equal-length strings, one per taxon.
        missing: symbol that marks missing data.

    Returns:
        A tuple ``(characters, with_missing, no_missing, patterns)`` where
        ``characters`` is the number of columns, ``with_missing`` and
        ``no_missing`` count the columns that do / do not contain *missing*,
        and ``patterns`` is a ``Counter`` keyed by the space-joined, sorted
        set of states observed in a column.

    Raises:
        ValueError: if the sequences do not all have the same length.
    """
    rows = [list(sequence) for sequence in sequences]
    if not rows:
        return 0, 0, 0, Counter()
    if len({len(row) for row in rows}) != 1:
        raise ValueError("sequences differ in length; cannot compare sites")

    matrix = numpy.asarray(rows)
    patterns = Counter()
    with_missing = 0
    # Iterating the transpose walks the matrix column by column.
    for column in matrix.T:
        states = set(column.tolist())
        if missing in states:
            with_missing += 1
        # Sort the states so one pattern always maps to one key; raw set
        # iteration order is not guaranteed to be the same for equal sets.
        patterns[" ".join(sorted(states))] += 1

    characters = matrix.shape[1]
    return characters, with_missing, characters - with_missing, patterns


def main(path=NEXUS_PATH, prefix=TAXON_PREFIX):
    """Read *path* and print the site-pattern summary for *prefix* rows."""
    with open(path) as infile:
        sequences = parse_sequences(infile, prefix)
    characters, with_missing, no_missing, patterns = summarize_site_patterns(
        sequences
    )
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("characters %d" % characters)
    print("sites with missing %d" % with_missing)
    print("sites with no missing %d" % no_missing)
    print(patterns)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment