lennax · August 11, 2021 16:14
diff --git a/CoordinateMapper.py b/CoordinateMapper.py
 # CoordinateMapper -- map between genomic, cds, and protein coordinates
 # AUTHOR: Reece Hart <[email protected]>
 # Modifications: Lenna X. Peterson <[email protected]>
 # LICENSE: BioPython

 # Examples:
 # AB026906.1:g.7872G>T
 # AB026906.1:c.274G>T
 # BA...:p.Asp92Tyr
 #
 # All refer to congruent variants. A coordinate mapper is needed to
 # translate between at least these three coordinate frames.  The mapper
 # should deal with specialized syntax for splicing and UTR (e.g., 88+1,
 # 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
 # or 1-based numbering (0 internally, as with Python/BioPython and
 # Perl/BioPerl).
 #
 # g   -----------00000000000-----1111111----------22222222222*222-----
 #                s0        e0    s1    e1         s2            e2
 #                \         \     |     |          /             /
 #                 +--+      +--+ |     | +-------+     +-------+
 #                     \         \|     |/             /
 # c                   00000000000111111122222222222*222
 #                               c0     c1             c2
 #                     aaabbbcccdddeeefffggghhhiiijj*kkk
 # p                     A  B  C  D  E  F  G  H  I  J  K
 #                       p0 p1 p2 ...                  pn
 #
 #
 # TODO:
 # * g2c returns index + extended syntax
 # * c2g accepts index + extended syntax

 from functools import wraps
 from math import floor
 import warnings

 import MapPositions  # required for pos_factory decorator
 from MapPositions import CDSPositionError, ProteinPositionError
 from MapPositions import GenomePosition, CDSPosition, ProteinPosition

 from Bio import BiopythonParserWarning
 # FIXME change to absolute import once CompoundLocation is merged to master
 from SeqFeature import FeatureLocation


 class CoordinateMapper(object):
    """Convert positions between genomic, CDS, and protein coordinates."""

    def __init__(self, selist):
        """Set exons to be used for mapping.

        Parameters
        ----------
        selist : SeqRecord, SeqFeature, list
            Object containing exon information
        """
        self._exons = self._get_exons(selist)

    @staticmethod
    def _get_exons(seq):
        """Extract exons from SeqRecord, SeqFeature, or list of pairs.

        Parameters
        ----------
        seq : SeqRecord, SeqFeature, list
            Object containing exon information.

        Returns
        -------
        SeqFeature.FeatureLocation
        """
        # Try as SeqRecord
        if hasattr(seq, 'features'):
            # generator
            cdsf = (f for f in seq.features if f.type == 'CDS').next()
            return cdsf.location
        # Try as SeqFeature
        elif hasattr(seq, 'location'):
            if seq.type != 'CDS':
                # FIXME should this be a fatal error?
                warnings.warn("Provided SeqFeature should be CDS",
                              BiopythonParserWarning)
            return seq.location
        # Try as list of pairs
        return sum([FeatureLocation(s, e) for s, e in seq])

    @property  # read-only
    def exons(self):
        return self._exons

    @property  # read-only
    def exon_list(self):
        return list(self.exons)

    def pos_factory(pos_type):
        """
        Convert string or int pos to appropriate Position object

        Parameters
        ----------
        pos_type : str
            Position type (Genome, CDS, Protein)
        """
        def wrapper(fn):
            @wraps(fn)
            def make_pos(self, pos, dialect=None):
                # retrieve Position object
                _obj = getattr(MapPositions, pos_type + "Position")
                # if pos is not already Position object, make it one
                if not isinstance(pos, _obj):
                    # no dialect: use default constructor
                    if dialect is None:
                        pos = _obj(pos)
                    # use dialect alternate constructor
                    else:
                        pos = _obj.from_dialect(dialect, pos)
                # call function with new pos
                return fn(self, pos, dialect)
            return make_pos
        return wrapper

    @pos_factory("Genome")
    def g2c(self, gpos, dialect=None):
        """Convert integer from genomic to CDS coordinates

        Parameters
        ----------
        gpos : int
            Genomic position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        CDSPosition
        """
        gpos = int(gpos)
        fmts = CDSPosition.fmt_dict

        def _simple_g2c(g):
            return CDSPosition(self.exon_list.index(g))

        # within exon
        if gpos in self.exons:
            return _simple_g2c(gpos)
        # before CDS
        if gpos < self.exons.start:
            return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
        # after CDS
        if gpos >= self.exons.end:
            return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
        # intron
        # set start of first intron
        prev_end = self.exons.parts[0].end
        for part in self.exons.parts[1:]:
            # not in this intron
            if gpos > part.start:
                prev_end = part.end
                continue
            len_intron = part.start - prev_end
            # second half (exclusive) of intron
            #       offset     >     middle of intron
            if gpos - prev_end > floor(len_intron / 2.0):
                anchor = _simple_g2c(part.start)
                offset = gpos - part.start
                assert offset < 0
            # first half (inclusive) of intron
            else:
                anchor = _simple_g2c(prev_end - 1)
                offset = gpos - prev_end + 1
                assert offset > 0
            assert self.check_intron(anchor, offset)
            return CDSPosition(fmts['intron'] % (anchor, offset))

        assert False  # function should return for every integer

    # TODO verify that values of offset are sane
    def check_intron(self, anchor, offset):
        """
        Verify that CDS-relative intron position is valid with given exons.

        Parameters
        ----------
        anchor : int
            Intron anchor (closest CDS position)
        offset : int
            Intron offset (distance to anchor)

        Returns
        -------
        bool
        """
        for exon in self.exons.parts:
            start = int(self.g2c(exon.start))
            if anchor == start:
                if offset > 0:
                    raise CDSPositionError(
                "Invalid intron: offset from exon start must be negative.")
                return True
            end = int(self.g2c(exon.end - 1))
            if anchor == end:
                if offset < 0:
                    raise CDSPositionError(
                "Invalid intron: offset from exon end must be positive.")
                return True
        raise CDSPositionError(
            "Invalid intron: anchor must be start or end of an exon.")

    def get_strand(self, gpos):
        for exon in self.exons.parts:
            if gpos in exon:
                return exon.strand
        raise ValueError("Provided gpos must be exon")

    @pos_factory("CDS")
    def c2g(self, cpos, dialect=None):
        """Convert from CDS to genomic coordinates

        Parameters
        ----------
        cpos : int
            CDS position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        GenomePosition
        """
        if cpos.pos_type == "pre-CDS":
            return GenomePosition(self.exons.start + cpos.offset)
        elif cpos.pos_type == "post-CDS":
            return GenomePosition(self.exons.end - 1 + cpos.offset)

        g_anchor = self.exon_list[cpos.anchor]
        if cpos.pos_type == "exon":
            strand = self.get_strand(g_anchor)
            return GenomePosition(g_anchor, strand=strand)
        elif cpos.pos_type == "intron":
            offset = cpos.offset
            if self.check_intron(cpos.anchor, offset):
                return GenomePosition(g_anchor + offset)

        assert False  # all positions should be one of the 4 types

    @pos_factory("CDS")
    def c2p(self, cpos, dialect=None):
        """Convert from CDS to protein coordinates

        Parameters
        ----------
        cpos : int
            CDS position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        ProteinPosition
        """
        try:
            cpos = int(cpos)
        except TypeError:
            raise CDSPositionError("'%s' does not correspond to a protein"
                                   % repr(cpos))
        return ProteinPosition(int(cpos / 3.0))

    @pos_factory("Genome")
    def g2p(self, gpos, dialect=None):
        """Convert integer from genomic to protein coordinates

        Parameters
        ----------
        gpos : int
            Genomic position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        ProteinPosition
        """
        return self.c2p(self.g2c(gpos))

    @pos_factory("Protein")
    def p2c(self, ppos, dialect=None):
        """Convert integer from protein coordinate to CDS closed range

        Parameters
        ----------
        ppos : int
            Protein position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        CDSPosition
        """
        try:
            ppos = int(ppos)
        except TypeError:
            return None
        if ppos < 0:
            raise ProteinPositionError("'%s' should not be negative")
        # FIXME is CDS guaranteed to have len % 3 == 0?
        first_base = (ppos) * 3
        last_base = first_base + 2
        if last_base > len(self.exons):
            raise ProteinPositionError("'%s' is too large")
        return (CDSPosition(first_base), CDSPosition(last_base))

    @pos_factory("Protein")
    def p2g(self, ppos, dialect=None):
        """Convert integer from protein to genomic coordinates

        Parameters
        ----------
        ppos : int
            Protein position
        dialect : str
            Coordinate dialect (GenBank or HGVS, default None)

        Returns
        -------
        GenomePosition
        """
        return tuple(self.c2g(x) for x in self.p2c(ppos))


 if __name__ == '__main__':
    # The following exons are from AB026906.1.
    # test case: g.7872 -> c.274 -> p.92
    # N.B. These are python counting coordinates (0-based)
    exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]

    def test_list(g_range):
        cm = CoordinateMapper(exons)
        for g1 in g_range:
            print g1,
            c1 = cm.g2c(g1)
            print c1,
            p1 = cm.c2p(c1)
            print p1,
            if p1:
                c2 = cm.p2c(p1)[0]
            else:
                c2 = c1
            print ' | ', c2,
            g2 = cm.c2g(c2)
            print g2

        print cm.g2p(7872)
        print cm.p2g(92)

    def test_simple():
        from SeqFeature import SeqFeature
        location = sum([FeatureLocation(2, 4, +1),
                        FeatureLocation(8, 11, +1),
                        FeatureLocation(16, 18, +1)])
        simple_exons = SeqFeature(location, type="CDS")
        cm = CoordinateMapper(simple_exons)
        print cm.exons
        print list(cm.exons)
        print range(len(cm.exons))
        for i in range(len(cm.exons)):
            print "%3s" % cm.c2g(i),
        print
        for i in xrange(20):
            print "%3d" % i,
        print
        for i in xrange(20):
            print "%3s" % cm.g2c(i),
        print

    r1 = (7870, 7871, 7872, 7873, 7874)
    r2 = (5807, 5808, 5809,
          6871, 6872, 6873, 6874, 6875,
          7766, 7767, 7768, 7769,
          13784, 13785, 13786)
    test_list(r1)
    #test_list(r2)
    print
    test_simple()
diff --git a/MapPositions.py b/MapPositions.py
 from copy import copy
 import re


 class InvalidPositionError(ValueError):
    "Exception for bad coordinates"


 class GenomePositionError(InvalidPositionError):
    "Exception for bad genome coordinates"


 class CDSPositionError(InvalidPositionError):
    "Exception for bad CDS coordinates"


 class ProteinPositionError(InvalidPositionError):
    "Exception for bad protein coordinates"


 class MapPosition(object):
    """Generic position for coordinate mapping."""
    def __init__(self, pos, index=None, **kwargs):
        """Init class from HGVS position.

        Parameters
        ----------
        pos : int, str
            Position to convert
        index : int
            Start of counting (e.g. 1 for GenBank or HGVS)
        """
        self.index = index
        if pos and self.index:
            pos -= index
        self.pos = pos

    @classmethod
    def from_dialect(cls, dialect, pos, strand=None):
        """Init class from given dialect.

        Parameters
        ----------
        dialect : str
            input dialect (HGVS or GenBank)
        pos : int, str
            dialect position to convert
        strand : int
            Strand of position (-1 or +1, default None)

        Returns
        -------
        cls
        """
        if dialect is None:
            return cls(pos, strand=strand)
        try:
            from_func = getattr(cls, "from_" + dialect.lower())
        except AttributeError:
            raise ValueError("Dialect '%s' not valid" % dialect)
        return from_func(pos, strand)

    @classmethod
    def from_hgvs(cls, hgvs_pos, strand=None):
        """Init class from HGVS position.

        Parameters
        ----------
        hgvs_pos : int, str
            HGVS position to convert
        strand : int
            Strand of position (-1 or +1, default None)

        Returns
        -------
        cls
        """
        return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")

    @classmethod
    def from_genbank(cls, gbk_pos, strand=None):
        """Init class from GenBank position.

        Parameters
        ----------
        gbk_pos : int, str
            GenBank position to convert
        strand : int
            Strand of position (-1 or +1, default None)

        Returns
        -------
        cls
        """
        return cls(gbk_pos, index=1, strand=strand)

    def to(self, dialect):
        """Convert position to specified dialect.

        Parameters
        ----------
        dialect : str
            Output dialect (HGVS or GenBank)
        """
        if dialect is None:
            return self.to_str()
        try:
            to_func = getattr(self, "to_" + dialect.lower())
        except AttributeError:
            raise ValueError("Dialect '%s' not valid" % dialect)
        return to_func()

    def to_hgvs(self):
        """Convert position to HGVS"""
        if self.pos or self.pos == 0:
            return self.pos + 1
        return None

    def to_genbank(self):
        """Convert position to GenBank"""
        if self.pos or self.pos == 0:
            return self.pos + 1
        return None

    def to_str(self):
        """Make string representation without conversion"""
        return self.pos

    def __str__(self):
        """String representation"""
        return str(self.to_str())

    def __int__(self):
        """Integer representation"""
        return self.pos

    def __repr__(self):
        """Detailed representation for debugging"""
        return "%s(%s)" % (self.__class__.__name__, self.to_str())

 class GenomePosition(MapPosition):
    """Genome position for coordinate mapping"""
    def __init__(self, gpos, index=None, strand=None, **kwargs):
        # FIXME if index is string, error may be raised
        if gpos < (index or 0):
            raise GenomePositionError("Genome position cannot be negative.")
        # call superclass constructor
        MapPosition.__init__(self, gpos, index)
        self.strand = strand

    def __eq__(self, other):
        """Compare equal to other GenomePosition with same pos
        or integer equal to pos"""
        if isinstance(other, int):
            return self.pos == other
        return isinstance(other, GenomePosition) and self.pos == other.pos


 class ProteinPosition(MapPosition):
    """Protein position for coordinate mapping"""
    def __init__(self, ppos, index=None, **kwargs):
        # call superclass constructor
        MapPosition.__init__(self, ppos, index)

    def __eq__(self, other):
        """Compare equal to other ProteinPosition with same pos
        or integer equal to pos"""
        if isinstance(other, int):
            return self.pos == other
        return isinstance(other, ProteinPosition) and self.pos == other.pos


 class CDSPosition(MapPosition):
    """CDS position for coordinate mapping"""
    def __init__(self, cpos, index=None,
                 pre_fmt=None, post_fmt=None, **kwargs):
        # Dispatch types and return anchor, offset
        if isinstance(cpos, int):
            anchor, offset = self.parse_int(cpos)
        elif isinstance(cpos, str):
            anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
        else:
            raise CDSPositionError("'%s' is of unknown type" % cpos)
        # Set instance anchor and offset
        # call superclass constructor
        MapPosition.__init__(self, anchor, index)
        self._offset = offset
        self.validate()

    @classmethod
    def from_anchor(cls, anchor=None, offset=None):
        """Init CDSPosition with anchor, offset pair.

        Parameters
        ----------
        anchor : int
            CDS anchor (coordinate of nearest exon position)
        offset : int
            Offset from nearest exon position

        Returns
        -------
        CDSPosition
        """
        if not anchor:
            pos = cls("%+d" % offset)
        elif anchor < 0:
            raise CDSPositionError("Anchor cannot be negative.")
        else:
            pos = cls(anchor)
            pos.offset = offset
        return pos

    @property
    def offset(self):
        return self._offset

    @offset.setter
    def offset(self, val):
        "Validate new offset, then update"
        self.validate(offset=val)
        self._offset = val

    @property
    def anchor(self):
        return self.pos

    @anchor.setter
    def anchor(self, val):
        "Validate new anchor, then update pos"
        self.validate(anchor=val)
        self.pos = val

    def validate(self, anchor="sentinel", offset="sentinel"):
        """Check whether anchor and offset yield a valid position.

        Parameters
        ----------
        anchor : int
            CDS anchor (coordinate of nearest exon position)
        offset : int
            Offset from nearest exon position

        Returns
        -------
        bool
        """

        if anchor == "sentinel":
            anchor = self.anchor
        if offset == "sentinel":
            offset = self.offset
        if offset == 0:
            raise CDSPositionError(
                "Offset may not be 0. For no offset, use None.")
        if not anchor and anchor != 0 and not offset:
            raise CDSPositionError(
                "At least one of pos or offset must be defined")
        if anchor and anchor < 0:
            raise CDSPositionError("CDS anchor may not be negative.")
        return True

    @property
    def pos_type(self):
        "Type of CDS position, dynamically determined from values"
        # inside CDS
        if self.pos or self.pos == 0:
            if not self.offset:
                return "exon"
            return "intron"
        # outside CDS
        elif self.offset > 0:
            return "post-CDS"
        return "pre-CDS"
        assert False  # all integers should return

    @property
    def sub_dict(self):
        if self.pos_type == 'intron':
            return {'pos': self.pos, 'offset': self.offset}
        if self.pos_type == 'exon':
            return {'pos': self.pos}
        if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
            return {'offset': self.offset}

    fmt_dict = {
        'exon': "%d",
        'intron': "%d%+d",
        'post-CDS': "%+d",
        'pre-CDS': "%+d",
    }

    @staticmethod
    def _shift_index(pos_dict, idx):
        """Increment value of dict key 'pos' by given index.

        Parameters
        ----------
        pos_dict : dict
            Dictionary to search for 'pos'
        idx : int
            Index to add to pos_dict['pos']

        Returns
        -------
        dict
        """
        if 'pos' in pos_dict and idx:
            pos_dict['pos'] += idx
        return pos_dict

    def _make_str(self, val_dict=None, fmt_dict=None):
        """Retrieve format string and substitute values.

        Parameters
        ----------
        val_dict : dict
            Dictionary of values to substitute into string
        fmt_dict : dict
            Dictionary of format strings for each pos type

        Returns
        -------
        str
        """
        # set default dicts if parameter dicts are false
        fmt_dict = fmt_dict or self.fmt_dict
        val_dict = val_dict or self.sub_dict
        return fmt_dict[self.pos_type] % tuple(val_dict.values())

    @staticmethod
    def parse_int(cpos):
        """Parse int to anchor, offset pair

        Parameters
        ----------
        cpos : int
            Integer position to convert

        Returns
        -------
        tuple
        """
        # treat negative int as offset
        if cpos < 0:
            return (None, cpos)
        # treat positive int as anchor
        return (cpos, None)

    @staticmethod
    def parse_str(cpos, pre_fmt, post_fmt):
        """Parse string to anchor, offset pair

        Parameters
        ----------
        cpos : str
            String position to convert
        pre_fmt : str
            Format string for pre-CDS positions
        post_fmt : str
            Format string for post-CDS positions

        Returns
        -------
        tuple
        """
        delimiters = "\+\-"
        if post_fmt and "*" in post_fmt:
            delimiters += "\*"
        # parenth causes split pattern to be kept
        delim_rx = re.compile("([%s])" % delimiters)
        parsed = delim_rx.split(cpos, 1)
        if len(parsed) == 1:  # may be int
            return CDSPosition.parse_int(int(cpos))
        # 1 split is normally length 2 but delimiter is also kept
        elif len(parsed) != 3:
            raise CDSPositionError(
                "String '%s' not parseable for this position." % cpos)
        if parsed[0] == "":
            anchor = None
        else:
            anchor = int(parsed[0])
        if post_fmt and parsed[1] == "*" == post_fmt[0]:
            parsed[1] = "+"
        offset = int("".join(parsed[1:]))
        return (anchor, offset)

    def to_hgvs(self):
        """Convert CDS position to HGVS"""
        fmt_dict = copy(self.fmt_dict)
        fmt_dict['post-CDS'] = "*%d"
        sub_dict = self._shift_index(self.sub_dict, 1)
        return self._make_str(sub_dict, fmt_dict)

    def to_genbank(self):
        """Convert CDS position to GenBank"""
        sub_dict = self._shift_index(self.sub_dict, 1)
        return self._make_str(sub_dict)

    def to_str(self):
        """Make string representation of CDS position"""
        return self._make_str()

    def __int__(self):
        """Integer representation of CDS exon, otherwise NotImplemented"""
        if self.pos_type == "exon":
            return MapPosition.__int__(self)
        return NotImplemented

    def __eq__(self, other):
        """Compare equal to other MapPosition with same pos and offset
        or int if exon"""
        if isinstance(other, int) and self.pos_type == "exon":
            return self.pos == other
        return isinstance(other, CDSPosition) and \
                self.pos == other.pos and \
                self.offset == other.offset


 if __name__ == "__main__":

    def print_pos(pos_obj):
        print "object:", pos_obj
        print "repr:", repr(pos_obj)
        print "HGVS:", pos_obj.to_hgvs()
        print

    g = GenomePosition.from_hgvs(6)
    print_pos(g)

    test_g = GenomePosition(5)
    #test_c = CDSPosition("6+1")
    #test_c = CDSPosition.from_hgvs("6+1")
    test_c = CDSPosition.from_hgvs("*1")
    #test_c = CDSPosition(6)
    #test_c = CDSPosition(-1)
    print_pos(test_g)
    print test_c.pos_type
    print_pos(test_c)
diff --git a/testCoordinateMapper.py b/testCoordinateMapper.py
 #!/usr/bin/python

 from functools import wraps
 import unittest

 from CoordinateMapper import CoordinateMapper
 from MapPositions import GenomePositionError
 from MapPositions import ProteinPositionError
 from MapPositions import CDSPosition, CDSPositionError
 from SeqFeature import FeatureLocation, SeqFeature
 from Bio.SeqRecord import SeqRecord


 exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
 cmap = CoordinateMapper(exons)
 g_exons = xrange(7868, 7875)  # len 7
 c_exons = xrange(270, 279)  # len 9
 p_exons = xrange(90, 93)   # len 3
 p_exons_trip = [p for p in p_exons for n in range(3)]  # len 9
 c_exon_prs = ((270, 272), (273, 275), (276, 278))  # len 3
 g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876))  # len 3

 g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
             'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
 c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
             'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
 c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))

 g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
 c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '*1', '*216')}
 c_outside_tups = ((None, -5808), (None, 1), (None, 216))


 def two_dialects(fn):
    @wraps(fn)
    def call_tests(self):
        orig_dialect = self.dialect
        for dialect in (None, 'HGVS'):
            self.dialect = dialect
            fn(self)
        self.dialect = orig_dialect
    return call_tests


 class TestCDSPosition(unittest.TestCase):
    """Test that CDSPosition works properly"""

    def setUp(self):
        self.dialect = None

    @two_dialects
    def testGoodIntron(self):
        """CDSPosition should match good intron values"""
        for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
            actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
            self.assertEqual(actual, c)

    @two_dialects
    def testGoodOutside(self):
        """CDSPosition should match good outside-CDS values"""
        for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
            actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
            self.assertEqual(actual, c)

    def testEqual(self):
        """CDSPosition should test equal with same args"""
        for args in c_intron_tups:
            CPos = CDSPosition.from_anchor
            self.assertEqual(CPos(*args), CPos(*args))
        self.assertEqual(str(CPos(*args)), str(CPos(*args)))

    #def testEqualDialects(self):
        #for c_pos in c_outside:
            #self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
            #self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())

    def testBadAnchor(self):
        """CDSPosition should fail with negative CDS anchor"""
        self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)

    def testBadOffset(self):
        """CDSPosition should fail with zero offset"""
        self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)

    def testValidate(self):
        """Setting CDSPosition anchor offset should fail for invalid cases"""
        intron = CDSPosition("22+4")
        self.assertRaises(CDSPositionError, setattr, intron, "offset", 0)
        self.assertRaises(CDSPositionError, setattr, intron, "anchor", -5)
        exon = CDSPosition("60")
        self.assertRaises(CDSPositionError, setattr, exon, "anchor", None)

 class TestCoordinateMapper(unittest.TestCase):
    """Test that CoordinateMapper works properly"""

    def setUp(self):
        self.sf = SeqFeature(sum((FeatureLocation(s, e) for s, e in exons)),
                        type="CDS")

    def testListInit(self):
        """CoordinateMapper should take list of exon pairs"""
        cm = CoordinateMapper(exons)
        # FIXME CompoundLocation does not have __eq__
        self.assertEqual(str(cm.exons), str(self.sf.location))

    def testSeqRecordInit(self):
        """CoordinateMapper should use first CDS feature of SeqRecord"""
        sr = SeqRecord("", features=[self.sf])
        cm = CoordinateMapper(sr)
        self.assertEqual(str(cm.exons), str(self.sf.location))

    def testSeqFeatureInit(self):
        """CoordinateMapper should take a CDS SeqFeature"""
        cm = CoordinateMapper(self.sf)
        self.assertEqual(str(cm.exons), str(self.sf.location))

    def testEmptyInit(self):
        """CoordinateMapper should fail with no arguments"""
        self.assertRaises(Exception, CoordinateMapper)


 class Testg2c(unittest.TestCase):
    """Test success of good input and failure of bad input to g2c"""

    def setUp(self):
        self.dialect = None

    # what it should do

    # any integer should return
    def testGoodExons(self):
        """g2c should work for exon positions"""
        for arg, expected in zip(g_exons, c_exons):
            self.assertEqual(cmap.g2c(arg), expected)

    @two_dialects
    def testGoodIntronsStr(self):
        """g2c should work for intron positions (string)"""
        dia = self.dialect
        for arg, expect in zip(g_introns[dia], c_introns[dia]):
            actual = cmap.g2c(arg, dia).to(dia)
            self.assertEqual(actual, expect)

    @two_dialects
    def testGoodIntrons(self):
        """g2c should work for intron positions (CDSPosition)"""
        for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
            actual = cmap.g2c(arg, self.dialect)
            expect = CDSPosition.from_anchor(*tup)
            self.assertEqual(actual, expect)
            self.assertEqual(str(actual), str(expect))

    @two_dialects
    def testGoodOutsideStr(self):
        """g2c should work for outside positions (string)"""
        dia = self.dialect
        for arg, expected in zip(g_outside[dia], c_outside[dia]):
            actual = cmap.g2c(arg, dia).to(dia)
            self.assertEqual(actual, expected)

    def testGoodOutside(self):
        """g2c should work for outside positions (CDSPosition)"""
        for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
            actual = cmap.g2c(arg)
            expect = CDSPosition.from_anchor(*tup)
            self.assertEqual(actual, expect)
            self.assertEqual(str(actual), str(expect))

    # what it shouldn't do

    # should it handle non-exact positions?

    @two_dialects
    def testZeroArg(self):
        """g2c should work for 0 in no dialect and fail for 1-index"""
        args = (0, self.dialect)
        if self.dialect is None:
            cmap.g2c(*args)
        else:
            self.assertRaises(GenomePositionError, cmap.g2c, *args)

    @two_dialects
    def testBadArg(self):
        """g2c should fail on string, float, None, or negative"""
        bad = ("string", None, 3.14, -5)
        for arg in bad:
            self.assertRaises(Exception, cmap.g2c, arg)
        #self.assertRaises(GenomePositionError, cmap.g2c, -5)


 class Testc2p(unittest.TestCase):
    """Test success of good input and failure of bad input to c2p"""

    # what it should do

    # integer within length of exon should return correct protein
    def testGoodExons(self):
        """c2p should work for exon positions"""
        for arg, expect in zip(c_exons, p_exons_trip):
            self.assertEqual(cmap.c2p(arg), expect)

    # FIXME should CDSPosition return None or raise error?
    def testBadNotProtein(self):
        """c2p should fail for CDSPosition or str"""
        bad = ("string", CDSPosition.from_anchor(7, -5))
        for arg in bad:
            self.assertRaises(ValueError, cmap.c2p, arg)


 class Testp2c(unittest.TestCase):
    """Test success of good input and failure of bad input to p2c"""

    # what it should do

    # integer within length of protein should return correct range
    def testGoodExons(self):
        """p2c should work for exon positions"""
        #for arg, expect in p2c_exons.iteritems():
        for arg, expect in zip(p_exons, c_exon_prs):
            self.assertEqual(cmap.p2c(arg), expect)

    def testBadTooLarge(self):
        """p2c should fail for positions longer than the max length"""
        self.assertRaises(ProteinPositionError, cmap.p2c, 500)

    def testBadNegative(self):
        """p2c should fail for negative protein coordinate"""
        self.assertRaises(ProteinPositionError, cmap.p2c, -50)


 class Testc2g(unittest.TestCase):
    """Test success of good input and failure of bad input to c2g"""

    def setUp(self):
        self.dialect = None

    # what it should do

    # integer within length of exon should return correct value
    def testGoodExons(self):
        """c2g should work for exon positions"""
        for arg, expected in zip(c_exons, g_exons):
            self.assertEqual(cmap.c2g(arg), expected)

    # CDSPosition object should return correct value
    @two_dialects
    def testGoodIntrons(self):
        """c2g should work for introns (CDSPosition)"""
        dia = self.dialect
        for c_args, g in zip(c_intron_tups, g_introns[dia]):
            actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
            self.assertEqual(actual.to(dia), g)

    # FIXME doesn't work with HGVS
    #@two_dialects
    def testGoodOutside(self):
        """c2g should work for outside (CDSPosition)"""
        for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
            self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
                             expect)

    # simple string should return correct value
    #    \d*[+-]\d+
    @two_dialects
    def testGoodIntronsStr(self):
        """c2g should work for intron positions (string)"""
        dia = self.dialect
        # c_str from dialect to *dialect* matches *dialect* g_str
        for c_str, g in zip(c_introns[dia], g_introns[dia]):
            self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)

        # c_str from dialect as *default* matches *default* g_str
        for c_str, g in zip(c_introns[dia], g_introns[None]):
            self.assertEqual(cmap.c2g(c_str, dia), g)

    @two_dialects
    def testGoodOutsideStr(self):
        """c2g should work for outside positions (string)"""
        dia = self.dialect
        for expected, arg in zip(g_outside[dia], c_outside[dia]):
            self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)

    def testNegativeInt(self):
        """c2g should treat negative integer as pre-CDS"""
        # XXX negative integers are treated as pre-CDS
        self.assertEqual(cmap.c2g(-2), 5806)

    # what it shouldn't do

    # bad positions in form 123+4 where 123 is not end of an exon
    # or where sign is wrong
    @two_dialects
    def testBadNotEnd(self):
        """c2g should fail on invalid introns"""
        bad_introns = {None: ("160+5",  # 160 is exonic
                       "51-6",  # 51 is end
                       "52+10",  # 52 is start
        ), 'HGVS': ("160+5", "52-6", "53+10")}
        for arg in bad_introns[self.dialect]:
            self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)

    @two_dialects
    def testBadString(self):
        """c2g should fail on arbitrary string"""
        bad_vals = {None: ["xxx", "4-5'", "*10"], 'HGVS': ["xxx", "4-5"]}
        for arg in bad_vals[self.dialect]:
            self.assertRaises(ValueError, cmap.c2g, arg, self.dialect)

    # integers outside length of exon should raise IndexError
    def testBadTooLarge(self):
        """c2g should fail on values beyond the exon"""
        num = 400
        assert num > len(cmap.exons)
        self.assertRaises(IndexError, cmap.c2g, num)
        self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))


 class Testp2g(unittest.TestCase):

    def testGoodStr(self):
        """p2g should return expected ranges"""
        for p_arg, gpos in zip(p_exons, g_exon_prs):
            self.assertEqual(cmap.p2g(p_arg), gpos)


 class Testg2p(unittest.TestCase):

    def testGoodStr(self):
        """g2p should return correct position for exons"""
        for g_arg, ppos in zip(g_exons, p_exons_trip):
            self.assertEqual(cmap.g2p(g_arg), ppos)


 class TestStrand(unittest.TestCase):

    def setUp(self):
        locations = FeatureLocation(24, 30, -1) + FeatureLocation(35, 40, +1)
        feature = SeqFeature(locations, type="CDS")
        self.cm = CoordinateMapper(feature)
        self.g_exons = [29, 28, 27, 26, 25, 24,
                        35, 36, 37, 38, 39]
        assert list(self.cm.exons) == self.g_exons
        c_len = 11
        assert c_len == len(list(self.cm.exons))
        self.c_exons = xrange(c_len)

    def testg2c(self):
        "c2g and g2c should work for mixed-strand exons"
        for c, g in zip(self.c_exons, self.g_exons):
            self.assertEqual(c, self.cm.g2c(g))
            self.assertEqual(g, self.cm.c2g(c))
            if c < 6:
                self.assertEqual(self.cm.c2g(c).strand, -1)
            else:
                self.assertEqual(self.cm.c2g(c).strand, +1)


 if __name__ == "__main__":
    #unittest.main()
    suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
    suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
    suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
    suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
    suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
    suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
    suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
    suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
    suite8 = unittest.TestLoader().loadTestsFromTestCase(TestStrand)
    all_suites = [
        suite0,
        suite1,
        suite2,
        suite3,
        suite4,
        suite5,
        suite6,
        suite7,
        suite8,
    ]
    all = unittest.TestSuite(all_suites)
    unittest.TextTestRunner(verbosity=3).run(all)
	# CoordinateMapper -- map between genomic, cds, and protein coordinates
	# AUTHOR: Reece Hart <[email protected]>
	# Modifications: Lenna X. Peterson <[email protected]>
	# LICENSE: BioPython

	# Examples:
	# AB026906.1:g.7872G>T
	# AB026906.1:c.274G>T
	# BA...:p.Asp92Tyr
	#
	# All refer to congruent variants. A coordinate mapper is needed to
	# translate between at least these three coordinate frames. The mapper
	# should deal with specialized syntax for splicing and UTR (e.g., 88+1,
	# 89-2, -14, *46). In addition, care should be taken to ensure consistent 0-
	# or 1-based numbering (0 internally, as with Python/BioPython and
	# Perl/BioPerl).
	#
	# g -----------00000000000-----1111111----------22222222222*222-----
	# s0 e0 s1 e1 s2 e2
	# \ \ \| \| / /
	# +--+ +--+ \| \| +-------+ +-------+
	# \ \\| \|/ /
	# c 00000000000111111122222222222*222
	# c0 c1 c2
	# aaabbbcccdddeeefffggghhhiiijj*kkk
	# p A B C D E F G H I J K
	# p0 p1 p2 ... pn
	#
	#
	# TODO:
	# * g2c returns index + extended syntax
	# * c2g accepts index + extended syntax

	from functools import wraps
	from math import floor
	import warnings

	import MapPositions # required for pos_factory decorator
	from MapPositions import CDSPositionError, ProteinPositionError
	from MapPositions import GenomePosition, CDSPosition, ProteinPosition

	from Bio import BiopythonParserWarning
	# FIXME change to absolute import once CompoundLocation is merged to master
	from SeqFeature import FeatureLocation


	class CoordinateMapper(object):
	"""Convert positions between genomic, CDS, and protein coordinates."""

	def __init__(self, selist):
	"""Set exons to be used for mapping.

	Parameters
	----------
	selist : SeqRecord, SeqFeature, list
	Object containing exon information
	"""
	self._exons = self._get_exons(selist)

	@staticmethod
	def _get_exons(seq):
	"""Extract exons from SeqRecord, SeqFeature, or list of pairs.

	Parameters
	----------
	seq : SeqRecord, SeqFeature, list
	Object containing exon information.

	Returns
	-------
	SeqFeature.FeatureLocation
	"""
	# Try as SeqRecord
	if hasattr(seq, 'features'):
	# generator
	cdsf = (f for f in seq.features if f.type == 'CDS').next()
	return cdsf.location
	# Try as SeqFeature
	elif hasattr(seq, 'location'):
	if seq.type != 'CDS':
	# FIXME should this be a fatal error?
	warnings.warn("Provided SeqFeature should be CDS",
	BiopythonParserWarning)
	return seq.location
	# Try as list of pairs
	return sum([FeatureLocation(s, e) for s, e in seq])

	@property # read-only
	def exons(self):
	return self._exons

	@property # read-only
	def exon_list(self):
	return list(self.exons)

	def pos_factory(pos_type):
	"""
	Convert string or int pos to appropriate Position object

	Parameters
	----------
	pos_type : str
	Position type (Genome, CDS, Protein)
	"""
	def wrapper(fn):
	@wraps(fn)
	def make_pos(self, pos, dialect=None):
	# retrieve Position object
	_obj = getattr(MapPositions, pos_type + "Position")
	# if pos is not already Position object, make it one
	if not isinstance(pos, _obj):
	# no dialect: use default constructor
	if dialect is None:
	pos = _obj(pos)
	# use dialect alternate constructor
	else:
	pos = _obj.from_dialect(dialect, pos)
	# call function with new pos
	return fn(self, pos, dialect)
	return make_pos
	return wrapper

	@pos_factory("Genome")
	def g2c(self, gpos, dialect=None):
	"""Convert integer from genomic to CDS coordinates

	Parameters
	----------
	gpos : int
	Genomic position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	CDSPosition
	"""
	gpos = int(gpos)
	fmts = CDSPosition.fmt_dict

	def _simple_g2c(g):
	return CDSPosition(self.exon_list.index(g))

	# within exon
	if gpos in self.exons:
	return _simple_g2c(gpos)
	# before CDS
	if gpos < self.exons.start:
	return CDSPosition(fmts['post-CDS'] % (gpos - self.exons.start))
	# after CDS
	if gpos >= self.exons.end:
	return CDSPosition(fmts['pre-CDS'] % (gpos - self.exons.end + 1))
	# intron
	# set start of first intron
	prev_end = self.exons.parts[0].end
	for part in self.exons.parts[1:]:
	# not in this intron
	if gpos > part.start:
	prev_end = part.end
	continue
	len_intron = part.start - prev_end
	# second half (exclusive) of intron
	# offset > middle of intron
	if gpos - prev_end > floor(len_intron / 2.0):
	anchor = _simple_g2c(part.start)
	offset = gpos - part.start
	assert offset < 0
	# first half (inclusive) of intron
	else:
	anchor = _simple_g2c(prev_end - 1)
	offset = gpos - prev_end + 1
	assert offset > 0
	assert self.check_intron(anchor, offset)
	return CDSPosition(fmts['intron'] % (anchor, offset))

	assert False # function should return for every integer

	# TODO verify that values of offset are sane
	def check_intron(self, anchor, offset):
	"""
	Verify that CDS-relative intron position is valid with given exons.

	Parameters
	----------
	anchor : int
	Intron anchor (closest CDS position)
	offset : int
	Intron offset (distance to anchor)

	Returns
	-------
	bool
	"""
	for exon in self.exons.parts:
	start = int(self.g2c(exon.start))
	if anchor == start:
	if offset > 0:
	raise CDSPositionError(
	"Invalid intron: offset from exon start must be negative.")
	return True
	end = int(self.g2c(exon.end - 1))
	if anchor == end:
	if offset < 0:
	raise CDSPositionError(
	"Invalid intron: offset from exon end must be positive.")
	return True
	raise CDSPositionError(
	"Invalid intron: anchor must be start or end of an exon.")

	def get_strand(self, gpos):
	for exon in self.exons.parts:
	if gpos in exon:
	return exon.strand
	raise ValueError("Provided gpos must be exon")

	@pos_factory("CDS")
	def c2g(self, cpos, dialect=None):
	"""Convert from CDS to genomic coordinates

	Parameters
	----------
	cpos : int
	CDS position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	GenomePosition
	"""
	if cpos.pos_type == "pre-CDS":
	return GenomePosition(self.exons.start + cpos.offset)
	elif cpos.pos_type == "post-CDS":
	return GenomePosition(self.exons.end - 1 + cpos.offset)

	g_anchor = self.exon_list[cpos.anchor]
	if cpos.pos_type == "exon":
	strand = self.get_strand(g_anchor)
	return GenomePosition(g_anchor, strand=strand)
	elif cpos.pos_type == "intron":
	offset = cpos.offset
	if self.check_intron(cpos.anchor, offset):
	return GenomePosition(g_anchor + offset)

	assert False # all positions should be one of the 4 types

	@pos_factory("CDS")
	def c2p(self, cpos, dialect=None):
	"""Convert from CDS to protein coordinates

	Parameters
	----------
	cpos : int
	CDS position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	ProteinPosition
	"""
	try:
	cpos = int(cpos)
	except TypeError:
	raise CDSPositionError("'%s' does not correspond to a protein"
	% repr(cpos))
	return ProteinPosition(int(cpos / 3.0))

	@pos_factory("Genome")
	def g2p(self, gpos, dialect=None):
	"""Convert integer from genomic to protein coordinates

	Parameters
	----------
	gpos : int
	Genomic position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	ProteinPosition
	"""
	return self.c2p(self.g2c(gpos))

	@pos_factory("Protein")
	def p2c(self, ppos, dialect=None):
	"""Convert integer from protein coordinate to CDS closed range

	Parameters
	----------
	ppos : int
	Protein position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	CDSPosition
	"""
	try:
	ppos = int(ppos)
	except TypeError:
	return None
	if ppos < 0:
	raise ProteinPositionError("'%s' should not be negative")
	# FIXME is CDS guaranteed to have len % 3 == 0?
	first_base = (ppos) * 3
	last_base = first_base + 2
	if last_base > len(self.exons):
	raise ProteinPositionError("'%s' is too large")
	return (CDSPosition(first_base), CDSPosition(last_base))

	@pos_factory("Protein")
	def p2g(self, ppos, dialect=None):
	"""Convert integer from protein to genomic coordinates

	Parameters
	----------
	ppos : int
	Protein position
	dialect : str
	Coordinate dialect (GenBank or HGVS, default None)

	Returns
	-------
	GenomePosition
	"""
	return tuple(self.c2g(x) for x in self.p2c(ppos))


	if __name__ == '__main__':
	# The following exons are from AB026906.1.
	# test case: g.7872 -> c.274 -> p.92
	# N.B. These are python counting coordinates (0-based)
	exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]

	def test_list(g_range):
	cm = CoordinateMapper(exons)
	for g1 in g_range:
	print g1,
	c1 = cm.g2c(g1)
	print c1,
	p1 = cm.c2p(c1)
	print p1,
	if p1:
	c2 = cm.p2c(p1)[0]
	else:
	c2 = c1
	print ' \| ', c2,
	g2 = cm.c2g(c2)
	print g2

	print cm.g2p(7872)
	print cm.p2g(92)

	def test_simple():
	from SeqFeature import SeqFeature
	location = sum([FeatureLocation(2, 4, +1),
	FeatureLocation(8, 11, +1),
	FeatureLocation(16, 18, +1)])
	simple_exons = SeqFeature(location, type="CDS")
	cm = CoordinateMapper(simple_exons)
	print cm.exons
	print list(cm.exons)
	print range(len(cm.exons))
	for i in range(len(cm.exons)):
	print "%3s" % cm.c2g(i),
	print
	for i in xrange(20):
	print "%3d" % i,
	print
	for i in xrange(20):
	print "%3s" % cm.g2c(i),
	print

	r1 = (7870, 7871, 7872, 7873, 7874)
	r2 = (5807, 5808, 5809,
	6871, 6872, 6873, 6874, 6875,
	7766, 7767, 7768, 7769,
	13784, 13785, 13786)
	test_list(r1)
	#test_list(r2)
	print
	test_simple()
	from copy import copy
	import re


	class InvalidPositionError(ValueError):
	"Exception for bad coordinates"


	class GenomePositionError(InvalidPositionError):
	"Exception for bad genome coordinates"


	class CDSPositionError(InvalidPositionError):
	"Exception for bad CDS coordinates"


	class ProteinPositionError(InvalidPositionError):
	"Exception for bad protein coordinates"


	class MapPosition(object):
	"""Generic position for coordinate mapping."""
	def __init__(self, pos, index=None, **kwargs):
	"""Init class from HGVS position.

	Parameters
	----------
	pos : int, str
	Position to convert
	index : int
	Start of counting (e.g. 1 for GenBank or HGVS)
	"""
	self.index = index
	if pos and self.index:
	pos -= index
	self.pos = pos

	@classmethod
	def from_dialect(cls, dialect, pos, strand=None):
	"""Init class from given dialect.

	Parameters
	----------
	dialect : str
	input dialect (HGVS or GenBank)
	pos : int, str
	dialect position to convert
	strand : int
	Strand of position (-1 or +1, default None)

	Returns
	-------
	cls
	"""
	if dialect is None:
	return cls(pos, strand=strand)
	try:
	from_func = getattr(cls, "from_" + dialect.lower())
	except AttributeError:
	raise ValueError("Dialect '%s' not valid" % dialect)
	return from_func(pos, strand)

	@classmethod
	def from_hgvs(cls, hgvs_pos, strand=None):
	"""Init class from HGVS position.

	Parameters
	----------
	hgvs_pos : int, str
	HGVS position to convert
	strand : int
	Strand of position (-1 or +1, default None)

	Returns
	-------
	cls
	"""
	return cls(hgvs_pos, index=1, strand=strand, post_fmt="*%d")

	@classmethod
	def from_genbank(cls, gbk_pos, strand=None):
	"""Init class from GenBank position.

	Parameters
	----------
	gbk_pos : int, str
	GenBank position to convert
	strand : int
	Strand of position (-1 or +1, default None)

	Returns
	-------
	cls
	"""
	return cls(gbk_pos, index=1, strand=strand)

	def to(self, dialect):
	"""Convert position to specified dialect.

	Parameters
	----------
	dialect : str
	Output dialect (HGVS or GenBank)
	"""
	if dialect is None:
	return self.to_str()
	try:
	to_func = getattr(self, "to_" + dialect.lower())
	except AttributeError:
	raise ValueError("Dialect '%s' not valid" % dialect)
	return to_func()

	def to_hgvs(self):
	"""Convert position to HGVS"""
	if self.pos or self.pos == 0:
	return self.pos + 1
	return None

	def to_genbank(self):
	"""Convert position to GenBank"""
	if self.pos or self.pos == 0:
	return self.pos + 1
	return None

	def to_str(self):
	"""Make string representation without conversion"""
	return self.pos

	def __str__(self):
	"""String representation"""
	return str(self.to_str())

	def __int__(self):
	"""Integer representation"""
	return self.pos

	def __repr__(self):
	"""Detailed representation for debugging"""
	return "%s(%s)" % (self.__class__.__name__, self.to_str())

	class GenomePosition(MapPosition):
	"""Genome position for coordinate mapping"""
	def __init__(self, gpos, index=None, strand=None, **kwargs):
	# FIXME if index is string, error may be raised
	if gpos < (index or 0):
	raise GenomePositionError("Genome position cannot be negative.")
	# call superclass constructor
	MapPosition.__init__(self, gpos, index)
	self.strand = strand

	def __eq__(self, other):
	"""Compare equal to other GenomePosition with same pos
	or integer equal to pos"""
	if isinstance(other, int):
	return self.pos == other
	return isinstance(other, GenomePosition) and self.pos == other.pos


	class ProteinPosition(MapPosition):
	"""Protein position for coordinate mapping"""
	def __init__(self, ppos, index=None, **kwargs):
	# call superclass constructor
	MapPosition.__init__(self, ppos, index)

	def __eq__(self, other):
	"""Compare equal to other ProteinPosition with same pos
	or integer equal to pos"""
	if isinstance(other, int):
	return self.pos == other
	return isinstance(other, ProteinPosition) and self.pos == other.pos


	class CDSPosition(MapPosition):
	"""CDS position for coordinate mapping"""
	def __init__(self, cpos, index=None,
	pre_fmt=None, post_fmt=None, **kwargs):
	# Dispatch types and return anchor, offset
	if isinstance(cpos, int):
	anchor, offset = self.parse_int(cpos)
	elif isinstance(cpos, str):
	anchor, offset = self.parse_str(cpos, pre_fmt, post_fmt)
	else:
	raise CDSPositionError("'%s' is of unknown type" % cpos)
	# Set instance anchor and offset
	# call superclass constructor
	MapPosition.__init__(self, anchor, index)
	self._offset = offset
	self.validate()

	@classmethod
	def from_anchor(cls, anchor=None, offset=None):
	"""Init CDSPosition with anchor, offset pair.

	Parameters
	----------
	anchor : int
	CDS anchor (coordinate of nearest exon position)
	offset : int
	Offset from nearest exon position

	Returns
	-------
	CDSPosition
	"""
	if not anchor:
	pos = cls("%+d" % offset)
	elif anchor < 0:
	raise CDSPositionError("Anchor cannot be negative.")
	else:
	pos = cls(anchor)
	pos.offset = offset
	return pos

	@property
	def offset(self):
	return self._offset

	@offset.setter
	def offset(self, val):
	"Validate new offset, then update"
	self.validate(offset=val)
	self._offset = val

	@property
	def anchor(self):
	return self.pos

	@anchor.setter
	def anchor(self, val):
	"Validate new anchor, then update pos"
	self.validate(anchor=val)
	self.pos = val

	def validate(self, anchor="sentinel", offset="sentinel"):
	"""Check whether anchor and offset yield a valid position.

	Parameters
	----------
	anchor : int
	CDS anchor (coordinate of nearest exon position)
	offset : int
	Offset from nearest exon position

	Returns
	-------
	bool
	"""

	if anchor == "sentinel":
	anchor = self.anchor
	if offset == "sentinel":
	offset = self.offset
	if offset == 0:
	raise CDSPositionError(
	"Offset may not be 0. For no offset, use None.")
	if not anchor and anchor != 0 and not offset:
	raise CDSPositionError(
	"At least one of pos or offset must be defined")
	if anchor and anchor < 0:
	raise CDSPositionError("CDS anchor may not be negative.")
	return True

	@property
	def pos_type(self):
	"Type of CDS position, dynamically determined from values"
	# inside CDS
	if self.pos or self.pos == 0:
	if not self.offset:
	return "exon"
	return "intron"
	# outside CDS
	elif self.offset > 0:
	return "post-CDS"
	return "pre-CDS"
	assert False # all integers should return

	@property
	def sub_dict(self):
	if self.pos_type == 'intron':
	return {'pos': self.pos, 'offset': self.offset}
	if self.pos_type == 'exon':
	return {'pos': self.pos}
	if self.pos_type == 'post-CDS' or self.pos_type == 'pre-CDS':
	return {'offset': self.offset}

	fmt_dict = {
	'exon': "%d",
	'intron': "%d%+d",
	'post-CDS': "%+d",
	'pre-CDS': "%+d",
	}

	@staticmethod
	def _shift_index(pos_dict, idx):
	"""Increment value of dict key 'pos' by given index.

	Parameters
	----------
	pos_dict : dict
	Dictionary to search for 'pos'
	idx : int
	Index to add to pos_dict['pos']

	Returns
	-------
	dict
	"""
	if 'pos' in pos_dict and idx:
	pos_dict['pos'] += idx
	return pos_dict

	def _make_str(self, val_dict=None, fmt_dict=None):
	"""Retrieve format string and substitute values.

	Parameters
	----------
	val_dict : dict
	Dictionary of values to substitute into string
	fmt_dict : dict
	Dictionary of format strings for each pos type

	Returns
	-------
	str
	"""
	# set default dicts if parameter dicts are false
	fmt_dict = fmt_dict or self.fmt_dict
	val_dict = val_dict or self.sub_dict
	return fmt_dict[self.pos_type] % tuple(val_dict.values())

	@staticmethod
	def parse_int(cpos):
	"""Parse int to anchor, offset pair

	Parameters
	----------
	cpos : int
	Integer position to convert

	Returns
	-------
	tuple
	"""
	# treat negative int as offset
	if cpos < 0:
	return (None, cpos)
	# treat positive int as anchor
	return (cpos, None)

	@staticmethod
	def parse_str(cpos, pre_fmt, post_fmt):
	"""Parse string to anchor, offset pair

	Parameters
	----------
	cpos : str
	String position to convert
	pre_fmt : str
	Format string for pre-CDS positions
	post_fmt : str
	Format string for post-CDS positions

	Returns
	-------
	tuple
	"""
	delimiters = "\+\-"
	if post_fmt and "*" in post_fmt:
	delimiters += "\*"
	# parenth causes split pattern to be kept
	delim_rx = re.compile("([%s])" % delimiters)
	parsed = delim_rx.split(cpos, 1)
	if len(parsed) == 1: # may be int
	return CDSPosition.parse_int(int(cpos))
	# 1 split is normally length 2 but delimiter is also kept
	elif len(parsed) != 3:
	raise CDSPositionError(
	"String '%s' not parseable for this position." % cpos)
	if parsed[0] == "":
	anchor = None
	else:
	anchor = int(parsed[0])
	if post_fmt and parsed[1] == "*" == post_fmt[0]:
	parsed[1] = "+"
	offset = int("".join(parsed[1:]))
	return (anchor, offset)

	def to_hgvs(self):
	"""Convert CDS position to HGVS"""
	fmt_dict = copy(self.fmt_dict)
	fmt_dict['post-CDS'] = "*%d"
	sub_dict = self._shift_index(self.sub_dict, 1)
	return self._make_str(sub_dict, fmt_dict)

	def to_genbank(self):
	"""Convert CDS position to GenBank"""
	sub_dict = self._shift_index(self.sub_dict, 1)
	return self._make_str(sub_dict)

	def to_str(self):
	"""Make string representation of CDS position"""
	return self._make_str()

	def __int__(self):
	"""Integer representation of CDS exon, otherwise NotImplemented"""
	if self.pos_type == "exon":
	return MapPosition.__int__(self)
	return NotImplemented

	def __eq__(self, other):
	"""Compare equal to other MapPosition with same pos and offset
	or int if exon"""
	if isinstance(other, int) and self.pos_type == "exon":
	return self.pos == other
	return isinstance(other, CDSPosition) and \
	self.pos == other.pos and \
	self.offset == other.offset


	if __name__ == "__main__":

	def print_pos(pos_obj):
	print "object:", pos_obj
	print "repr:", repr(pos_obj)
	print "HGVS:", pos_obj.to_hgvs()
	print

	g = GenomePosition.from_hgvs(6)
	print_pos(g)

	test_g = GenomePosition(5)
	#test_c = CDSPosition("6+1")
	#test_c = CDSPosition.from_hgvs("6+1")
	test_c = CDSPosition.from_hgvs("*1")
	#test_c = CDSPosition(6)
	#test_c = CDSPosition(-1)
	print_pos(test_g)
	print test_c.pos_type
	print_pos(test_c)
	#!/usr/bin/python

	from functools import wraps
	import unittest

	from CoordinateMapper import CoordinateMapper
	from MapPositions import GenomePositionError
	from MapPositions import ProteinPositionError
	from MapPositions import CDSPosition, CDSPositionError
	from SeqFeature import FeatureLocation, SeqFeature
	from Bio.SeqRecord import SeqRecord


	exons = [(5808, 5860), (6757, 6874), (7767, 7912), (13709, 13785)]
	cmap = CoordinateMapper(exons)
	g_exons = xrange(7868, 7875) # len 7
	c_exons = xrange(270, 279) # len 9
	p_exons = xrange(90, 93) # len 3
	p_exons_trip = [p for p in p_exons for n in range(3)] # len 9
	c_exon_prs = ((270, 272), (273, 275), (276, 278)) # len 3
	g_exon_prs = ((7868, 7870), (7871, 7873), (7874, 7876)) # len 3

	g_introns = {None: (5860, 5861, 6308, 6309, 6755, 6756),
	'HGVS': (5861, 5862, 6309, 6310, 6756, 6757)}
	c_introns = {None: ('51+1', '51+2', '51+449', '52-448', '52-2', '52-1'),
	'HGVS': ('52+1', '52+2', '52+449', '53-448', '53-2', '53-1')}
	c_intron_tups = ((51, 1), (51, 2), (51, 449), (52, -448), (52, -2), (52, -1))

	g_outside = {None: (0, 13785, 14000), 'HGVS': (1, 13786, 14001)}
	c_outside = {None: ('-5808', '+1', '+216'), 'HGVS': ('-5808', '1', '216')}
	c_outside_tups = ((None, -5808), (None, 1), (None, 216))


	def two_dialects(fn):
	@wraps(fn)
	def call_tests(self):
	orig_dialect = self.dialect
	for dialect in (None, 'HGVS'):
	self.dialect = dialect
	fn(self)
	self.dialect = orig_dialect
	return call_tests


	class TestCDSPosition(unittest.TestCase):
	"""Test that CDSPosition works properly"""

	def setUp(self):
	self.dialect = None

	@two_dialects
	def testGoodIntron(self):
	"""CDSPosition should match good intron values"""
	for c_args, c in zip(c_intron_tups, c_introns[self.dialect]):
	actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
	self.assertEqual(actual, c)

	@two_dialects
	def testGoodOutside(self):
	"""CDSPosition should match good outside-CDS values"""
	for c_args, c in zip(c_outside_tups, c_outside[self.dialect]):
	actual = CDSPosition.from_anchor(*c_args).to(self.dialect)
	self.assertEqual(actual, c)

	def testEqual(self):
	"""CDSPosition should test equal with same args"""
	for args in c_intron_tups:
	CPos = CDSPosition.from_anchor
	self.assertEqual(CPos(args), CPos(args))
	self.assertEqual(str(CPos(args)), str(CPos(args)))

	#def testEqualDialects(self):
	#for c_pos in c_outside:
	#self.assertEqual(CDSPosition(c_pos), CDSPosition.from_hgvs(c_pos))
	#self.assertEqual(c_pos, CDSPosition.from_hgvs(c_pos).to_hgvs())

	def testBadAnchor(self):
	"""CDSPosition should fail with negative CDS anchor"""
	self.assertRaises(CDSPositionError, CDSPosition.from_anchor, -1, 1)

	def testBadOffset(self):
	"""CDSPosition should fail with zero offset"""
	self.assertRaises(CDSPositionError, CDSPosition.from_anchor, None, 0)

	def testValidate(self):
	"""Setting CDSPosition anchor offset should fail for invalid cases"""
	intron = CDSPosition("22+4")
	self.assertRaises(CDSPositionError, setattr, intron, "offset", 0)
	self.assertRaises(CDSPositionError, setattr, intron, "anchor", -5)
	exon = CDSPosition("60")
	self.assertRaises(CDSPositionError, setattr, exon, "anchor", None)

	class TestCoordinateMapper(unittest.TestCase):
	"""Test that CoordinateMapper works properly"""

	def setUp(self):
	self.sf = SeqFeature(sum((FeatureLocation(s, e) for s, e in exons)),
	type="CDS")

	def testListInit(self):
	"""CoordinateMapper should take list of exon pairs"""
	cm = CoordinateMapper(exons)
	# FIXME CompoundLocation does not have __eq__
	self.assertEqual(str(cm.exons), str(self.sf.location))

	def testSeqRecordInit(self):
	"""CoordinateMapper should use first CDS feature of SeqRecord"""
	sr = SeqRecord("", features=[self.sf])
	cm = CoordinateMapper(sr)
	self.assertEqual(str(cm.exons), str(self.sf.location))

	def testSeqFeatureInit(self):
	"""CoordinateMapper should take a CDS SeqFeature"""
	cm = CoordinateMapper(self.sf)
	self.assertEqual(str(cm.exons), str(self.sf.location))

	def testEmptyInit(self):
	"""CoordinateMapper should fail with no arguments"""
	self.assertRaises(Exception, CoordinateMapper)


	class Testg2c(unittest.TestCase):
	"""Test success of good input and failure of bad input to g2c"""

	def setUp(self):
	self.dialect = None

	# what it should do

	# any integer should return
	def testGoodExons(self):
	"""g2c should work for exon positions"""
	for arg, expected in zip(g_exons, c_exons):
	self.assertEqual(cmap.g2c(arg), expected)

	@two_dialects
	def testGoodIntronsStr(self):
	"""g2c should work for intron positions (string)"""
	dia = self.dialect
	for arg, expect in zip(g_introns[dia], c_introns[dia]):
	actual = cmap.g2c(arg, dia).to(dia)
	self.assertEqual(actual, expect)

	@two_dialects
	def testGoodIntrons(self):
	"""g2c should work for intron positions (CDSPosition)"""
	for arg, tup in zip(g_introns[self.dialect], c_intron_tups):
	actual = cmap.g2c(arg, self.dialect)
	expect = CDSPosition.from_anchor(*tup)
	self.assertEqual(actual, expect)
	self.assertEqual(str(actual), str(expect))

	@two_dialects
	def testGoodOutsideStr(self):
	"""g2c should work for outside positions (string)"""
	dia = self.dialect
	for arg, expected in zip(g_outside[dia], c_outside[dia]):
	actual = cmap.g2c(arg, dia).to(dia)
	self.assertEqual(actual, expected)

	def testGoodOutside(self):
	"""g2c should work for outside positions (CDSPosition)"""
	for arg, tup in zip(g_outside[self.dialect], c_outside_tups):
	actual = cmap.g2c(arg)
	expect = CDSPosition.from_anchor(*tup)
	self.assertEqual(actual, expect)
	self.assertEqual(str(actual), str(expect))

	# what it shouldn't do

	# should it handle non-exact positions?

	@two_dialects
	def testZeroArg(self):
	"""g2c should work for 0 in no dialect and fail for 1-index"""
	args = (0, self.dialect)
	if self.dialect is None:
	cmap.g2c(*args)
	else:
	self.assertRaises(GenomePositionError, cmap.g2c, *args)

	@two_dialects
	def testBadArg(self):
	"""g2c should fail on string, float, None, or negative"""
	bad = ("string", None, 3.14, -5)
	for arg in bad:
	self.assertRaises(Exception, cmap.g2c, arg)
	#self.assertRaises(GenomePositionError, cmap.g2c, -5)


	class Testc2p(unittest.TestCase):
	"""Test success of good input and failure of bad input to c2p"""

	# what it should do

	# integer within length of exon should return correct protein
	def testGoodExons(self):
	"""c2p should work for exon positions"""
	for arg, expect in zip(c_exons, p_exons_trip):
	self.assertEqual(cmap.c2p(arg), expect)

	# FIXME should CDSPosition return None or raise error?
	def testBadNotProtein(self):
	"""c2p should fail for CDSPosition or str"""
	bad = ("string", CDSPosition.from_anchor(7, -5))
	for arg in bad:
	self.assertRaises(ValueError, cmap.c2p, arg)


	class Testp2c(unittest.TestCase):
	"""Test success of good input and failure of bad input to p2c"""

	# what it should do

	# integer within length of protein should return correct range
	def testGoodExons(self):
	"""p2c should work for exon positions"""
	#for arg, expect in p2c_exons.iteritems():
	for arg, expect in zip(p_exons, c_exon_prs):
	self.assertEqual(cmap.p2c(arg), expect)

	def testBadTooLarge(self):
	"""p2c should fail for positions longer than the max length"""
	self.assertRaises(ProteinPositionError, cmap.p2c, 500)

	def testBadNegative(self):
	"""p2c should fail for negative protein coordinate"""
	self.assertRaises(ProteinPositionError, cmap.p2c, -50)


	class Testc2g(unittest.TestCase):
	"""Test success of good input and failure of bad input to c2g"""

	def setUp(self):
	self.dialect = None

	# what it should do

	# integer within length of exon should return correct value
	def testGoodExons(self):
	"""c2g should work for exon positions"""
	for arg, expected in zip(c_exons, g_exons):
	self.assertEqual(cmap.c2g(arg), expected)

	# CDSPosition object should return correct value
	@two_dialects
	def testGoodIntrons(self):
	"""c2g should work for introns (CDSPosition)"""
	dia = self.dialect
	for c_args, g in zip(c_intron_tups, g_introns[dia]):
	actual = cmap.c2g(CDSPosition.from_anchor(*c_args), dia)
	self.assertEqual(actual.to(dia), g)

	# FIXME doesn't work with HGVS
	#@two_dialects
	def testGoodOutside(self):
	"""c2g should work for outside (CDSPosition)"""
	for args, expect in zip(c_outside_tups, g_outside[self.dialect]):
	self.assertEqual(cmap.c2g(CDSPosition.from_anchor(*args)),
	expect)

	# simple string should return correct value
	# \d*[+-]\d+
	@two_dialects
	def testGoodIntronsStr(self):
	"""c2g should work for intron positions (string)"""
	dia = self.dialect
	# c_str from dialect to dialect matches dialect g_str
	for c_str, g in zip(c_introns[dia], g_introns[dia]):
	self.assertEqual(cmap.c2g(c_str, dia).to(dia), g)

	# c_str from dialect as default matches default g_str
	for c_str, g in zip(c_introns[dia], g_introns[None]):
	self.assertEqual(cmap.c2g(c_str, dia), g)

	@two_dialects
	def testGoodOutsideStr(self):
	"""c2g should work for outside positions (string)"""
	dia = self.dialect
	for expected, arg in zip(g_outside[dia], c_outside[dia]):
	self.assertEqual(cmap.c2g(arg, dia).to(dia), expected)

	def testNegativeInt(self):
	"""c2g should treat negative integer as pre-CDS"""
	# XXX negative integers are treated as pre-CDS
	self.assertEqual(cmap.c2g(-2), 5806)

	# what it shouldn't do

	# bad positions in form 123+4 where 123 is not end of an exon
	# or where sign is wrong
	@two_dialects
	def testBadNotEnd(self):
	"""c2g should fail on invalid introns"""
	bad_introns = {None: ("160+5", # 160 is exonic
	"51-6", # 51 is end
	"52+10", # 52 is start
	), 'HGVS': ("160+5", "52-6", "53+10")}
	for arg in bad_introns[self.dialect]:
	self.assertRaises(CDSPositionError, cmap.c2g, arg, self.dialect)

	@two_dialects
	def testBadString(self):
	"""c2g should fail on arbitrary string"""
	bad_vals = {None: ["xxx", "4-5'", "*10"], 'HGVS': ["xxx", "4-5"]}
	for arg in bad_vals[self.dialect]:
	self.assertRaises(ValueError, cmap.c2g, arg, self.dialect)

	# integers outside length of exon should raise IndexError
	def testBadTooLarge(self):
	"""c2g should fail on values beyond the exon"""
	num = 400
	assert num > len(cmap.exons)
	self.assertRaises(IndexError, cmap.c2g, num)
	self.assertRaises(Exception, cmap.c2g, "%d%d" % (num, -12))


	class Testp2g(unittest.TestCase):

	def testGoodStr(self):
	"""p2g should return expected ranges"""
	for p_arg, gpos in zip(p_exons, g_exon_prs):
	self.assertEqual(cmap.p2g(p_arg), gpos)


	class Testg2p(unittest.TestCase):

	def testGoodStr(self):
	"""g2p should return correct position for exons"""
	for g_arg, ppos in zip(g_exons, p_exons_trip):
	self.assertEqual(cmap.g2p(g_arg), ppos)


	class TestStrand(unittest.TestCase):

	def setUp(self):
	locations = FeatureLocation(24, 30, -1) + FeatureLocation(35, 40, +1)
	feature = SeqFeature(locations, type="CDS")
	self.cm = CoordinateMapper(feature)
	self.g_exons = [29, 28, 27, 26, 25, 24,
	35, 36, 37, 38, 39]
	assert list(self.cm.exons) == self.g_exons
	c_len = 11
	assert c_len == len(list(self.cm.exons))
	self.c_exons = xrange(c_len)

	def testg2c(self):
	"c2g and g2c should work for mixed-strand exons"
	for c, g in zip(self.c_exons, self.g_exons):
	self.assertEqual(c, self.cm.g2c(g))
	self.assertEqual(g, self.cm.c2g(c))
	if c < 6:
	self.assertEqual(self.cm.c2g(c).strand, -1)
	else:
	self.assertEqual(self.cm.c2g(c).strand, +1)


	if __name__ == "__main__":
	#unittest.main()
	suite0 = unittest.TestLoader().loadTestsFromTestCase(TestCDSPosition)
	suite1 = unittest.TestLoader().loadTestsFromTestCase(TestCoordinateMapper)
	suite2 = unittest.TestLoader().loadTestsFromTestCase(Testc2g)
	suite3 = unittest.TestLoader().loadTestsFromTestCase(Testg2c)
	suite4 = unittest.TestLoader().loadTestsFromTestCase(Testc2p)
	suite5 = unittest.TestLoader().loadTestsFromTestCase(Testp2c)
	suite6 = unittest.TestLoader().loadTestsFromTestCase(Testp2g)
	suite7 = unittest.TestLoader().loadTestsFromTestCase(Testg2p)
	suite8 = unittest.TestLoader().loadTestsFromTestCase(TestStrand)
	all_suites = [
	suite0,
	suite1,
	suite2,
	suite3,
	suite4,
	suite5,
	suite6,
	suite7,
	suite8,
	]
	all = unittest.TestSuite(all_suites)
	unittest.TextTestRunner(verbosity=3).run(all)