Source code for autosnapgene.blocks.features

#!/usr/bin/env python3

from ..parser import Block, Xml, Repr
from ..util import reverse_complement

import autoprop
import xml.etree.ElementTree as etree
from more_itertools import one, always_iterable

@autoprop
class FeaturesBlock(Xml, Block):
    """
    Block number 10: the features annotated on a sequence.

    The block is serialized as a ``Features`` XML element containing one 
    ``Feature`` child per entry in the `features` list.
    """
    block_id = 10
    repr_attrs = ['features']

    # I'm not totally sure what's up with the id numbers in this block.  They 
    # aren't mentioned in the 2015 spec...

    class FeatureTag(Xml.AppendListTag):
        """
        Translate between ``Feature`` XML elements and `Feature` objects.
        """

        @staticmethod
        def from_xml(element):
            """Parse one ``Feature`` element into a `Feature` object."""
            return Feature.from_xml(element)

        @staticmethod
        def to_xml(parent, tag, features):
            """
            Serialize every feature and append the result to *parent*.

            The *tag* argument is accepted but not used here; each feature 
            builds its own element via its `to_xml()` method.
            """
            for item in features:
                parent.append(item.to_xml())

    xml_tag = 'Features'
    xml_subtag_defs = [
            ('features', 'Feature', FeatureTag, []),
    ]
    xml_attrib_defs = [
            ('_next_id', 'nextValidID', Xml.IntAttrib),

            # I don't know what this is at all...
            ('_recycled_ids', 'recycledIDs', Xml.TextAttrib),
    ]

    def __repr_attr__(self, attr):
        """
        Format *attr* for the repr.

        The feature list is shown as a comma-separated list of feature names, 
        truncated to 32 characters.  Every other attribute is delegated to the 
        parent class.
        """
        if attr != 'features':
            return super().__repr_attr__(attr)

        from textwrap import shorten

        names = ', '.join(x.name for x in self.features)
        return shorten(names, width=32, placeholder='...')

    def to_xml(self):
        """
        Serialize this block, first refreshing the stored ``nextValidID`` 
        attribute so that it equals the current number of features.
        """
        self._next_id = len(self.features)
        return super().to_xml()

    def get_next_id(self):
        """Return the number of features currently in this block."""
        return len(self.features)


@autoprop
class Feature(Xml, Repr):
    """
    A single annotated feature, serialized as a ``Feature`` XML element.

    A feature holds a list of segments (``Segment`` child elements), a 
    dictionary of qualifiers (``Q`` child elements), and the scalar attributes 
    listed in `xml_attrib_defs`.
    """
    repr_attrs = 'name', 'type'

    class SegmentTag(Xml.AppendListTag):
        """
        Translate between ``Segment`` XML elements and `FeatureSegment` 
        objects.
        """

        @staticmethod
        def from_xml(element):
            """Parse one ``Segment`` element into a `FeatureSegment`."""
            return FeatureSegment.from_xml(element)

        @staticmethod
        def to_xml(parent, tag, segments):
            """
            Serialize every segment and append the result to *parent*.

            The *tag* argument is accepted but not used here; each segment 
            builds its own element via its `to_xml()` method.
            """
            for segment in segments:
                parent.append(segment.to_xml())

    class QualifierTag(Xml.UpdateDictTag):
        """
        Translate between ``Q`` XML elements and ``{name: value}`` 
        dictionaries.

        Each ``Q`` element has a ``name`` attribute and zero or more ``V`` 
        children.  Each ``V`` child stores its value in a single attribute 
        whose name (``int`` or ``text``) identifies the data type.
        """
        # Maps the attribute name found on a <V> tag to the python type used 
        # to represent its value...
        data_types = {
                'int': int,
                'text': str,
        }
        # ...and the reverse mapping, used when writing values back out.
        data_formats = {v: k for k, v in data_types.items()}

        @classmethod
        def from_xml(cls, element):
            """
            Parse one ``Q`` element into a single-item dictionary.

            The value is a scalar if the element has exactly one child, and a 
            list (one item per ``V`` child) otherwise.  The element itself is 
            left unmodified.
            """
            name = element.attrib['name']

            def get_value(sub):
                # This isn't in the spec, but I assume I'll never get multiple 
                # attributes in a <V> tag.
                # 
                # However, it is possible to get no attributes.  I've only 
                # encountered this in one example: a translated feature that 
                # was too short to actually have a translation (e.g. 1 bp).  In 
                # this case, value probably should be "", since "translation" 
                # is normally text.  Assuming that integer data will never just 
                # be left out like this, I'm going to interpret no attributes 
                # as "".
                if not sub.attrib:
                    return ""

                assert len(sub.attrib) == 1, etree.tostring(element)

                # Read the attribute without removing it.  (Using `popitem()` 
                # here would strip the value from the element, so that parsing 
                # or serializing the same element again would silently give an 
                # empty value.)  The single-item unpacking also enforces the 
                # one-attribute assumption when asserts are disabled.
                ((key, value),) = sub.attrib.items()

                # Unrecognized data types are kept as text.
                return cls.data_types.get(key, str)(value)

            if len(element) == 1:
                value = get_value(element.find('V'))
            else:
                value = [get_value(x) for x in element.findall('V')]

            return {name: value}

        @classmethod
        def to_xml(cls, parent, tag, values):
            """
            Append one *tag* element to *parent* for each qualifier in 
            *values*, with one ``V`` child per value.

            Raises `KeyError` if a value is neither exactly an `int` nor 
            exactly a `str`, because the data type is looked up by exact type.
            """
            for name, value in values.items():
                q = etree.SubElement(parent, tag)
                q.attrib['name'] = str(name)

                # `always_iterable()` lets scalar and list values share the 
                # same code path.
                for value_i in always_iterable(value):
                    v = etree.SubElement(q, 'V')

                    # Special-case empty strings as described in from_xml(): 
                    # the <V> tag is written, but without any attributes.
                    if value_i == "":
                        continue

                    data_format = cls.data_formats[type(value_i)]
                    v.attrib[data_format] = str(value_i)

    class DirectionalityAttrib(Xml.EnumAttrib):
        """
        Associate the numeric strings stored in the ``directionality`` 
        attribute with descriptive names.
        """
        value_from_str = {
                '0': 'none',
                '1': 'forward',
                '2': 'backward',
                '3': 'bidirectional',
        }

    class CleavageArrowsAttrib:
        """
        Convert between a comma-separated string of integers and a list of 
        integers.
        """

        @staticmethod
        def from_str(str):
            """
            Parse a comma-separated string into a list of integers.

            The empty string gives an empty list, so that the output of 
            `to_str()` can always be parsed back.  Raises `ValueError` if any 
            item is not an integer.
            """
            # Without this check, `int('')` would raise for the string that 
            # `to_str([])` produces.
            if str == "":
                return []

            return [int(x) for x in str.split(',')]

        @staticmethod
        def to_str(value):
            """Join the given values into a comma-separated string."""
            return ','.join(str(x) for x in value)

    xml_tag = 'Feature'
    xml_subtag_defs = [
            ('segments', 'Segment', SegmentTag, []),
            ('qualifiers', 'Q', QualifierTag, {}),
    ]
    xml_attrib_defs = [
            ('id', 'recentID', Xml.IntAttrib),
            ('name', 'name', Xml.TextAttrib),
            ('type', 'type', Xml.TextAttrib),
            ('directionality', 'directionality', DirectionalityAttrib),
            ('reading_frame', 'readingFrame', Xml.IntAttrib),
            ('cleavage_arrows', 'cleavageArrows', CleavageArrowsAttrib),
            ('allow_segment_overlaps', 'allowSegmentOverlaps', Xml.BoolAttrib),
            ('swapped_segment_numbering', 'swappedSegmentNumbering', Xml.BoolAttrib),
            ('max_run_on', 'maxRunOn', Xml.IntAttrib),
            ('max_fused_run_on', 'maxFusedRunOn', Xml.IntAttrib),
            ('detection_mode', 'detectionMode', Xml.TextAttrib),

            # Translation-related attributes:
            ('genetic_code_id', 'geneticCode', Xml.TextAttrib),
            ('first_codon_met', 'translateFirstCodonAsMet', Xml.BoolAttrib),
            ('consecutive_translation_numbering', 'consecutiveTranslationNumbering', Xml.BoolAttrib),
            ('consecutive_numbering_start', 'consecutiveNumberingStartsFrom', Xml.IntAttrib),
            ('translated_mw', 'translationMW', Xml.FloatAttrib),
            ('hits_stop_codon', 'hitsStopCodon', Xml.BoolAttrib),
    ]

    @classmethod
    def from_segment(cls, **kwargs):
        """
        Instantiate a feature with a single segment.

        Keyword arguments corresponding to any attribute of either the Feature 
        or FeatureSegment classes are accepted.  Arguments naming an attribute 
        of this class are given to the feature; all others are given to the 
        segment.
        """
        feature_kwargs = {}
        segment_kwargs = {}

        for key, value in kwargs.items():
            if key in cls._defined_names:
                feature_kwargs[key] = value
            else:
                segment_kwargs[key] = value

        feature = cls(**feature_kwargs)
        feature.segments = [FeatureSegment(**segment_kwargs)]
        return feature

    def get_segment(self):
        """
        If this feature has only one segment, return it.  Otherwise, raise an 
        exception.
        """
        return one(self.segments)

    def set_segment(self, segment):
        """
        Remove any segments associated with this feature, and replace them with 
        the given segment.

        After calling this setter (or assigning to this attribute), the feature 
        will have exactly one segment.  It will still be possible to add or 
        remove segments later, though.
        """
        self.segments = [segment]

    def get_range(self):
        """Return the ``(begin, end)`` pair spanning every segment."""
        return self.begin, self.end

    def get_begin(self):
        """Return the smallest start index among this feature's segments."""
        return min(x.range[0] for x in self.segments)

    def get_end(self):
        """Return the largest end index among this feature's segments."""
        return max(x.range[1] for x in self.segments)

    def get_sequence(self, full_seq):
        """
        Return the part of *full_seq* covered by this feature.

        The sequence is sliced from the feature's `begin` to its `end`, so for 
        a feature with several segments, anything between the segments is 
        included.  If the directionality is ``'backward'``, the reverse 
        complement of that slice is returned.
        """
        # It'd be nice to get rid of the *full_seq* argument, but that would 
        # require this block to have access to the top-level `SnapGene` object.  
        # This might be ok, but it's a bigger change than I want to make right 
        # now.
        start, end = self.range
        seq = full_seq[start:end]

        if self.directionality == 'backward':
            return reverse_complement(seq)
        else:
            return seq

@autoprop
class FeatureSegment(Xml, Repr):
    """
    One contiguous piece of a feature, serialized as a ``Segment`` XML 
    element.
    """
    # NOTE(review): 'type' is not among the attribute names defined in 
    # `xml_attrib_defs` below (the XML ``type`` attribute is exposed as 
    # `display`) -- confirm whether 'display' was intended here.
    repr_attrs = 'type', 'color', 'range'

    class RangeAttrib:
        """
        Convert between the ``"first-last"`` strings stored in the file and 
        ``(start, stop)`` tuples that follow python slicing conventions.
        """

        @staticmethod
        def from_str(str):
            """
            Parse a ``"first-last"`` string into a ``(start, stop)`` tuple.

            The first index is decremented by one and the last is left as is, 
            so that the result can be used directly to slice a sequence.
            """
            bounds = [int(x) for x in str.split('-')]
            first, last = bounds
            return first - 1, last

        @staticmethod
        def to_str(value):
            """
            Format a ``(start, stop)`` tuple as a ``"first-last"`` string, 
            undoing the adjustment made by `from_str()`.
            """
            start, stop = value
            return '{}-{}'.format(start + 1, stop)

    xml_tag = 'Segment'
    xml_attrib_defs = [
            ('name', 'name', Xml.TextAttrib),
            ('range', 'range', RangeAttrib),
            ('display', 'type', Xml.TextAttrib),
            ('color', 'color', Xml.TextAttrib),
            ('is_translated', 'translated', Xml.BoolAttrib),
            ('translation_start_number', 'translationNumberingStartsFrom', Xml.IntAttrib),
    ]

    def get_begin(self):
        """Return the first item of `range`."""
        begin, _ = self.range[0], self.range[1]
        return begin

    def get_end(self):
        """Return the second item of `range`."""
        _, end = self.range[0], self.range[1]
        return end