#!/usr/bin/env python3
from ..parser import Block, Xml, Repr
from ..util import reverse_complement
import autoprop
import xml.etree.ElementTree as etree
from more_itertools import one, always_iterable
@autoprop
class FeaturesBlock(Xml, Block):
    """
    Block (id 10) that stores a list of `Feature` objects.

    The block is serialized as a ``<Features>`` XML element with one
    ``<Feature>`` child per entry in ``features``.
    """
    block_id = 10
    repr_attrs = ['features']

    # I'm not totally sure what's up with the id numbers in this block.  They
    # aren't mentioned in the 2015 spec...

    class FeatureTag(Xml.AppendListTag):
        """Convert between ``<Feature>`` elements and `Feature` objects."""

        @staticmethod
        def from_xml(element):
            """Build a `Feature` from a single ``<Feature>`` element."""
            return Feature.from_xml(element)

        @staticmethod
        def to_xml(parent, tag, features):
            """
            Serialize every feature and append the result to *parent*.

            *tag* is not used: each feature produces its own element.
            """
            for item in features:
                parent.append(item.to_xml())

    xml_tag = 'Features'
    xml_subtag_defs = [
        ('features', 'Feature', FeatureTag, []),
    ]
    xml_attrib_defs = [
        ('_next_id', 'nextValidID', Xml.IntAttrib),
        # I don't know what this is at all...
        ('_recycled_ids', 'recycledIDs', Xml.TextAttrib),
    ]

    def __repr_attr__(self, attr):
        """
        Return the text used to show *attr* in the repr.

        ``features`` is rendered as a comma-separated list of feature names,
        shortened to at most 32 characters.  Any other attribute is handled
        by the parent class.
        """
        if attr != 'features':
            return super().__repr_attr__(attr)

        from textwrap import shorten

        names = ', '.join(x.name for x in self.features)
        return shorten(names, width=32, placeholder='...')

    def to_xml(self):
        """Update the stored next-id value, then serialize the block."""
        self._next_id = self.get_next_id()
        return super().to_xml()

    def get_next_id(self):
        """Return the current number of features."""
        return len(self.features)
@autoprop
class Feature(Xml, Repr):
    """
    A single ``<Feature>`` element.

    A feature carries a list of `FeatureSegment` objects (``segments``), a
    dictionary of qualifiers (``qualifiers``), and the scalar attributes
    listed in ``xml_attrib_defs``.
    """
    repr_attrs = 'name', 'type'

    class SegmentTag(Xml.AppendListTag):
        """Convert between ``<Segment>`` elements and `FeatureSegment`."""

        @staticmethod
        def from_xml(element):
            """Build a `FeatureSegment` from a ``<Segment>`` element."""
            return FeatureSegment.from_xml(element)

        @staticmethod
        def to_xml(parent, tag, segments):
            """
            Serialize every segment and append the result to *parent*.

            *tag* is not used: each segment produces its own element.
            """
            for segment in segments:
                parent.append(segment.to_xml())

    class QualifierTag(Xml.UpdateDictTag):
        """Convert between ``<Q>`` elements and ``{name: value}`` entries."""

        # Maps the attribute name found on a <V> element to the python type
        # used to parse its value...
        data_types = {
            'int': int,
            'text': str,
        }
        # ...and the reverse mapping, used when writing values back out.
        data_formats = {v: k for k, v in data_types.items()}

        @classmethod
        def from_xml(cls, element):
            """
            Parse a ``<Q>`` element into a one-item dictionary.

            The key is the element's ``name`` attribute.  The value is a
            single parsed value if the element has exactly one child, and a
            list with one parsed value per ``<V>`` child otherwise.

            The element is only read; it is not modified.
            """
            name = element.attrib['name']

            def get_value(sub):
                # This isn't in the spec, but I assume I'll never get multiple
                # attributes in a <V> tag.
                #
                # However, it is possible to get no attributes.  I've only
                # encountered this in one example: a translated feature that
                # was too short to actually have a translation (e.g. 1 bp).
                # In this case, value probably should be "", since
                # "translation" is normally text.  Assuming that integer data
                # will never just be left out like this, I'm going to
                # interpret no attributes as "".
                if not sub.attrib:
                    return ""

                assert len(sub.attrib) == 1, etree.tostring(element)

                # Read the attribute without removing it: `popitem()` would
                # strip the value from the parsed tree.  `one()` also rejects
                # multiple attributes when assertions are disabled (-O)
                # instead of silently picking one of them.
                key, value = one(sub.attrib.items())

                # Unknown attribute names are kept as text.
                return cls.data_types.get(key, str)(value)

            if len(element) == 1:
                value = get_value(element.find('V'))
            else:
                value = [get_value(x) for x in element.findall('V')]

            return {name: value}

        @classmethod
        def to_xml(cls, parent, tag, values):
            """
            Append one *tag* element to *parent* for each qualifier.

            Each qualifier value (or each item, if the value is iterable)
            becomes a ``<V>`` child.  Empty strings produce a ``<V>`` with no
            attributes.  The type of every other value must be exactly `int`
            or `str`; any other type raises `KeyError`.
            """
            for name, value in values.items():
                q = etree.SubElement(parent, tag)
                q.attrib['name'] = str(name)

                for value_i in always_iterable(value):
                    v = etree.SubElement(q, 'V')

                    # Special-case empty strings as described in from_xml().
                    if value_i == "":
                        continue

                    data_format = cls.data_formats[type(value_i)]
                    v.attrib[data_format] = str(value_i)

    class DirectionalityAttrib(Xml.EnumAttrib):
        """Names for the numeric codes of the ``directionality`` attribute."""
        value_from_str = {
            '0': 'none',
            '1': 'forward',
            '2': 'backward',
            '3': 'bidirectional',
        }

    class CleavageArrowsAttrib:
        """Convert between a comma-separated string and a list of ints."""

        @staticmethod
        def from_str(str):
            """
            Parse a comma-separated list of integers.

            An empty (or all-whitespace) string gives an empty list, so that
            the output of ``to_str([])`` can be read back in.
            """
            if not str.strip():
                return []
            return [int(x) for x in str.split(',')]

        @staticmethod
        def to_str(value):
            """Join the items of *value* with commas."""
            return ','.join(str(x) for x in value)

    xml_tag = 'Feature'
    xml_subtag_defs = [
        ('segments', 'Segment', SegmentTag, []),
        ('qualifiers', 'Q', QualifierTag, {}),
    ]
    xml_attrib_defs = [
        ('id', 'recentID', Xml.IntAttrib),
        ('name', 'name', Xml.TextAttrib),
        ('type', 'type', Xml.TextAttrib),
        ('directionality', 'directionality', DirectionalityAttrib),
        ('reading_frame', 'readingFrame', Xml.IntAttrib),
        ('cleavage_arrows', 'cleavageArrows', CleavageArrowsAttrib),
        ('allow_segment_overlaps', 'allowSegmentOverlaps', Xml.BoolAttrib),
        ('swapped_segment_numbering', 'swappedSegmentNumbering', Xml.BoolAttrib),
        ('max_run_on', 'maxRunOn', Xml.IntAttrib),
        ('max_fused_run_on', 'maxFusedRunOn', Xml.IntAttrib),
        ('detection_mode', 'detectionMode', Xml.TextAttrib),

        # Translation-related attributes:
        ('genetic_code_id', 'geneticCode', Xml.TextAttrib),
        ('first_codon_met', 'translateFirstCodonAsMet', Xml.BoolAttrib),
        ('consecutive_translation_numbering', 'consecutiveTranslationNumbering', Xml.BoolAttrib),
        ('consecutive_numbering_start', 'consecutiveNumberingStartsFrom', Xml.IntAttrib),
        ('translated_mw', 'translationMW', Xml.FloatAttrib),
        ('hits_stop_codon', 'hitsStopCodon', Xml.BoolAttrib),
    ]

    @classmethod
    def from_segment(cls, **kwargs):
        """
        Instantiate a feature with a single segment.

        Keyword arguments corresponding to any attribute of either the Feature
        or FeatureSegment classes are accepted.  Names found in
        ``cls._defined_names`` are passed to the feature; all others are
        passed to the segment.
        """
        feature_kwargs = {
            k: v
            for k, v in kwargs.items()
            if k in cls._defined_names
        }
        segment_kwargs = {
            k: v
            for k, v in kwargs.items()
            if k not in cls._defined_names
        }

        feature = cls(**feature_kwargs)
        feature.segments = [FeatureSegment(**segment_kwargs)]
        return feature

    def get_segment(self):
        """
        If this feature has only one segment, return it.  Otherwise, raise an
        exception.
        """
        return one(self.segments)

    def set_segment(self, segment):
        """
        Remove any segments associated with this feature, and replace them with
        the given segment.

        After calling this setter (or assigning to this attribute), the feature
        will have exactly one segment.  It will still be possible to add or
        remove segments later, though.
        """
        self.segments = [segment]

    def get_range(self):
        """Return ``(begin, end)`` for the feature as a whole."""
        return self.begin, self.end

    def get_begin(self):
        """Return the smallest start index among the segments."""
        return min(x.range[0] for x in self.segments)

    def get_end(self):
        """Return the largest end index among the segments."""
        return max(x.range[1] for x in self.segments)

    def get_sequence(self, full_seq):
        """
        Return the slice of *full_seq* covered by this feature.

        The slice runs from the feature's begin to its end, so it includes
        anything lying between segments.  If the directionality is
        ``'backward'``, the reverse complement of the slice is returned.
        """
        # It'd be nice to get rid of the *full_seq* argument, but that would
        # require this block to have access to the top-level `SnapGene` object.
        # This might be ok, but it's a bigger change than I want to make right
        # now.
        start, end = self.range
        seq = full_seq[start:end]

        if self.directionality == 'backward':
            return reverse_complement(seq)
        else:
            return seq
@autoprop
class FeatureSegment(Xml, Repr):
    """
    A single ``<Segment>`` element of a feature.

    ``range`` is stored as a ``(begin, end)`` pair that follows the python
    slicing conventions (see `RangeAttrib`).
    """
    # The XML ``type`` attribute is exposed under the python name ``display``
    # (see `xml_attrib_defs` below), so that is the name to show in the repr;
    # segments have no attribute called ``type``.
    repr_attrs = 'display', 'color', 'range'

    class RangeAttrib:
        """Convert between ``"<first>-<last>"`` strings and index pairs."""

        @staticmethod
        def from_str(str):
            """Parse ``"i-j"`` into the pair ``(i - 1, j)``."""
            # Make the range indices compatible with the conventions for
            # python slicing.
            i, j = (int(x) for x in str.split('-'))
            return i - 1, j

        @staticmethod
        def to_str(value):
            """Format the pair ``(i, j)`` as ``"{i+1}-{j}"``."""
            i, j = value
            return f'{i+1}-{j}'

    xml_tag = 'Segment'
    xml_attrib_defs = [
        ('name', 'name', Xml.TextAttrib),
        ('range', 'range', RangeAttrib),
        ('display', 'type', Xml.TextAttrib),
        ('color', 'color', Xml.TextAttrib),
        ('is_translated', 'translated', Xml.BoolAttrib),
        ('translation_start_number', 'translationNumberingStartsFrom', Xml.IntAttrib),
    ]

    def get_begin(self):
        """Return the first item of ``range``."""
        return self.range[0]

    def get_end(self):
        """Return the second item of ``range``."""
        return self.range[1]