Source code for glue.core.subset

from __future__ import absolute_import, division, print_function

import numbers
import operator

import numpy as np

from glue.external.six import PY3
from glue.core.roi import CategoricalROI
from glue.core.contracts import contract
from glue.core.util import split_component_view
from glue.core.registry import Registry
from glue.core.exceptions import IncompatibleAttribute
from glue.core.message import SubsetDeleteMessage, SubsetUpdateMessage
from glue.core.decorators import memoize
from glue.core.visual import VisualAttributes
from glue.config import settings
from glue.utils import view_shape


# Public names exported by a star-import of this module.
__all__ = ['Subset', 'SubsetState', 'RoiSubsetState', 'CompositeSubsetState',
           'OrState', 'AndState', 'XorState', 'InvertState',
           'ElementSubsetState', 'RangeSubsetState', 'combine_multiple']

# Display symbol for each supported comparison / bitwise operator function.
OPSYM = {
    operator.ge: '>=',
    operator.gt: '>',
    operator.le: '<=',
    operator.lt: '<',
    operator.and_: '&',
    operator.or_: '|',
    operator.xor: '^',
    operator.eq: '==',
    operator.ne: '!=',
}

# Reverse lookup: display symbol -> operator function.
SYMOP = dict(zip(OPSYM.values(), OPSYM.keys()))


class Subset(object):
    """Base class to handle subsets of data.

    These objects both describe subsets of a dataset, and relay any
    state changes to the hub that their parent data are assigned to.

    This base class only directly implements the logic that relays
    state changes back to the hub. Subclasses implement the actual
    description and manipulation of data subsets

    :param data:
        The dataset that this subset describes
    :type data: :class:`~glue.core.data.Data`
    """

    @contract(data='isinstance(Data)|None',
              color='color',
              alpha=float,
              label='string|None')
    def __init__(self, data, color=settings.SUBSET_COLORS[0],
                 alpha=0.5, label=None):
        """ Create a new subset object.

        Note: the preferred way for creating subsets is
        via DataCollection.new_subset_group. Manually-instantiated
        subsets will probably *not* be represented properly by the UI
        """
        self._broadcasting = False  # must be first def
        self.data = data
        self._subset_state = None
        self._label = None
        self._style = None
        self._setup(color, alpha, label)

    @contract(color='color', alpha='float', label='string|None')
    def _setup(self, color, alpha, label):
        """Initialise color, label, style and an empty subset state."""
        self.color = color
        self.label = label  # trigger disambiguation
        self.style = VisualAttributes(parent=self)
        self.style.markersize *= 1.5
        self.style.color = color
        self.style.alpha = alpha
        self.subset_state = SubsetState()  # calls proper setter method

    @property
    def subset_state(self):
        """The SubsetState currently attached to this subset."""
        return self._subset_state

    @subset_state.setter
    def subset_state(self, state):
        """Attach a new state.

        A numpy array is accepted as a shorthand for a mask and is wrapped
        in a MaskSubsetState; its shape must match the data shape.

        Raises:
            ValueError: if an array is given whose shape differs from the
                shape of the data.
            TypeError: if the state is neither an array nor a SubsetState.
        """
        if isinstance(state, np.ndarray):
            if self.data.shape != state.shape:
                raise ValueError("Shape of mask doesn't match shape of data")
            cids = self.data.pixel_component_ids
            state = MaskSubsetState(state, cids)
        if not isinstance(state, SubsetState):
            raise TypeError("State must be a SubsetState instance or array")
        self._subset_state = state

    @property
    def style(self):
        """The VisualAttributes object attached to this subset."""
        return self._style

    @style.setter
    @contract(value=VisualAttributes)
    def style(self, value):
        # Re-parent the style object to this subset before storing it
        value.parent = self
        self._style = value

    @property
    def label(self):
        """ Convenience access to subset's label """
        return self._label

    @label.setter
    def label(self, value):
        """Set the subset's label

        Subset labels within a data object must be unique. The input
        will be auto-disambiguated if necessary
        """
        value = Registry().register(self, value, group=self.data)
        self._label = value

    @property
    def attributes(self):
        """
        Returns a tuple of the ComponentIDs that this subset
        depends upon
        """
        return self.subset_state.attributes

    def register(self):
        """ Register a subset to its data, and start broadcasting
        state changes

        """
        self.data.add_subset(self)
        self.do_broadcast(True)

    @contract(returns='array[N]')
    def to_index_list(self):
        """
        Convert the current subset to a list of indices. These index
        the elements in the (flattened) data object that belong to the subset.

        If x is the numpy array corresponding to some component.data,
        the two following statements are equivalent::

           x.flat[subset.to_index_list()]
           x[subset.to_mask()]

        Returns:

           A numpy array, giving the indices of elements in the data that
           belong to this subset.

        Raises:

           IncompatibleAttribute: if an index list cannot be created
           for the requested data set.
        """
        try:
            return self.subset_state.to_index_list(self.data)
        except IncompatibleAttribute as exc:
            # Fall back to a key join; if that fails as well, report the
            # original error rather than the one from the fallback.
            try:
                return self._to_index_list_join()
            except IncompatibleAttribute:
                raise exc

    def _to_index_list_join(self):
        """Index-list counterpart of :meth:`_to_mask_join`."""
        return np.where(self._to_mask_join(None).flat)[0]

    def _to_mask_join(self, view):
        """Convert the subset to a mask through an entity join to another
        dataset.

        Raises:
            IncompatibleAttribute: if no entry of ``self.data._key_joins``
                yields a mask.
        """
        for other, (cid1, cid2) in self.data._key_joins.items():
            # Skip datasets that are already being evaluated higher up
            # the call stack.
            if getattr(other, '_recursing', False):
                continue

            try:
                self.data._recursing = True
                s2 = Subset(other)
                s2.subset_state = self.subset_state
                key_right = s2.to_mask()
            except IncompatibleAttribute:
                continue
            finally:
                self.data._recursing = False

            key_left = self.data[cid1, view]
            result = np.in1d(key_left.ravel(),
                             other[cid2, key_right])

            return result.reshape(key_left.shape)

        raise IncompatibleAttribute

    @contract(view='array_view', returns='array')
    def to_mask(self, view=None):
        """
        Convert the current subset to a mask.

        :param view: An optional view into the dataset (e.g. a slice)
                     If present, the mask will pertain to the view and not the
                     entire dataset.

        Returns:

           A boolean numpy array, the same shape as the data, that
           defines whether each element belongs to the subset.
        """
        try:
            return self.subset_state.to_mask(self.data, view)
        except IncompatibleAttribute as exc:
            # Same fallback strategy as to_index_list
            try:
                return self._to_mask_join(view)
            except IncompatibleAttribute:
                raise exc

    @contract(value=bool)
    def do_broadcast(self, value):
        """
        Set whether state changes to the subset are relayed to a hub.

        It can be useful to turn off broadcasting, when modifying the
        subset in ways that don't impact any of the clients.

        Attributes:

        value: Whether the subset should broadcast state changes (True/False)

        """
        # Bypass our own __setattr__ so that this never triggers a broadcast
        object.__setattr__(self, '_broadcasting', value)

    @contract(attribute='string')
    def broadcast(self, attribute):
        """
        Explicitly broadcast a SubsetUpdateMessage to the hub

        :param attribute:
                   The name of the attribute (if any) that should be
                   broadcast as updated.
        :type attribute: ``str``

        """
        if not hasattr(self, 'data') or not hasattr(self.data, 'hub'):
            return

        if self._broadcasting and self.data.hub:
            msg = SubsetUpdateMessage(self, attribute=attribute)
            self.data.hub.broadcast(msg)

    def delete(self):
        """Broadcast a SubsetDeleteMessage to the hub, and stop broadcasting

        Also removes subset reference from parent data's subsets list
        """
        # Decide whether to notify the hub *before* broadcasting is disabled
        dobroad = self._broadcasting and self.data is not None and \
            self.data.hub is not None

        self.do_broadcast(False)
        if self.data is not None and self in self.data.subsets:
            self.data._subsets.remove(self)

        if dobroad:
            msg = SubsetDeleteMessage(self)
            self.data.hub.broadcast(msg)

        Registry().unregister(self, group=self.data)

    @contract(file_name='string')
    def write_mask(self, file_name, format="fits"):
        """ Write a subset mask out to file

        :param file_name: name of file to write to
        :param format:
           Name of format to write to. Currently, only "fits" is
           supported

        Raises:
            ImportError: if astropy is not available.
            AttributeError: if ``format`` is not supported.
        """
        mask = np.short(self.to_mask())
        if format == 'fits':
            try:
                from astropy.io import fits
                # NOTE(review): newer astropy releases appear to spell this
                # keyword ``overwrite`` -- confirm against the supported
                # astropy version before changing it.
                fits.writeto(file_name, mask, clobber=True)
            except ImportError:
                raise ImportError("Cannot write mask -- requires astropy")
        else:
            raise AttributeError("format not supported: %s" % format)

    @contract(file_name='string')
    def read_mask(self, file_name):
        """ Read a mask from a FITS file and use it as the subset state

        The non-zero elements of the first HDU's data become the members
        of the subset (stored as an ElementSubsetState).

        :param file_name: name of the file to read

        Raises:
            ImportError: if astropy is not available.
            IOError: if the file cannot be read.
        """
        try:
            from astropy.io import fits
        except ImportError:
            raise ImportError("Cannot read mask -- requires astropy")

        try:
            # Compute the index list while the file is still open, then let
            # the context manager close it (previously it was left open).
            with fits.open(file_name) as hdulist:
                ind = np.where(hdulist[0].data.flat)[0]
        except IOError:
            raise IOError("Could not read %s (not a fits file?)" % file_name)

        state = ElementSubsetState(indices=ind)
        self.subset_state = state

    def __del__(self):
        self.delete()

    def __setattr__(self, attribute, value):
        object.__setattr__(self, attribute, value)
        # Only public attributes are announced to the hub
        if not attribute.startswith('_'):
            self.broadcast(attribute)

    def __getitem__(self, view):
        """ Retrieve the elements from a data view within the subset

        :param view: View of the data. See data.__getitem__ for details
        """
        _, v = split_component_view(view)
        ma = self.to_mask(v)
        return self.data[view][ma]

    @contract(other_subset='isinstance(Subset)')
    def paste(self, other_subset):
        """paste subset state from other_subset onto self """
        state = other_subset.subset_state.copy()
        self.subset_state = state

    def __str__(self):
        dlabel = "(no data)"
        if self.data is not None:
            dlabel = "(data: %s)" % self.data.label
        slabel = "Subset: (no label)"
        if self.label:
            slabel = "Subset: %s" % self.label
        return "%s %s" % (slabel, dlabel)

    def __repr__(self):
        return self.__str__()

    @contract(other='isinstance(Subset)', returns='isinstance(Subset)')
    def __or__(self, other):
        return _combine([self, other], operator.or_)

    @contract(other='isinstance(Subset)', returns='isinstance(Subset)')
    def __and__(self, other):
        return _combine([self, other], operator.and_)

    @contract(returns='isinstance(Subset)')
    def __invert__(self):
        return _combine([self], operator.invert)

    @contract(other='isinstance(Subset)', returns='isinstance(Subset)')
    def __xor__(self, other):
        return _combine([self, other], operator.xor)

    def __eq__(self, other):
        if not isinstance(other, Subset):
            return False
        # XXX need to add equality specification for subset states
        return (self.subset_state == other.subset_state and
                self.style == other.style)

    def state_as_mask(self):
        """
        Convert the current SubsetState to a MaskSubsetState

        If the subset cannot be evaluated on its data
        (IncompatibleAttribute), an all-False mask is used instead.
        """
        try:
            m = self.to_mask()
        except IncompatibleAttribute:
            # ``np.bool`` was only an alias of the builtin and has been
            # removed from NumPy; the builtin works on every version.
            m = np.zeros(self.data.shape, dtype=bool)
        cids = self.data.pixel_component_ids
        return MaskSubsetState(m, cids)

    # In Python 2 we need to do this explicitly
    def __ne__(self, other):
        return not self.__eq__(other)

    # In Python 3, if __eq__ is defined, then __hash__ has to be re-defined
    if PY3:
        __hash__ = object.__hash__
class SubsetState(object):
    """Base class for subset states.

    On its own this state depends on no attributes and produces an
    all-False mask.
    """

    def __init__(self):
        pass

    @property
    def attributes(self):
        """Tuple of attributes this state depends on (empty here)."""
        return ()

    @property
    def subset_state(self):
        # convenience method, mimic interface of Subset
        return self

    @contract(data='isinstance(Data)')
    def to_index_list(self, data):
        """Return the flat indices at which ``to_mask(data)`` is set."""
        flat_mask = self.to_mask(data).flat
        return np.where(flat_mask)[0]

    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Return an all-False boolean array shaped like the viewed data."""
        return np.zeros(view_shape(data.shape, view), dtype=bool)

    @contract(returns='isinstance(SubsetState)')
    def copy(self):
        """Return a fresh, empty SubsetState."""
        return SubsetState()

    @contract(other_state='isinstance(SubsetState)',
              returns='isinstance(SubsetState)')
    def __or__(self, other_state):
        return OrState(self, other_state)

    @contract(other_state='isinstance(SubsetState)',
              returns='isinstance(SubsetState)')
    def __and__(self, other_state):
        return AndState(self, other_state)

    @contract(returns='isinstance(SubsetState)')
    def __invert__(self):
        return InvertState(self)

    @contract(other_state='isinstance(SubsetState)',
              returns='isinstance(SubsetState)')
    def __xor__(self, other_state):
        return XorState(self, other_state)
class RoiSubsetState(SubsetState):
    """Subset state that applies a region of interest to two attributes.

    :param xatt: attribute passed as the first argument to ``roi.contains``
    :param yatt: attribute passed as the second argument to ``roi.contains``
    :param roi: object providing ``contains(x, y)``
    """

    def __init__(self, xatt=None, yatt=None, roi=None):
        super(RoiSubsetState, self).__init__()
        self.xatt = xatt
        self.yatt = yatt
        self.roi = roi

    @property
    def attributes(self):
        """The ``(xatt, yatt)`` pair this state depends on."""
        return (self.xatt, self.yatt)

    @memoize
    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Evaluate the ROI on the x/y attributes of ``data``."""
        x = data[self.xatt, view]
        y = data[self.yatt, view]
        inside = self.roi.contains(x, y)
        assert x.shape == inside.shape
        return inside

    def copy(self):
        """Return a new state sharing the same attributes and ROI object."""
        return RoiSubsetState(xatt=self.xatt, yatt=self.yatt, roi=self.roi)
class CategoricalROISubsetState(SubsetState):
    """Subset state that applies an ROI to a single categorical attribute.

    :param att: the attribute whose component is tested
    :param roi: object providing ``contains(x, None)``
    """

    def __init__(self, att=None, roi=None):
        super(CategoricalROISubsetState, self).__init__()
        self.att = att
        self.roi = roi

    @property
    def attributes(self):
        """One-element tuple holding the attribute this state depends on."""
        return (self.att,)

    @memoize
    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Evaluate the ROI on the component's ``_categorical_data``.

        The result is returned flattened.
        """
        component = data.get_component(self.att)
        x = component._categorical_data[view]
        inside = self.roi.contains(x, None)
        assert x.shape == inside.shape
        return inside.ravel()

    def copy(self):
        """Return a new state sharing the same attribute and ROI object."""
        return CategoricalROISubsetState(att=self.att, roi=self.roi)

    @staticmethod
    def from_range(component, att, lo, hi):
        """Build a state from ``CategoricalROI.from_range(component, lo, hi)``."""
        range_roi = CategoricalROI.from_range(component, lo, hi)
        return CategoricalROISubsetState(roi=range_roi, att=att)
class RangeSubsetState(SubsetState):
    """Subset state selecting values of one attribute within ``[lo, hi]``.

    Both bounds are inclusive.
    """

    def __init__(self, lo, hi, att=None):
        super(RangeSubsetState, self).__init__()
        self.lo = lo
        self.hi = hi
        self.att = att

    @property
    def attributes(self):
        """One-element tuple holding the attribute this state depends on."""
        return (self.att,)

    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Return the element-wise test ``lo <= data[att] <= hi``."""
        values = data[self.att, view]
        return (values >= self.lo) & (values <= self.hi)

    def copy(self):
        """Return a new state with the same bounds and attribute."""
        return RangeSubsetState(self.lo, self.hi, self.att)
class MultiRangeSubsetState(SubsetState):
    """
    A subset state defined by multiple discontinuous ranges

    Parameters
    ----------
    pairs : list
        A list of (lo, hi) tuples
    """

    def __init__(self, pairs, att=None):
        super(MultiRangeSubsetState, self).__init__()
        self.pairs = pairs
        self.att = att

    @property
    def attributes(self):
        """One-element tuple holding the attribute this state depends on."""
        return (self.att,)

    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Return the union of the inclusive ``[lo, hi]`` tests over all pairs."""
        values = data[self.att, view]
        selected = np.zeros_like(values, dtype=bool)
        for lower, upper in self.pairs:
            selected |= (values >= lower) & (values <= upper)
        return selected

    def copy(self):
        """Return a new state referring to the same pairs and attribute."""
        return MultiRangeSubsetState(self.pairs, self.att)
class CompositeSubsetState(SubsetState):
    """Subset state combining one or two other states with an operator.

    Subclasses set the class attribute ``op`` to the binary function applied
    to the masks of ``state1`` and ``state2``.

    :param state1: first operand; a copy is stored
    :param state2: optional second operand; a copy is stored when given
    """

    # Binary function applied to the two operand masks; set by subclasses
    op = None

    def __init__(self, state1, state2=None):
        super(CompositeSubsetState, self).__init__()
        self.state1 = state1.copy()
        # Test identity rather than truthiness, matching ``attributes``
        # below: an operand is absent only when it is None.
        if state2 is not None:
            state2 = state2.copy()
        self.state2 = state2

    def copy(self):
        """Return a new state of the same type built from both operands."""
        return type(self)(self.state1, self.state2)

    @property
    def attributes(self):
        """Sorted, de-duplicated tuple of the operands' attributes."""
        att = self.state1.attributes
        if self.state2 is not None:
            att += self.state2.attributes
        return tuple(sorted(set(att)))

    @memoize
    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Apply ``op`` to the masks of the two operand states."""
        return self.op(self.state1.to_mask(data, view),
                       self.state2.to_mask(data, view))

    def __str__(self):
        # Fall back to the operator object itself when it has no symbol
        sym = OPSYM.get(self.op, self.op)
        return "(%s %s %s)" % (self.state1, sym, self.state2)
class OrState(CompositeSubsetState):
    """Composite state whose operator is ``operator.or_``."""

    op = operator.or_
class AndState(CompositeSubsetState):
    """Composite state whose operator is ``operator.and_``."""

    op = operator.and_
class XorState(CompositeSubsetState):
    """Composite state whose operator is ``operator.xor``."""

    op = operator.xor
class InvertState(CompositeSubsetState):
    """Composite state that inverts the mask of its single operand."""

    @memoize
    @contract(data='isinstance(Data)', view='array_view')
    def to_mask(self, data, view=None):
        """Return ``~`` applied to the mask of ``state1``."""
        inner = self.state1.to_mask(data, view)
        return ~inner

    def __str__(self):
        return "(~%s)" % self.state1
class MaskSubsetState(SubsetState):

    """
    A subset defined by boolean pixel mask
    """

    def __init__(self, mask, cids):
        """
        :param cids: List of ComponentIDs, defining the pixel coordinate space
                     of the mask
        :param mask: Boolean ndarray
        """
        super(MaskSubsetState, self).__init__()
        self.cids = cids
        self.mask = mask

    def copy(self):
        """Return a new MaskSubsetState referring to the same mask and cids.

        Without this override the inherited ``SubsetState.copy`` would
        return an empty state, so copying (e.g. when building a composite
        state) would silently drop the mask.
        """
        return MaskSubsetState(self.mask, self.cids)

    def to_mask(self, data, view=None):
        """Evaluate the stored mask on ``data``.

        :param data: dataset to evaluate on
        :param view: optional view into the dataset; ``None`` means the
                     whole dataset
        """
        # Compare against None explicitly: ``view or ...`` raises for array
        # views and would discard legitimate falsy views such as ``0``.
        if view is None:
            view = slice(None)

        # shortcut for data on the same pixel grid
        if data.pixel_component_ids == self.cids:
            return self.mask[view].copy()

        # locate each element of data in the coordinate system of the mask
        # (builtin ``int`` replaces the removed ``np.int`` alias; a tuple is
        # required for one-index-array-per-axis indexing)
        vals = tuple(data[c, view].astype(int) for c in self.cids)
        result = self.mask[vals]

        for v, n in zip(vals, data.shape):
            result &= ((v >= 0) & (v < n))

        return result

    def __gluestate__(self, context):
        return dict(cids=[context.id(c) for c in self.cids],
                    mask=context.do(self.mask))

    @classmethod
    def __setgluestate__(cls, rec, context):
        return cls(context.object(rec['mask']),
                   [context.object(c) for c in rec['cids']])


class CategorySubsetState(SubsetState):
    """Subset state selecting elements whose attribute value is one of a
    given set of values.

    :param attribute: the attribute to test
    :param values: accepted values; stored as a flattened array
    """

    def __init__(self, attribute, values):
        super(CategorySubsetState, self).__init__()
        self._attribute = attribute
        self._values = np.asarray(values).ravel()

    @memoize
    def to_mask(self, data, view=None):
        """Return a membership mask with the same shape as the viewed data."""
        vals = data[self._attribute, view]
        result = np.in1d(vals.ravel(), self._values)
        return result.reshape(vals.shape)

    def copy(self):
        """Return a new state with the same attribute and a copy of the values."""
        return CategorySubsetState(self._attribute, self._values.copy())

    def __gluestate__(self, context):
        return dict(att=context.id(self._attribute),
                    vals=context.do(self._values))

    @classmethod
    def __setgluestate__(cls, rec, context):
        return cls(context.object(rec['att']),
                   context.object(rec['vals']))
class ElementSubsetState(SubsetState):
    """Subset state defined by flat indices into the data.

    :param indices: indices (into the flattened data) that belong to the
                    subset, or None for an empty selection
    """

    def __init__(self, indices=None):
        super(ElementSubsetState, self).__init__()
        self._indices = indices

    @memoize
    def to_mask(self, data, view=None):
        """Return a boolean mask with the stored indices set to True."""
        # XXX this is inefficient for views
        full_mask = np.zeros(data.shape, dtype=bool)
        if self._indices is not None:
            full_mask.flat[self._indices] = True
        if view is None:
            return full_mask
        return full_mask[view]

    def copy(self):
        """Return a new state referring to the same indices."""
        return ElementSubsetState(self._indices)
class InequalitySubsetState(SubsetState):
    """Subset state comparing two operands with a comparison operator.

    Each operand is a ComponentID, a ComponentLink or a plain number.

    :param left: left-hand operand
    :param right: right-hand operand
    :param op: one of ``operator.gt``, ``ge``, ``lt``, ``le``, ``eq``, ``ne``

    Raises:
        TypeError: if ``op`` or either operand is of an unsupported kind.
    """

    def __init__(self, left, right, op):
        from glue.core.component_link import ComponentLink
        super(InequalitySubsetState, self).__init__()
        from glue.core.data import ComponentID

        allowed_ops = [operator.gt, operator.ge,
                       operator.lt, operator.le,
                       operator.eq, operator.ne]
        if op not in allowed_ops:
            raise TypeError("Invalid boolean operator: %s" % op)

        # Validate left first, then right, so the first offender is reported
        operand_types = (ComponentID, numbers.Number, ComponentLink)
        for operand in (left, right):
            if not isinstance(operand, operand_types):
                raise TypeError("Input must be ComponenID or NumberType: %s"
                                % type(operand))

        self._left = left
        self._right = right
        self._operator = op

    @property
    def left(self):
        """The left-hand operand."""
        return self._left

    @property
    def right(self):
        """The right-hand operand."""
        return self._right

    @property
    def operator(self):
        """The comparison function applied to the operands."""
        return self._operator

    @memoize
    def to_mask(self, data, view=None):
        """Apply the operator to both operands.

        Numeric operands are used as-is; any other operand is looked up in
        ``data`` under the given view.
        """
        if isinstance(self._left, numbers.Number):
            lhs = self._left
        else:
            lhs = data[self._left, view]

        if isinstance(self._right, numbers.Number):
            rhs = self._right
        else:
            rhs = data[self._right, view]

        return self._operator(lhs, rhs)

    def copy(self):
        """Return a new state with the same operands and operator."""
        return InequalitySubsetState(self._left, self._right, self._operator)

    def __str__(self):
        # Fall back to the operator object itself when it has no symbol
        symbol = OPSYM.get(self._operator, self._operator)
        return "(%s %s %s)" % (self._left, symbol, self._right)

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, self)


@contract(subsets='list(isinstance(Subset))', returns=Subset)
def _combine(subsets, operator):
    """Apply ``operator`` to the subsets' states and wrap the result.

    The combined state is attached to a new Subset created without data.
    """
    states = [subset.subset_state for subset in subsets]
    combined = Subset(None)
    combined.subset_state = operator(*states)
    return combined
def combine_multiple(subsets, operator):
    """Fold ``subsets`` from left to right with the binary ``operator``.

    :param subsets: sequence of objects to combine
    :param operator: function of two arguments returning their combination

    Returns an empty SubsetState when ``subsets`` is empty, and the sole
    element unchanged when there is only one.
    """
    if len(subsets) == 0:
        return SubsetState()

    remaining = iter(subsets)
    result = next(remaining)
    for item in remaining:
        result = operator(result, item)
    return result