"""
Filters
-------
Filters operate on the sets of location indices, the ``LocIndex`` objects, returned by ``Genome``
objects using ``targets`` or ``covered_targets``. There are two main filters:

1. ``CoverageFilter``
2. ``CategoryCodeFilter``

The ``CoverageFilter`` is used to create the ``covered_targets`` returned by the ``Genome``.
The ``CategoryCodeFilter`` is used to restrict the returned sets of ``LocIndex`` objects to
specific types of mutations e.g., only ``BinOp``, only ``Compare``, or a combination of multiple
mutation categories.
Both of these filters are implemented in ``Genome`` and ``GenomeGroup`` for basic usage in
filtering by category code or covered lines.
"""
import itertools
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, Iterable, Optional, Set, Union, ValuesView
from coverage.data import CoverageData # type: ignore
from mutatest import transformers
from mutatest.transformers import CATEGORIES, LocIndex
LOGGER = logging.getLogger(__name__)
####################################################################################################
# ABSTRACT BASE CLASS
####################################################################################################
class Filter(ABC):
    """Abstract Base Class for filters, interface should include a filter method."""

    @abstractmethod
    def filter(self, loc_idxs: Set[LocIndex], invert: bool = False) -> Set[LocIndex]:
        """General filter method that should return a location index set.

        A filter should take a set of location indices (``loc_idxs``) and return
        the filtered set of location indices. The invert kwarg is set as a reversible filter e.g.,
        to specify NOT for the filtering effect.

        Other args or kwargs may be required so this is not a hard-enforced signature.

        Args:
            loc_idxs: the set of location indices to filter.
            invert: flag for inverting the filtering effect (logical NOT).

        Returns:
            The filtered set of location indices.

        Raises:
            NotImplementedError: always, if this base implementation is invoked directly;
                concrete filters are expected to override it.
        """
        raise NotImplementedError
####################################################################################################
# FILTER IMPLEMENTATIONS
####################################################################################################
[docs]class CoverageFilter(Filter):
"""Filter for covered lines to be applied to mutation targets in Genome."""
def __init__(self, coverage_file: Union[str, Path] = Path(".coverage")) -> None:
"""Initialize the filter.
Args:
coverage_file: an optional coverage file, a default ".coverage" is used.
"""
self._coverage_file = Path(coverage_file)
self._coverage_data: Optional[CoverageData] = None
@property
def coverage_file(self) -> Path:
"""Property accessor for ``_coverage_file`` set at initialization.
Returns:
The coverage file path.
"""
return self._coverage_file
@coverage_file.setter
def coverage_file(self, value: Union[str, Path]) -> None:
"""Setter for the coverage file, clears local cache of CoverageData.
Args:
value: The path to the coverage file
Returns:
None
"""
self._coverage_file = Path(value)
self._coverage_data = None
@property
def coverage_data(self) -> CoverageData:
"""Read the coverage file for lines and arcs data.
This is cached locally and updated if the coverage_file is changed.
Returns:
A CoverageData object based on the ``coverage_file``.
Raises:
FileNotFoundError: if coverage_file does not exist.
"""
if not self.coverage_file.exists():
raise FileNotFoundError(
f"{self.coverage_file.resolve()} does not exist. "
"Set the coverage_file property to a valid file."
)
if self._coverage_data is None:
try:
# Coverage v 4.5.4
# https://coverage.readthedocs.io/en/coverage-4.5.4/api_coveragedata.html#coverage.CoverageData.read_file
self._coverage_data = CoverageData()
self._coverage_data.read_file(self.coverage_file)
except AttributeError:
# Coverage v 5.0.0
# https://coverage.readthedocs.io/en/coverage-5.0/api_coveragedata.html#coverage.CoverageData.read
self._coverage_data = CoverageData(basename=str(self.coverage_file.resolve()))
self._coverage_data.read()
return self._coverage_data
[docs] def filter( # type: ignore
self,
loc_idxs: Set[LocIndex],
source_file: Union[str, Path],
invert: bool = False,
resolve_source: bool = True,
) -> Set[LocIndex]:
"""Filter based on coverage measured file.
This adds the source_file argument to the filter abstract method because the coverage
file holds multiple measured-files, and the ``LocIndex`` object does not have a source
file attribute. The choice is that the coverage file can be set and read once for the
class instance, and any valid measured file can be used in the filter.
Args:
loc_idxs: location index set of targets
source_file: source file that is measured by the coverage file
invert: flag for inverted filter using NOT
resolve_source: flag for using resolved source_file vs. direct str, default True.
This exists mostly for testing purposes to access mocked entries in the
fake coverage files.
Returns:
Filtered set of location index set
"""
measured_file = str(Path(source_file).resolve()) if resolve_source else str(source_file)
covered_lines = self.coverage_data.lines(measured_file) or list()
if invert:
return {loc for loc in loc_idxs if loc.lineno not in covered_lines}
return {loc for loc in loc_idxs if loc.lineno in covered_lines}
[docs]class CategoryCodeFilter(Filter):
"""Filter by mutation category code."""
def __init__(self, codes: Optional[Iterable[str]] = None):
"""Initialize the filter.
Args:
codes: An optional iterable of two-letter category codes for filtering.
Optional to set at initialization of the class, can be set through properties.
The codes property must be set prior to filtering.
Only codes that are valid categories are added, others are discarded.
Make sure you set appropriately as an iterable for single string values e.g.,
``codes=("bn",)``; otherwise, the codes property will set as empty.
"""
# managed by class properties, no direct setters
self._valid_categories = CATEGORIES # defined in transformers.py
self._codes: Set[str] = set()
# initialize through properties
self.codes = set(codes) if codes else set()
@property
def valid_categories(self) -> Dict[str, str]:
"""All valid categories with descriptive name and 2 letter code.
Returns:
The categories defined in transformers.
"""
return self._valid_categories
@property
def valid_codes(self) -> ValuesView[str]:
"""All valid 2 letter codes.
Returns:
View of the values of ``valid_categories``.
"""
return self._valid_categories.values()
@property
def codes(self) -> Set[str]:
"""Getter for the codes set for filtering purposes.
Returns:
Set of 2 letter codes used in filtering.
"""
return self._codes
@codes.setter
def codes(self, value: Iterable[str]) -> None:
"""Set the codes to a new value (full replacement of the set).
Only codes that are valid categories are added, all others are discarded.
Args:
value: the set of 2-letter codes.
Returns:
None
"""
self._codes = {v for v in value if v in self.valid_codes}
@property
def valid_mutations(self) -> Set[Any]:
"""Valid mutations for the set of category codes.
Returns:
Set of valid mutations for the codes, types will vary
"""
# unpack iterable of sets of compatible operations defined in transformers
return set(
itertools.chain.from_iterable(
op.operations
for op in transformers.get_compatible_operation_sets()
if op.category in self.codes
)
)
[docs] def add_code(self, code: str) -> None:
"""Add a single 2-letter code to the codes set for the class.
Args:
code: a valid 2 letter code
Returns:
None
Raises:
ValueError: if an invalid code is passed.
"""
if code not in self.valid_codes:
raise ValueError(f"{code} is not an allowed code.")
self._codes.add(code)
[docs] def discard_code(self, code: str) -> None:
"""Discard a 2-letter code from the codes set.
This uses the built-in ``set.discard()`` so that a KeyError is not raised if the code
does not exist in the set already.
Args:
code: the 2-letter code to discard
Returns:
None
"""
self._codes.discard(code)
[docs] def filter(self, loc_idxs: Set[LocIndex], invert: bool = False) -> Set[LocIndex]:
"""Filter a set of location indices based on the set codes.
If the codes property is an empty set, the ``loc_idxs`` is returned unmodified.
Args:
loc_idxs: the set of location indices to filter.
invert: flag for inverted filtering using NOT
Returns:
Set of location indices with the filter applied.
"""
if not self.codes:
return loc_idxs
if invert:
return {loc for loc in loc_idxs if loc.op_type not in self.valid_mutations}
return {loc for loc in loc_idxs if loc.op_type in self.valid_mutations}