Source code for seqme.metrics.subset

from typing import Literal

import numpy as np

from seqme.core.base import Metric, MetricResult


class Subset(Metric):
    """Metric wrapper that scores only a random sample of the given sequences.

    Intended for approximating expensive metrics: the wrapped metric is
    called on ``n_samples`` sequences drawn without replacement instead of
    on the full group.
    """

    def __init__(
        self,
        metric: Metric,
        *,
        n_samples: int,
        seed: int = 0,
    ):
        """
        Set up the subset wrapper.

        Args:
            metric: Metric that is evaluated on the sampled sequences.
            n_samples: How many sequences to draw for each evaluation.
            seed: Seed used to make the sampling deterministic.

        Raises:
            ValueError: If ``n_samples`` is not greater than 0.
        """
        self.metric = metric
        self.n_samples = n_samples
        self.seed = seed

        if n_samples <= 0:
            raise ValueError("n_samples must be greater than 0.")

    def __call__(self, sequences: list[str]) -> MetricResult:
        """
        Evaluate the wrapped metric on a sample of ``sequences``.

        A new random generator is created from ``seed`` on every call, and
        ``n_samples`` distinct positions are drawn from it.

        Args:
            sequences: Pool of sequences to draw the sample from.

        Returns:
            MetricResult: Result of the wrapped metric on the sampled sequences.

        Raises:
            ValueError: If fewer than ``n_samples`` sequences are provided.
        """
        # Sampling without replacement needs at least n_samples candidates.
        if len(sequences) < self.n_samples:
            message = f"Too few sequences to subsample. Expected at least {self.n_samples} sequences, got {len(sequences)} sequences."
            raise ValueError(message)

        # Re-seeding on each call keeps the drawn positions reproducible.
        generator = np.random.default_rng(self.seed)
        positions = np.arange(len(sequences), dtype=int)
        chosen = generator.choice(positions, size=self.n_samples, replace=False)

        sampled = [sequences[position] for position in chosen]
        return self.metric(sampled)

    @property
    def name(self) -> str:
        """Name of the wrapped metric, passed through unchanged."""
        return self.metric.name

    @property
    def objective(self) -> Literal["minimize", "maximize"]:
        """Objective of the wrapped metric, passed through unchanged."""
        return self.metric.objective