Source code for auswahl.util._optimization

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view


[docs]def optimize_intervals(n_intervals: int, interval_width: int, feature_scores: np.array): """The algorithm calculates the optimal non-overlapping placement of n_intervals of width interval_width into the range of features. The feature scores are specified in feature_scores (greater better). The algorithm can be used for instance to turn every point selection algorithm yielding a score for each feature (such as :class:`~auswahl.VIP`) into an interval selection algorithm. The runtime of the algorithm is :math:`O(kn)`, with k being n_intervals and n feature_scores.size Parameters ---------- n_intervals: int Number of intervals to be placed. interval_width: int Width of the intervals. feature_scores: np.array of shape (n, ) Array of scores of variables (greater better). Returns ------- tuple: (float, List[int]) Tuple of overall score of the interval placement, list of interval starts. """ if not isinstance(n_intervals, int): raise ValueError(f'optimize_intervals requires an integer for argument n_intervals. Got {type(n_intervals)}') if not isinstance(interval_width, int): raise ValueError(f'optimize_intervals requires an integer for argument interval_width.' f'Got {type(interval_width)}') if n_intervals <= 0: raise ValueError(f'optimize_intervals requires a positive integer for argument n_intervals. Got {n_intervals}') if interval_width <= 0: raise ValueError(f'optimize_intervals requires a positive integer for argument interval_width.' f'Got {interval_width}') if len(feature_scores.shape) != 1: raise ValueError(f'optimize_intervals requires an array of rank one. Got rank {len(feature_scores.shape)}') if feature_scores.size < n_intervals * interval_width: raise ValueError(f'optimize_intervals requires an array of feature scores with at least n_interval *' f'interval_width. Required at least {n_intervals * interval_width}, got {feature_scores.size}') # costs[i]: total score of the interval starting at i intervals = sliding_window_view(feature_scores, interval_width) interval_scores = np.sum(intervals, axis=1) # table[i, k] specifies the score of allocating in the range of features {0, ..., i} k intervals # initialize with sentinels # introduce index -1 sentinel into the first axis of the table table = -1000000 * np.ones((feature_scores.size + 1, n_intervals + 1)) table[:, 0] = 0 # zero costs for no intervals interval_starts = [[[] for _ in range(n_intervals + 1)] for _ in range(feature_scores.size)] for i in range(interval_width - 1, feature_scores.size): for k in range(1, min((i + 1) // interval_width, n_intervals) + 1): score_incl_i = interval_scores[i - interval_width + 1] + table[i - interval_width, k - 1] if score_incl_i > table[i-1, k]: table[i, k] = score_incl_i interval_starts[i][k] = interval_starts[i - interval_width][k - 1] + [i - interval_width + 1] else: table[i, k] = table[i - 1, k] interval_starts[i][k] = interval_starts[i - 1][k].copy() return table[feature_scores.size - 1, n_intervals], interval_starts[feature_scores.size - 1][n_intervals]