# Source code for nachos.similarity_functions.SimilarityFunctions

from typing import Optional, Union, List

# NOTE(review): the module path of the next import was lost when this file was
# extracted from rendered documentation; `nachos.data.Data` is a
# reconstruction -- confirm against the package layout.
from nachos.data.Data import Data, Dataset
from nachos.similarity_functions.abstract_similarity import AbstractSimilarity

class SimilarityFunctions(object):
    '''
        Summary:
            A weighted collection of per-factor similarity functions. The
            function at position n compares factor n of two data points, and
            the weight at position n scales that comparison when the factors
            are summed.

        NOTE(review): this class was recovered from a rendered documentation
        page in which some tokens were lost. Places where the code had to be
        reconstructed are marked with NOTE(review) comments below.
    '''

    # Project types are quoted in the annotations so that they are never
    # evaluated when the class body is executed.

    @classmethod
    def build(cls, conf: dict):
        '''
            Summary:
                Create a SimilarityFunctions object from a configuration.

            Inputs
            -----------------
            :param conf: configuration with the keys 'similarity_functions'
                and 'factor_weights'
            :type conf: dict

            Returns
            -------------------
            :return: an instance holding the configured functions and weights
            :rtype: SimilarityFunctions
        '''
        # NOTE(review): the expression that turned each configured entry into
        # a similarity function was lost in extraction. The entries are passed
        # through unchanged here, which is only correct if they are already
        # callables -- restore the original per-entry construction if the
        # configuration stores names or sub-configurations instead.
        fns = list(conf['similarity_functions'])
        weights = conf['factor_weights']
        return cls(fns, weights)

    def __init__(self, fns: List["AbstractSimilarity"], weights: List[float]):
        '''
            :param fns: one similarity function per factor, in factor order
            :type fns: List[AbstractSimilarity]
            :param weights: one weight per factor, in factor order
            :type weights: List[float]
        '''
        self.fns = fns
        self.weights = weights

    def __call__(self,
        u: "Dataset",
        v: "Dataset",
        n: Optional[int] = None,
    ) -> float:
        '''
            Summary:
                This function is overloaded to operate with a few different
                kinds of data. It can either work to compare the similarities
                between two data points, between a data point and a dataset,
                or either of the previous two functions with respect to a
                single factor, n.

            Inputs
            -----------------
            :param u: A data point (defined by the Dataset class)
            :type u: Dataset
            :param v: A data set
            :type v: Dataset
            :param n: The index of the factor with respect to which to compute
                similarity. None means use the sum of all factors
            :type n: Optional[int]

            Returns
            -------------------
            :return: The similarity score
            :rtype: float

            Raises
            -------------------
            :raises NotImplementedError: if u does not hold exactly one point,
                or if v is empty
        '''
        # Only "one point versus something" comparisons are supported.
        if len(u) != 1:
            raise NotImplementedError

        # NOTE(review): the receiver in front of every `[0]` below was lost in
        # extraction; `.data[0]` assumes a Dataset exposes its points as a
        # sequence called `data` -- confirm against the Dataset class.
        if len(v) > 1:
            return self.score_set(u.data[0], v, n)

        if len(v) == 1:
            if n is None:
                # Weighted sum over all factors, read from the Dataset-level
                # `factors` mapping in its iteration order.
                factors_zipped = zip(
                    self.weights,
                    self.fns,
                    u.factors.values(),
                    v.factors.values(),
                )
                return sum(w * fn(f, g) for w, fn, f, g in factors_zipped)
            return self.score(u.data[0], v.data[0], n)

        # v is empty.
        raise NotImplementedError

    def score(self, u: "Data", v: "Data", n: int) -> float:
        '''
            Summary:
                Compare two data points with respect to factor n only, using
                the n-th similarity function. No weight is applied.

            :param u: a data point
            :type u: Data
            :param v: a data point
            :type v: Data
            :param n: the index of the factor to compare
            :type n: int
            :return: the similarity of u and v on factor n
            :rtype: float
        '''
        return self.fns[n](u.factors[n], v.factors[n])

    def score_set(self,
        u: "Data",
        v: "Dataset",
        n: Optional[int] = None,
    ) -> float:
        '''
            Summary:
                Compare a data point to a data set. The result is the score of
                the best matching member of the set.

            :param u: a data point
            :type u: Data
            :param v: a data set
            :type v: Dataset
            :param n: the index of the factor to compare. None means use the
                weighted sum of all factors
            :type n: Optional[int]
            :return: the maximum similarity between u and any member of v
            :rtype: float
            :raises ValueError: if v has no members (raised by max)
        '''
        # NOTE(review): the second argument of each self.score call was lost
        # in extraction and, as recovered, did not use the loop variable.
        # Iterating the members of v is the reconstruction; it relies on the
        # same `data` attribute assumed in __call__ -- confirm.
        members = v.data
        if n is not None:
            return max(self.score(u, x, n) for x in members)

        # Return the maximum sum. The factor index gets its own name so that
        # it does not rebind the parameter n.
        return max(
            sum(
                w * self.score(u, x, idx)
                for w, idx in zip(self.weights, range(len(u.factors)))
            )
            for x in members
        )