Source code for Environment.sortedDistance

# -*- coding: utf-8 -*-
""" sortedDistance: define function to measure of sortedness of permutations of [0..N-1].

- Cf. http://stevehanov.ca/blog/index.php?id=145 and https://stackoverflow.com/q/8206617
"""
from __future__ import division, print_function  # Python 2 compatibility

__author__ = "Lilian Besson"
__version__ = "0.6"

from difflib import SequenceMatcher
import numpy as np
import scipy.stats


def weightedDistance(choices, weights, n=None):
    """Relative difference between the best possible weighted choices and the actual choices.

    >>> weights = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    >>> choices = [8, 6, 5, 2]
    >>> weightedDistance(choices, weights)  # not a bad choice  # doctest: +ELLIPSIS
    0.8333...
    >>> choices = [8, 6, 5, 7]
    >>> weightedDistance(choices, weights)  # best choice!  # doctest: +ELLIPSIS
    1.000...
    >>> choices = [3, 2, 1, 0]
    >>> weightedDistance(choices, weights)  # worst choice!  # doctest: +ELLIPSIS
    0.3333...
    """
    if len(choices) == 0 or len(weights) == 0:
        return 0
    choices = np.asarray(choices)
    weights = np.asarray(weights)
    if n is None:
        n = len(choices)
    bestWeights = np.sum(np.sort(weights)[-n:])
    chosenWeights = np.sum(weights[choices[-n:]])
    return chosenWeights / float(bestWeights)
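
# A worked reading of the first doctest above (an illustrative note, not part of
# the original module): with weights [0.1, ..., 0.9] and choices [8, 6, 5, 2],
# the best possible sum of n=4 weights is 0.9 + 0.8 + 0.7 + 0.6 = 3.0, while the
# chosen indices weigh 0.9 + 0.7 + 0.6 + 0.3 = 2.5, hence the score 2.5 / 3.0 = 0.8333...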

def manhattan(permutation, comp=None):
    """A certain measure of sortedness for the list A, based on Manhattan distance.

    >>> perm = [0, 1, 2, 3, 4]
    >>> manhattan(perm)  # sorted  # doctest: +ELLIPSIS
    1.0...
    >>> perm = [0, 1, 2, 5, 4, 3]
    >>> manhattan(perm)  # almost sorted!  # doctest: +ELLIPSIS
    0.777...
    >>> perm = [2, 9, 6, 4, 0, 3, 1, 7, 8, 5]
    >>> manhattan(perm)  # doctest: +ELLIPSIS
    0.4
    >>> perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9]  # better sorted!
    >>> manhattan(perm)  # doctest: +ELLIPSIS
    0.72
    """
    if len(permutation) == 0:
        return 0
    if comp is None:
        comp = sorted(permutation)
    return 1 - (2 * sum(abs(comp[index] - element) for index, element in enumerate(permutation))) / (len(permutation) ** 2)
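
# Note on the normalisation (an illustrative remark, not an original comment):
# for a permutation of [0..N-1], the sum of |comp[i] - permutation[i]| is at most
# floor(N^2 / 2), reached by the fully reversed permutation, so the score
# 1 - 2 * sum / N^2 ranges from 1 for a perfectly sorted list down to (about) 0
# for a fully reversed one.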

def kendalltau(permutation, comp=None):
    """A certain measure of sortedness for the list A, based on the Kendall tau ranking coefficient.

    >>> perm = [0, 1, 2, 3, 4]
    >>> kendalltau(perm)  # sorted  # doctest: +ELLIPSIS
    0.98...
    >>> perm = [0, 1, 2, 5, 4, 3]
    >>> kendalltau(perm)  # almost sorted!  # doctest: +ELLIPSIS
    0.90...
    >>> perm = [2, 9, 6, 4, 0, 3, 1, 7, 8, 5]
    >>> kendalltau(perm)  # doctest: +ELLIPSIS
    0.211...
    >>> perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9]  # better sorted!
    >>> kendalltau(perm)  # doctest: +ELLIPSIS
    0.984...
    """
    if len(permutation) == 0:
        return 0
    if comp is None:
        comp = sorted(permutation)
    res = 1 - scipy.stats.kendalltau(permutation, comp).pvalue
    if np.isnan(res):
        res = 0
    return res

def spearmanr(permutation, comp=None):
    """A certain measure of sortedness for the list A, based on the Spearman ranking coefficient.

    >>> perm = [0, 1, 2, 3, 4]
    >>> spearmanr(perm)  # sorted  # doctest: +ELLIPSIS
    1.0...
    >>> perm = [0, 1, 2, 5, 4, 3]
    >>> spearmanr(perm)  # almost sorted!  # doctest: +ELLIPSIS
    0.92...
    >>> perm = [2, 9, 6, 4, 0, 3, 1, 7, 8, 5]
    >>> spearmanr(perm)  # doctest: +ELLIPSIS
    0.248...
    >>> perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9]  # better sorted!
    >>> spearmanr(perm)  # doctest: +ELLIPSIS
    0.986...
    """
    if len(permutation) == 0:
        return 0
    if comp is None:
        comp = sorted(permutation)
    res = 1 - scipy.stats.spearmanr(permutation, comp).pvalue
    if np.isnan(res):
        res = 0
    return res
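
# Implementation note (an observation added here, not an original comment): both
# kendalltau() and spearmanr() above build their score from the *p-value* of the
# scipy.stats test (1 - pvalue), not from the correlation statistic itself. Since
# the p-value goes to 0 as soon as the correlation is statistically significant,
# long lists that are only mildly sorted can still score very close to 1, which
# is why meanDistance() below excludes these two methods by default.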

def gestalt(permutation, comp=None):
    """A certain measure of sortedness for the list A, based on Gestalt pattern matching.

    >>> perm = [0, 1, 2, 3, 4]
    >>> gestalt(perm)  # sorted  # doctest: +ELLIPSIS
    1.0...
    >>> perm = [0, 1, 2, 5, 4, 3]
    >>> gestalt(perm)  # almost sorted!  # doctest: +ELLIPSIS
    0.666...
    >>> perm = [2, 9, 6, 4, 0, 3, 1, 7, 8, 5]
    >>> gestalt(perm)  # doctest: +ELLIPSIS
    0.4...
    >>> perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9]  # better sorted!
    >>> gestalt(perm)  # doctest: +ELLIPSIS
    0.5...

    >>> import random
    >>> random.seed(0)
    >>> ratings = [random.gauss(1200, 200) for i in range(100000)]
    >>> gestalt(ratings)  # doctest: +ELLIPSIS
    8e-05...
    """
    if len(permutation) == 0:
        return 0
    if comp is None:
        comp = sorted(permutation)
    return SequenceMatcher(None, permutation, comp).ratio()
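
# A worked reading of the second doctest above (an illustrative note, not part of
# the original module): SequenceMatcher.ratio() returns 2 * M / T, where T is the
# total number of elements in both sequences and M the number of matched elements.
# For [0, 1, 2, 5, 4, 3] against its sorted copy [0, 1, 2, 3, 4, 5], the matching
# blocks cover 4 elements ([0, 1, 2] plus one single-element block), so the ratio
# is 2 * 4 / 12 = 0.666...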

def meanDistance(permutation, comp=None, methods=(manhattan, gestalt)):
    """A certain measure of sortedness for the list A, based on the mean of the two distances: manhattan and gestalt.

    >>> perm = [0, 1, 2, 3, 4]
    >>> meanDistance(perm)  # sorted  # doctest: +ELLIPSIS
    1.0
    >>> perm = [0, 1, 2, 5, 4, 3]
    >>> meanDistance(perm)  # almost sorted!  # doctest: +ELLIPSIS
    0.722...
    >>> perm = [2, 9, 6, 4, 0, 3, 1, 7, 8, 5]
    >>> meanDistance(perm)  # doctest: +ELLIPSIS
    0.4
    >>> perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9]  # better sorted!
    >>> meanDistance(perm)  # doctest: +ELLIPSIS
    0.61

    .. warning:: I removed :func:`kendalltau` and :func:`spearmanr` as they were giving 100% in many cases where there was clearly no reason to give 100%...
    """
    distances = []
    for method in methods:
        distances.append(method(permutation, comp=comp))
    return np.mean(distances)
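
# A quick check of the last doctest above (illustrative, not part of the original
# module): for perm = [2, 1, 6, 4, 0, 3, 5, 7, 8, 9], manhattan(perm) = 0.72 and
# gestalt(perm) = 0.5 (see their doctests), so meanDistance(perm) = (0.72 + 0.5) / 2 = 0.61.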

# Default distance
sortedDistance = meanDistance

# Only export and expose the useful functions defined here
__all__ = [
    "weightedDistance",
    "manhattan",
    "kendalltau",
    "spearmanr",
    "gestalt",
    "meanDistance",
    "sortedDistance",
]


# --- Debugging

if __name__ == "__main__":
    # Code for debugging purposes.
    from doctest import testmod
    print("\nTesting automatically all the docstrings written in each function of this module:")
    testmod(verbose=True)
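
# Example usage (a hypothetical sketch; the import path Environment.sortedDistance
# is assumed from the page title):
#     >>> from Environment.sortedDistance import sortedDistance
#     >>> sortedDistance([2, 1, 6, 4, 0, 3, 5, 7, 8, 9])  # doctest: +ELLIPSIS
#     0.61...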