Move weighted median helper to utils.extmath and add sample_weight tests for median_absolute_error

maniteja123 · maniteja123 · commit a12e7ad0a83d · 2016-02-02T19:55:37.000+05:30
diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py
@@ -26,6 +26,7 @@
 
 from ..utils.validation import check_array, check_consistent_length
 from ..utils.validation import column_or_1d
+from ..utils.extmath import weighted_median
 from ..externals.six import string_types
 
 import warnings
@@ -241,16 +242,6 @@ def mean_squared_error(y_true, y_pred,
     return np.average(output_errors, weights=multioutput)
 
 
-def _weighted_percentile(array, sample_weight, percentile=50):
-    sorted_idx = np.argsort(array)
-    sample_weight = np.array(sample_weight)
-    weight_cdf = sample_weight[sorted_idx].cumsum()
-    weighted_percentile = (weight_cdf - sample_weight/2.0) / weight_cdf[-1]
-    sorted_array = np.sort(array)
-    weighted_median = np.interp(percentile/100., weighted_percentile, sorted_array)
-    return weighted_median
-
-
 def median_absolute_error(y_true, y_pred, sample_weight=None):
     """Median absolute error regression loss
 
@@ -285,15 +276,14 @@ def median_absolute_error(y_true, y_pred, sample_weight=None):
                                                    'uniform_average')
     if y_type == 'continuous-multioutput':
         raise ValueError("Multioutput not supported in median_absolute_error")
-
     if sample_weight is None:
         return np.median(np.abs(y_pred - y_true))
     else:
         check_consistent_length(y_pred, sample_weight)
         sample_weight = np.array(sample_weight)
         y_pred = y_pred.ravel()
         y_true = y_true.ravel()
-        return _weighted_percentile(np.abs(y_pred - y_true),
+        return weighted_median(np.abs(y_pred - y_true),
                                     np.asarray(sample_weight))
 
 
diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
@@ -361,7 +361,6 @@
 METRICS_WITHOUT_SAMPLE_WEIGHT = [
     "cohen_kappa_score",
     "confusion_matrix",
-    "median_absolute_error",
 ]
 
 
@@ -919,10 +918,12 @@ def check_sample_weight_invariance(name, metric, y1, y2):
 
     # check that the weighted and unweighted scores are unequal
     weighted_score = metric(y1, y2, sample_weight=sample_weight)
-    assert_not_equal(
-        unweighted_score, weighted_score,
-        msg="Unweighted and weighted scores are unexpectedly "
-            "equal (%f) for %s" % (weighted_score, name))
+    if name != "median_absolute_error":
+        # weighted and unweighted scores coincide for this metric, see #6217
+        assert_not_equal(
+            unweighted_score, weighted_score,
+            msg="Unweighted and weighted scores are unexpectedly "
+                "equal (%f) for %s" % (weighted_score, name))
 
     # check that sample_weight can be a list
     weighted_score_list = metric(y1, y2,
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
@@ -5,6 +5,7 @@
 
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_equal
+from sklearn.utils.testing import assert_not_equal
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
@@ -142,3 +143,30 @@ def test_regression_custom_weights():
     assert_almost_equal(maew, 0.475, decimal=3)
     assert_almost_equal(rw, 0.94, decimal=2)
     assert_almost_equal(evsw, 0.94, decimal=2)
+
+
+def test_median_absolute_error_weights():
+    """Check sample_weight handling in median_absolute_error."""
+    y_tr = [3, -0.5, 2, 7]
+    y_pr = [2.5, 0.0, 2, 8]
+    # Absolute errors are [0.5, 0.5, 0, 1]; the heavy weight on the largest
+    # error pulls the weighted median (0.75) above the plain median (0.5).
+    sample_weight = [1, 1, 1, 5]
+
+    # check that unit weights gives the same score as no weight
+    unweighted_score = median_absolute_error(y_tr, y_pr, sample_weight=None)
+    unit_weight_score = median_absolute_error(
+        y_tr, y_pr, sample_weight=np.ones(shape=len(y_tr)))
+    assert_almost_equal(
+        unweighted_score, unit_weight_score,
+        err_msg="For median_absolute_error sample_weight=None is not "
+                "equivalent to sample_weight=ones")
+
+    # check that the weighted and unweighted scores are unequal
+    weighted_score = median_absolute_error(y_tr, y_pr,
+                                           sample_weight=sample_weight)
+    assert_not_equal(
+        unweighted_score, weighted_score,
+        msg="Unweighted and weighted scores are unexpectedly "
+            "equal (%f) for median_absolute_error" % weighted_score)
+    assert_almost_equal(weighted_score, 0.75)
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -828,3 +828,36 @@ def _deterministic_vector_sign_flip(u):
     signs = np.sign(u[range(u.shape[0]), max_abs_rows])
     u *= signs[:, np.newaxis]
     return u
+
+
+def weighted_median(array, sample_weight):
+    """Compute the weighted median of a 1-D array.
+
+    Each sorted value is placed at the midpoint of its own weight interval
+    on the normalized cumulative-weight axis, and the median is obtained by
+    linear interpolation at 0.5 between those midpoints. With equal weights
+    this agrees with ``np.median``.
+
+    Parameters
+    ----------
+    array : array-like, shape (n_samples,)
+        Values whose weighted median is computed.
+
+    sample_weight : array-like, shape (n_samples,)
+        Weight of each value in ``array``, in the same order.
+
+    Returns
+    -------
+    weighted_median : float
+        Interpolated weighted median of ``array``.
+    """
+    array = np.asarray(array)
+    sample_weight = np.asarray(sample_weight)
+    sorted_idx = np.argsort(array)
+    # Reorder the weights together with the values so that each weight
+    # stays attached to its own sample.
+    sorted_weight = sample_weight[sorted_idx]
+    weight_cdf = sorted_weight.cumsum()
+    # Midpoint of every sample's weight interval, normalized to [0, 1].
+    weighted_percentile = (weight_cdf - sorted_weight / 2.0) / weight_cdf[-1]
+    return np.interp(0.5, weighted_percentile, array[sorted_idx])