Skip to content

Commit fd81c72

Browse files
committed
Added test for correctness of ridge regression with sample weights in feature space as well as test for correct branching decision in ridge_regression function
1 parent 3d38c1a commit fd81c72

File tree

1 file changed

+62
-1
lines changed

1 file changed

+62
-1
lines changed

sklearn/linear_model/tests/test_ridge.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from sklearn.linear_model.ridge import _solve_dense_cholesky
2626
from sklearn.linear_model.ridge import _solve_dense_cholesky_kernel
2727

28-
2928
from sklearn.cross_validation import KFold
3029

3130
diabetes = datasets.load_diabetes()
@@ -526,3 +525,65 @@ def test_ridgecv_store_cv_values():
526525
y = rng.randn(n_samples, n_responses)
527526
r.fit(x, y)
528527
assert_equal(r.cv_values_.shape, (n_samples, n_responses, n_alphas))
528+
529+
530+
def test_ridge_sample_weights_in_feature_space():
    """Check that Cholesky solver in feature space applies sample_weights
    correctly.

    For several (n_samples, n_features, n_targets) combinations the same
    weighted problem is handed to ``_solve_dense_cholesky_kernel`` (on the
    Gram matrix ``X.dot(X.T)``) and to ``_solve_dense_cholesky`` (on ``X``
    itself).  The sample-space coefficients, mapped through ``X.T``, must
    agree with the transposed feature-space coefficients.
    """
    rng = np.random.RandomState(42)

    # Cover n_samples < n_features, n_samples == n_features and
    # n_samples > n_features, each with one and with two targets.
    n_sampless = [5, 6, 7] * 2
    n_featuress = [7, 6, 5] * 2
    n_targetss = [1, 1, 1, 2, 2, 2]
    noise = 1.
    alpha = np.atleast_1d(2.)

    for n_samples, n_features, n_targets in zip(n_sampless,
                                                n_featuress,
                                                n_targetss):
        X = rng.randn(n_samples, n_features)
        beta = rng.randn(n_features, n_targets)
        Y = X.dot(beta)
        # Perturb each target column with noise scaled by that column's norm.
        Y_noisy = Y + rng.randn(*Y.shape) * np.sqrt((Y ** 2).sum(0)) * noise

        K = X.dot(X.T)
        # Strictly positive weights (always >= 1).
        sample_weights = 1. + (rng.randn(n_samples) ** 2) * 10

        # Fit on the noisy targets: previously Y_noisy was computed but the
        # noise-free Y was passed to both solvers, leaving it unused.
        coef_sample_space = _solve_dense_cholesky_kernel(
            K, Y_noisy, alpha, sample_weight=sample_weights)

        coef_feature_space = _solve_dense_cholesky(
            X, Y_noisy, alpha, sample_weight=sample_weights)

        assert_array_almost_equal(X.T.dot(coef_sample_space),
                                  coef_feature_space.T)
564+
565+
566+
def test_branches_to_sample_weights_in_feature_space():
    """Check that Ridge stays in feature space when sample weights are given.

    With n_samples > n_features the fit should be handled in feature space
    even if sample_weight is passed.  The number of samples is chosen so
    large that a treatment in sample space is expected to fail with a
    MemoryError; the test therefore passes simply by ``fit`` returning.
    """
    # Size the problem from a nominal memory figure (in terabytes).  With the
    # factor 8 below the sample count works out to 4 * 1024 ** 2.
    nominal_terabytes = 2
    size_budget = nominal_terabytes * 8 * 1024 ** 4
    n_rows = int(np.ceil(np.sqrt(size_budget)))
    n_cols = 2

    random_state = np.random.RandomState(42)
    design = random_state.randn(n_rows, n_cols)
    # Noise-free targets generated from fixed coefficients.
    targets = design.dot(np.array([1., 2.]))

    # Weights are strictly positive (always >= 1).
    weights = 1 + random_state.randn(n_rows) ** 2

    # No assertion needed: taking the sample-space branch is expected to
    # raise before fit can return.
    Ridge(alpha=1.).fit(design, targets, sample_weight=weights)
589+

0 commit comments

Comments
 (0)