|
25 | 25 | from sklearn.linear_model.ridge import _solve_dense_cholesky |
26 | 26 | from sklearn.linear_model.ridge import _solve_dense_cholesky_kernel |
27 | 27 |
|
28 | | - |
29 | 28 | from sklearn.cross_validation import KFold |
30 | 29 |
|
31 | 30 | diabetes = datasets.load_diabetes() |
@@ -526,3 +525,65 @@ def test_ridgecv_store_cv_values(): |
526 | 525 | y = rng.randn(n_samples, n_responses) |
527 | 526 | r.fit(x, y) |
528 | 527 | assert_equal(r.cv_values_.shape, (n_samples, n_responses, n_alphas)) |
| 528 | + |
| 529 | + |
def test_ridge_sample_weights_in_feature_space():
    """Check that the feature-space Cholesky solver applies sample weights.

    For several (n_samples, n_features, n_targets) combinations the same
    weighted ridge problem is solved twice: once in sample space, with
    ``_solve_dense_cholesky_kernel`` on the Gram matrix ``X.dot(X.T)``, and
    once in feature space, with ``_solve_dense_cholesky`` on ``X``.
    The sample-space solution mapped through ``X.T`` must agree with the
    transposed feature-space coefficients.
    """
    rng = np.random.RandomState(42)

    # The three shape triples cover n_samples <, == and > n_features,
    # each once with a single target and once with two targets.
    n_samples_list = [5, 6, 7] * 2
    n_features_list = [7, 6, 5] * 2
    n_targets_list = [1, 1, 1, 2, 2, 2]
    noise = 1.
    alpha = np.atleast_1d(2.)

    for n_samples, n_features, n_targets in zip(n_samples_list,
                                                n_features_list,
                                                n_targets_list):
        X = rng.randn(n_samples, n_features)
        beta = rng.randn(n_features, n_targets)
        Y = X.dot(beta)
        # Per-target noise, scaled by the norm of each target column.
        Y_noisy = Y + rng.randn(*Y.shape) * np.sqrt((Y ** 2).sum(0)) * noise

        K = X.dot(X.T)
        # Strictly positive, strongly non-uniform weights.
        sample_weights = 1. + (rng.randn(n_samples) ** 2) * 10

        # Fit the noisy targets in both spaces so the comparison does not
        # rely on an exactly linear, noise-free relationship.
        coef_sample_space = _solve_dense_cholesky_kernel(
            K, Y_noisy, alpha, sample_weight=sample_weights)

        coef_feature_space = _solve_dense_cholesky(
            X, Y_noisy, alpha, sample_weight=sample_weights)

        assert_array_almost_equal(X.T.dot(coef_sample_space),
                                  coef_feature_space.T)
| 564 | + |
| 565 | + |
def test_branches_to_sample_weights_in_feature_space():
    """Fit a weighted Ridge on a very tall, two-column design matrix.

    The number of samples vastly exceeds the number of features and
    sample weights are supplied. The sample count is chosen so that a
    treatment in sample space is expected to raise a MemoryError; the
    test therefore only completes if the fit is carried out in feature
    space despite the weights.
    """
    prng = np.random.RandomState(42)

    # Sample count sized to provoke a MemoryError on the sample-space path.
    terabytes = 2
    n_rows = int(np.ceil(np.sqrt(terabytes * 8 * 1024 ** 4)))
    n_cols = 2

    design = prng.randn(n_rows, n_cols)
    true_coef = np.array([1., 2.])
    target = design.dot(true_coef)

    # Weights are all >= 1, hence strictly positive.
    weights = prng.randn(n_rows) ** 2 + 1

    # No assertion: completing the fit without an exception is the check.
    Ridge(alpha=1.).fit(design, target, sample_weight=weights)
| 589 | + |
0 commit comments