@@ -221,11 +221,15 @@ def test_gamma_y_positive(y):
 
 def test_gamma():
     # For Gamma distributed target, an HGBT with Gamma loss should give better results
-    # than an HGBT with Poisson deviance, measured in Gamma deviance as metric.
-    # Note that we do not use squared error because it can potentially predict negaitve
-    # values.
+    # than an HGBT with squared error, measured in Gamma deviance as metric/score.
+    # Note that squared error could potentially predict negative values which is
+    # invalid (np.inf) for the Gamma deviance. A Poisson HGBT (having a log link)
+    # would not have that defect.
+    # Important note: It seems that a Poisson HGBT almost always has better
+    # out-of-sample performance than the Gamma HGBT, measured in Gamma deviance.
+    # LightGBM shows the same behaviour. The exact origin is unclear.
     rng = np.random.RandomState(42)
-    n_train, n_test, n_features = 500, 500, 20
+    n_train, n_test, n_features = 500, 100, 20
     X = make_low_rank_matrix(
         n_samples=n_train + n_test,
         n_features=n_features,
@@ -243,21 +247,21 @@ def test_gamma():
         X, y, test_size=n_test, random_state=rng
     )
     gbdt_gamma = HistGradientBoostingRegressor(loss="gamma", random_state=123)
-    gbdt_pois = HistGradientBoostingRegressor(loss="poisson", random_state=123)
+    gbdt_mse = HistGradientBoostingRegressor(loss="squared_error", random_state=123)
     dummy = DummyRegressor(strategy="mean")
-    for model in (gbdt_gamma, gbdt_pois, dummy):
+    for model in (gbdt_gamma, gbdt_mse, dummy):
         model.fit(X_train, y_train)
 
     for sample, X, y in [("train", X_train, y_train), ("test", X_test, y_test)]:
-        mgd_gbdt_gamma = mean_gamma_deviance(y, gbdt_gamma.predict(X))
-        mgd_gbdt_pois = mean_gamma_deviance(y, gbdt_pois.predict(X))
-        mgd_dummy = mean_gamma_deviance(y, dummy.predict(X))
-        assert mgd_gbdt_gamma < mgd_dummy
-        if sample == "train":
-            # Important note: It seems that the Poisson HGBT almost always has better
-            # out-of-sample performance than the Gamma HGBT, measured in Gamma
-            # deviance. LightGBM shows the same behaviour. The exact origin is unclear.
-            assert mgd_gbdt_gamma < mgd_gbdt_pois
+        score_gbdt_gamma = mean_gamma_deviance(y, gbdt_gamma.predict(X))
+        # We restrict the squared error HGBT to predict at least the minimum seen y at
+        # train time to make it strictly positive.
+        score_gbdt_mse = mean_gamma_deviance(
+            y, np.maximum(np.min(y_train), gbdt_mse.predict(X))
+        )
+        score_dummy = mean_gamma_deviance(y, dummy.predict(X))
+        assert score_gbdt_gamma < score_dummy
+        assert score_gbdt_gamma < score_gbdt_mse
 
 
 @pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
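A note on the `np.maximum` clipping added above: the Gamma deviance is only defined for strictly positive predictions, and scikit-learn's `mean_gamma_deviance` rejects non-positive `y_pred`, while a squared-error HGBT (identity link) is free to predict values <= 0. The standalone sketch below, with made-up numbers standing in for real model output, illustrates the clipping workaround and why log-link models never need it:

```python
import numpy as np
from sklearn.metrics import mean_gamma_deviance

# Made-up raw predictions standing in for model output; a squared-error
# HGBT (identity link) can produce non-positive values like -0.3.
y_true = np.array([0.5, 1.0, 2.0, 4.0])
raw_pred = np.array([-0.3, 0.8, 2.5, 3.9])

# mean_gamma_deviance raises on non-positive y_pred, so the raw predictions
# cannot be scored directly. Clipping at the smallest target seen during
# training (here y_true.min() as a stand-in for np.min(y_train)) restores
# validity; this mirrors the np.maximum(np.min(y_train), ...) trick above.
safe_pred = np.maximum(y_true.min(), raw_pred)
print(mean_gamma_deviance(y_true, safe_pred))

# A log-link model (e.g. a Poisson or Gamma HGBT) predicts exp(raw_score),
# which is positive by construction, so it needs no such workaround.
raw_score = np.array([-1.2, 0.0, 0.9, 1.4])  # arbitrary link-space values
assert np.all(np.exp(raw_score) > 0)
```

Clipping at the smallest training target is a cheap way to keep the deviance comparison well defined without altering the squared-error model itself.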