Skip to content

Commit 5fb0608

Browse files
authored
fix: bug of saving preprocess cache files (#310)
* save independent returns of preprocess_script() to 'others.pkl'
* fix CI
1 parent dab2cff commit 5fb0608

File tree

9 files changed: 22 additions and 19 deletions (+22 / -19 lines changed)

rdagent/scenarios/kaggle/experiment/forest-cover-type-prediction_template/fea_share_preprocess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ def preprocess_script():
3333
y_train = pd.read_pickle("y_train.pkl")
3434
y_valid = pd.read_pickle("y_valid.pkl")
3535
X_test = pd.read_pickle("X_test.pkl")
36-
ids = pd.read_pickle("ids.pkl")
36+
others = pd.read_pickle("others.pkl")
3737

38-
return X_train, X_valid, y_train, y_valid, X_test, ids
38+
return X_train, X_valid, y_train, y_valid, X_test, *others
3939

4040
X_train, X_valid, y_train, y_valid = prepreprocess()
4141

rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ def preprocess_script():
9090
y_train = pd.read_pickle("/kaggle/preprocessed_data/y_train.pkl")
9191
y_valid = pd.read_pickle("/kaggle/preprocessed_data/y_valid.pkl")
9292
X_test = pd.read_pickle("/kaggle/preprocessed_data/X_test.pkl")
93-
passenger_ids = pd.read_pickle("/kaggle/preprocessed_data/passenger_ids.pkl")
93+
others = pd.read_pickle("/kaggle/preprocessed_data/others.pkl")
9494

95-
return X_train, X_valid, y_train, y_valid, X_test, passenger_ids
95+
return X_train, X_valid, y_train, y_valid, X_test, *others
9696
X_train, X_valid, y_train, y_valid = prepreprocess()
9797

9898
# Fit the preprocessor on the training data

rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/fea_share_preprocess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ def preprocess_script():
9090
y_train = pd.read_pickle("y_train.pkl")
9191
y_valid = pd.read_pickle("y_valid.pkl")
9292
X_test = pd.read_pickle("X_test.pkl")
93-
passenger_ids = pd.read_pickle("passenger_ids.pkl")
93+
others = pd.read_pickle("others.pkl")
9494

95-
return X_train, X_valid, y_train, y_valid, X_test, passenger_ids
95+
return X_train, X_valid, y_train, y_valid, X_test, *others
9696
X_train, X_valid, y_train, y_valid = prepreprocess()
9797

9898
# Fit the preprocessor on the training data

rdagent/scenarios/kaggle/experiment/playground-series-s4e9_template/fea_share_preprocess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,9 @@ def preprocess_script():
8787
y_train = pd.read_pickle("y_train.pkl")
8888
y_valid = pd.read_pickle("y_valid.pkl")
8989
X_test = pd.read_pickle("X_test.pkl")
90-
passenger_ids = pd.read_pickle("passenger_ids.pkl")
90+
others = pd.read_pickle("others.pkl")
9191

92-
return X_train, X_valid, y_train, y_valid, X_test, passenger_ids
92+
return X_train, X_valid, y_train, y_valid, X_test, *others
9393
X_train, X_valid, y_train, y_valid = prepreprocess()
9494

9595
# Fit the preprocessor on the training data

rdagent/scenarios/kaggle/experiment/scenario.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def source_data(self) -> str:
113113
y_train,
114114
y_valid,
115115
X_test,
116-
passenger_ids,
116+
*others,
117117
) = preprocess_experiment.experiment_workspace.generate_preprocess_data()
118118

119119
data_folder.mkdir(exist_ok=True, parents=True)
@@ -122,7 +122,7 @@ def source_data(self) -> str:
122122
pickle.dump(y_train, open(data_folder / "y_train.pkl", "wb"))
123123
pickle.dump(y_valid, open(data_folder / "y_valid.pkl", "wb"))
124124
pickle.dump(X_test, open(data_folder / "X_test.pkl", "wb"))
125-
pickle.dump(passenger_ids, open(data_folder / "passenger_ids.pkl", "wb"))
125+
pickle.dump(others, open(data_folder / "others.pkl", "wb"))
126126

127127
buffer = io.StringIO()
128128
X_valid.info(verbose=True, buf=buffer, show_counts=True)

rdagent/scenarios/kaggle/experiment/sf-crime_template/fea_share_preprocess.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ def preprocess_script():
108108
y_train = pd.read_pickle("y_train.pkl")
109109
y_valid = pd.read_pickle("y_valid.pkl")
110110
X_test = pd.read_pickle("X_test.pkl")
111-
return X_train, X_valid, y_train, y_valid, X_test
111+
others = pd.read_pickle("others.pkl")
112+
return X_train, X_valid, y_train, y_valid, X_test, *others
112113

113114
X_train, X_valid, y_train, y_valid, test, category_encoder, test_ids = prepreprocess()
114115

rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/fea_share_preprocess.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ def preprocess_script():
8484
y_train = pd.read_pickle("y_train.pkl")
8585
y_valid = pd.read_pickle("y_valid.pkl")
8686
X_test = pd.read_pickle("X_test.pkl")
87-
passenger_ids = pd.read_pickle("passenger_ids.pkl")
87+
others = pd.read_pickle("others.pkl")
8888

89-
return X_train, X_valid, y_train, y_valid, X_test, passenger_ids
89+
return X_train, X_valid, y_train, y_valid, X_test, *others
9090
X_train, X_valid, y_train, y_valid = prepreprocess()
9191

9292
# Fit the preprocessor on the training data

rdagent/scenarios/kaggle/experiment/workspace.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import subprocess
22
import zipfile
33
from pathlib import Path
4+
from typing import Any
45

56
import pandas as pd
67

@@ -14,14 +15,14 @@
1415
1516
from fea_share_preprocess import preprocess_script
1617
17-
X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script()
18+
X_train, X_valid, y_train, y_valid, X_test, *others = preprocess_script()
1819
1920
pickle.dump(X_train, open("X_train.pkl", "wb"))
2021
pickle.dump(X_valid, open("X_valid.pkl", "wb"))
2122
pickle.dump(y_train, open("y_train.pkl", "wb"))
2223
pickle.dump(y_valid, open("y_valid.pkl", "wb"))
2324
pickle.dump(X_test, open("X_test.pkl", "wb"))
24-
pickle.dump(passenger_ids, open("passenger_ids.pkl", "wb"))
25+
pickle.dump(others, open("others.pkl", "wb"))
2526
"""
2627

2728

@@ -34,7 +35,7 @@ def __init__(self, template_folder_path: Path, *args, **kwargs) -> None:
3435

3536
def generate_preprocess_data(
3637
self,
37-
) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, pd.Series]:
38+
) -> tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, pd.DataFrame, Any]:
3839
kgde = KGDockerEnv(KAGGLE_IMPLEMENT_SETTING.competition)
3940
kgde.prepare()
4041

@@ -47,7 +48,7 @@ def generate_preprocess_data(
4748
"y_train.pkl",
4849
"y_valid.pkl",
4950
"X_test.pkl",
50-
"passenger_ids.pkl",
51+
"others.pkl",
5152
],
5253
running_extra_volume=(
5354
{KAGGLE_IMPLEMENT_SETTING.local_data_path + "/" + KAGGLE_IMPLEMENT_SETTING.competition: "/kaggle/input"}
@@ -59,8 +60,8 @@ def generate_preprocess_data(
5960
logger.error("Feature preprocess failed.")
6061
raise Exception("Feature preprocess failed.")
6162
else:
62-
X_train, X_valid, y_train, y_valid, X_test, passenger_ids = results
63-
return X_train, X_valid, y_train, y_valid, X_test, passenger_ids
63+
X_train, X_valid, y_train, y_valid, X_test, others = results
64+
return X_train, X_valid, y_train, y_valid, X_test, *others
6465

6566
def execute(self, run_env: dict = {}, *args, **kwargs) -> str:
6667
logger.info(f"Running the experiment in {self.workspace_path}")

rdagent/scenarios/kaggle/kaggle_crawler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def download_data(competition: str, local_path: str = "/data/userdata/share/kagg
127127
"covid19-global-forecasting-week-1",
128128
"birdsong-recognition",
129129
"optiver-trading-at-the-close",
130+
"facebook-v-predicting-check-ins",
130131
]
131132

132133
for i in dsagent_cs + other_cs:

0 commit comments

Comments
 (0)