Skip to content

Commit 1a5f45a

Browse files
authored
feat: fix some bugs and add original features' description (#259)
* init a scenario for kaggle feature engineering * fix some bugs and add original features' description * refine the process of data downloading * fix an error * revert the code * fix a bug in feedback * fix a ci bug * fix a ci bug
1 parent 4cf22a6 commit 1a5f45a

File tree

17 files changed

+51
-107
lines changed

17 files changed

+51
-107
lines changed

docs/scens/model_agent_med.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,6 @@ You can try our demo by running the following command:
123123
124124
The following environment variables can be set in the `.env` file to customize the application's behavior:
125125
126-
.. autopydantic_settings:: rdagent.app.data_mining.conf.PropSetting
126+
.. autopydantic_settings:: rdagent.app.data_mining.conf.MedBasePropSetting
127127
:settings-show-field-summary: False
128128
:exclude-members: Config

rdagent/app/data_mining/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from rdagent.components.workflow.conf import BasePropSetting
66

77

8-
class PropSetting(BasePropSetting):
8+
class MedBasePropSetting(BasePropSetting):
99
class Config:
1010
env_prefix = "DM_"
1111
"""Use `DM_` as prefix for environment variables"""
@@ -46,4 +46,4 @@ class Config:
4646
"""Physionet account password"""
4747

4848

49-
PROP_SETTING = PropSetting()
49+
MED_PROP_SETTING = MedBasePropSetting()

rdagent/app/data_mining/model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import fire
22

3-
from rdagent.app.data_mining.conf import PROP_SETTING
3+
from rdagent.app.data_mining.conf import MED_PROP_SETTING
44
from rdagent.components.workflow.rd_loop import RDLoop
55
from rdagent.core.exception import ModelEmptyError
66

@@ -21,7 +21,7 @@ def main(path=None, step_n=None):
2121
2222
"""
2323
if path is None:
24-
model_loop = ModelRDLoop(PROP_SETTING)
24+
model_loop = ModelRDLoop(MED_PROP_SETTING)
2525
else:
2626
model_loop = ModelRDLoop.load(path)
2727
model_loop.run(step_n=step_n)

rdagent/app/kaggle/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from rdagent.components.workflow.conf import BasePropSetting
66

77

8-
class PropSetting(BasePropSetting):
8+
class KaggleBasePropSetting(BasePropSetting):
99
class Config:
1010
env_prefix = "KG_"
1111
"""Use `KG_` as prefix for environment variables"""
@@ -43,4 +43,4 @@ class Config:
4343
competition: str = ""
4444

4545

46-
PROP_SETTING = PropSetting()
46+
KAGGLE_IMPLEMENT_SETTING = KaggleBasePropSetting()

rdagent/app/kaggle/loop.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import fire
55

6-
from rdagent.app.kaggle.conf import PROP_SETTING
6+
from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING
77
from rdagent.components.workflow.conf import BasePropSetting
88
from rdagent.components.workflow.rd_loop import RDLoop
99
from rdagent.core.developer import Developer
@@ -83,9 +83,9 @@ def main(path=None, step_n=None, competition=None):
8383
8484
"""
8585
if competition:
86-
PROP_SETTING.competition = competition
86+
KAGGLE_IMPLEMENT_SETTING.competition = competition
8787
if path is None:
88-
model_loop = ModelRDLoop(PROP_SETTING)
88+
model_loop = ModelRDLoop(KAGGLE_IMPLEMENT_SETTING)
8989
else:
9090
model_loop = ModelRDLoop.load(path)
9191
model_loop.run(step_n=step_n)

rdagent/app/kaggle_feature/model.py

Lines changed: 0 additions & 65 deletions
This file was deleted.

rdagent/components/coder/factor_coder/factor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas as pd
1010
from filelock import FileLock
1111

12+
from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING
1213
from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS
1314
from rdagent.core.exception import CodeFormatError, CustomRuntimeError, NoOutputError
1415
from rdagent.core.experiment import Experiment, FBWorkspace, Task
@@ -148,9 +149,7 @@ def execute(self, store_result: bool = False, data_type: str = "Debug") -> Tuple
148149
)
149150
elif self.target_task.version == 2:
150151
# TODO you can change the name of the data folder for a better understanding
151-
source_data_path = Path(
152-
FACTOR_IMPLEMENT_SETTINGS.data_folder,
153-
)
152+
source_data_path = Path(FACTOR_IMPLEMENT_SETTINGS.data_folder) / KAGGLE_IMPLEMENT_SETTING.competition
154153

155154
source_data_path.mkdir(exist_ok=True, parents=True)
156155
code_path = self.workspace_path / f"factor.py"
@@ -237,3 +236,4 @@ def from_folder(task: FactorTask, path: Union[str, Path], **kwargs):
237236

238237

239238
FactorExperiment = Experiment
239+
FeatureExperiment = Experiment

rdagent/components/coder/model_coder/model.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,5 +137,4 @@ def execute(
137137
return execution_feedback_str, execution_model_output
138138

139139

140-
FeatureExperiment = Experiment
141140
ModelExperiment = Experiment

rdagent/scenarios/data_mining/experiment/workspace.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import pandas as pd
44

5-
from rdagent.app.data_mining.conf import PROP_SETTING
5+
from rdagent.app.data_mining.conf import MED_PROP_SETTING
66
from rdagent.core.experiment import FBWorkspace
77
from rdagent.log import rdagent_logger as logger
88
from rdagent.utils.env import DMDockerEnv
@@ -15,7 +15,7 @@ def __init__(self, template_folder_path: Path, *args, **kwargs) -> None:
1515

1616
def execute(self, run_env: dict = {}, *args, **kwargs) -> str:
1717
qtde = DMDockerEnv()
18-
qtde.prepare(PROP_SETTING.username, PROP_SETTING.password)
18+
qtde.prepare(MED_PROP_SETTING.username, MED_PROP_SETTING.password)
1919

2020
execute_log = qtde.run(
2121
local_path=str(self.workspace_path),

rdagent/scenarios/kaggle/developer/feedback.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,20 @@ def process_results(current_result, sota_result):
2626
sota_df = pd.DataFrame(sota_result)
2727

2828
# Combine the dataframes on the Metric index
29-
combined_df = pd.DataFrame({"Current Result": current_df, "SOTA Result": sota_df})
29+
combined_df = pd.concat([current_df, sota_df], axis=1)
30+
combined_df.columns = ["current_df", "sota_df"]
3031

31-
# Add a new column to show which result is bigger
32-
combined_df["Bigger Result"] = combined_df.apply(
33-
lambda row: "Equal"
34-
if row["Current Result"] == row["SOTA Result"]
35-
else ("Current Result" if row["Current Result"] > row["SOTA Result"] else "SOTA Result"),
32+
combined_df["the largest"] = combined_df.apply(
33+
lambda row: "sota_df"
34+
if row["sota_df"] > row["current_df"]
35+
else ("Equal" if row["sota_df"] == row["current_df"] else "current_df"),
3636
axis=1,
3737
)
3838

3939
# Add a note about metric direction
4040
combined_df["Note"] = "Direction of improvement (higher/lower is better) should be judged per metric"
4141

42-
return combined_df.to_string()
42+
return combined_df
4343

4444

4545
class KGHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):

0 commit comments

Comments
 (0)