@@ -15,7 +15,7 @@
 from rdagent.core.experiment import FBWorkspace, Task
 from rdagent.oai.llm_utils import APIBackend
 from rdagent.utils.agent.tpl import T
-from rdagent.utils.env import DockerEnv, DSDockerConf
+from rdagent.utils.env import DockerEnv, DSDockerConf, MLEBDockerConf
 
 DIRNAME = Path(__file__).absolute().resolve().parent
 
@@ -53,11 +53,22 @@ def evaluate(
                 code="This task has failed too many times, skip implementation.",
                 final_decision=False,
             )
+
+        # DockerEnv for Kaggle Competition
         ds_docker_conf = DSDockerConf()
         ds_docker_conf.extra_volumes = {
             f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"
         }
         de = DockerEnv(conf=ds_docker_conf)
+
+        # DockerEnv for MLEBench submission validation
+        mle_de_conf = MLEBDockerConf()
+        mle_de_conf.extra_volumes = {
+            f"{DS_RD_SETTING.local_data_path}/zip_files": "/mle/data",
+        }
+        mde = DockerEnv(conf=mle_de_conf)
+        mde.prepare()
+
         # Clean the scores.csv & submission.csv.
         stdout = implementation.execute(env=de, entry=f"rm submission.csv scores.csv")
 
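This hunk sets up two separate Docker environments: `de` mounts the sampled competition data at /kaggle/input so the candidate workflow can run, while `mde` mounts the raw competition zip files at /mle/data for MLE-bench submission validation and is readied via mde.prepare() before use. A hypothetical smoke test, not part of this commit, could reuse the same workspace-execute pattern to confirm each container sees its volume:

# Hypothetical sanity check (not in this commit): list each mounted
# volume to confirm both containers see their data before the real
# checks run. `implementation`, `de`, and `mde` are as in the diff above.
stdout = implementation.execute(env=de, entry="ls /kaggle/input")   # sampled competition data
stdout += implementation.execute(env=mde, entry="ls /mle/data")     # MLE-bench zip files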
@@ -85,10 +96,21 @@ def evaluate(
         if not submission_fp.exists():
             stdout += "\nSubmission file (submission.csv) is not generated."
         else:
-            check_code = (DIRNAME / "eval_tests" / "submission_check.txt").read_text()
-            implementation.inject_files(**{"submission_check.py": check_code})
+            base_check_code = (DIRNAME / "eval_tests" / "submission_check.txt").read_text()
+            implementation.inject_files(**{"submission_check.py": base_check_code})
+            stdout += "----Submission Check 1-----\n"
             stdout += implementation.execute(env=de, entry="python submission_check.py")
 
+        # MLEBench Check
+        mle_check_code = (
+            (DIRNAME / "eval_tests" / "mle_submission_check.txt")
+            .read_text()
+            .replace("<competition_id>", self.scen.competition)
+        )
+        implementation.inject_files(**{"mle_submission_check.py": mle_check_code})
+        stdout += "----Submission Check 2-----\n"
+        stdout += implementation.execute(env=mde, entry=f"python mle_submission_check.py")
+
         system_prompt = T(".prompts:workflow_eval.system").r(
             scenario=self.scen.get_scenario_all_desc(),
             task_desc=target_task.get_task_information(),
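The MLEBench check injects a template whose contents are not part of this diff: eval_tests/mle_submission_check.txt is read, its `<competition_id>` placeholder is replaced with the current competition, and the result is executed inside `mde` as mle_submission_check.py. As a rough illustration of the placeholder pattern only, a minimal template could look like the sketch below; the pandas-based checks are assumptions, not the actual template:

# Hypothetical stand-in for eval_tests/mle_submission_check.txt; the real
# template is not shown in this diff. "<competition_id>" is rewritten to
# self.scen.competition before the file is injected into the workspace.
from pathlib import Path

import pandas as pd

competition_id = "<competition_id>"

submission = Path("submission.csv")
if not submission.exists():
    raise SystemExit(f"submission.csv missing for {competition_id}")

df = pd.read_csv(submission)
print(f"{competition_id}: {len(df)} rows, columns={list(df.columns)}")

Running the check inside the MLE-bench container rather than the competition container presumably keeps grader-specific dependencies confined to the MLEBDockerConf image.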