microsoft
diff --git a/rdagent/app/data_science/conf.py b/rdagent/app/data_science/conf.py
Lines changed: 10 additions & 0 deletions
diff --git a/rdagent/components/coder/data_science/ensemble/__init__.py b/rdagent/components/coder/data_science/ensemble/__init__.py
Lines changed: 23 additions & 1 deletion
diff --git a/rdagent/components/coder/data_science/ensemble/eval_tests/ensemble_test.txt b/rdagent/components/coder/data_science/ensemble/eval_tests/ensemble_test.txt
Lines changed: 4 additions & 2 deletions
diff --git a/rdagent/components/coder/data_science/ensemble/prompts.yaml b/rdagent/components/coder/data_science/ensemble/prompts.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/rdagent/components/coder/data_science/feature/__init__.py b/rdagent/components/coder/data_science/feature/__init__.py
Lines changed: 13 additions & 1 deletion
diff --git a/rdagent/components/coder/data_science/feature/eval_tests/feature_test.txt b/rdagent/components/coder/data_science/feature/eval_tests/feature_test.txt
Lines changed: 1 addition & 6 deletions
diff --git a/rdagent/components/coder/data_science/feature/prompts.yaml b/rdagent/components/coder/data_science/feature/prompts.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/rdagent/components/coder/data_science/model/__init__.py b/rdagent/components/coder/data_science/model/__init__.py
Lines changed: 13 additions & 1 deletion
diff --git a/rdagent/components/coder/data_science/model/eval.py b/rdagent/components/coder/data_science/model/eval.py
Lines changed: 0 additions & 2 deletions
diff --git a/rdagent/components/coder/data_science/model/eval_tests/model_test.txt b/rdagent/components/coder/data_science/model/eval_tests/model_test.txt
Lines changed: 3 additions & 0 deletions
@@ -19,5 +19,15 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
     full_timeout: int = 3600
     """The timeout limit for running on full data"""
 
+    ### specific feature
+
+    #### enable specification
+    spec_enabled: bool = True
+    # - [ ] rdagent/components/coder/data_science/raw_data_loader/__init__.py: make spec implementation optional
+    # - [ ] move spec responsibility into  rdagent/scenarios/data_science/share.yaml
+    # - [ ] make every spec.md optional and replace it with the test & responsibility ("spec/.*\.md").
+    # - [ ] replace yaml render with target test.  "spec > .yaml data_science !out_spec !task_spec model_spec"
+    # - [ ] At the head of all tests, emphasize the function to be tested.
+
 
 DS_RD_SETTING = DataScienceBasePropSetting()
@@ -12,8 +12,12 @@
 """
 
 import json
+from pathlib import Path
 from typing import Dict
 
+from jinja2 import Environment, StrictUndefined
+
+from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.components.coder.CoSTEER import CoSTEER
 from rdagent.components.coder.CoSTEER.evaluators import (
     CoSTEERMultiEvaluator,
@@ -35,6 +39,8 @@
 from rdagent.utils.agent.ret import PythonAgentOut
 from rdagent.utils.agent.tpl import T
 
+DIRNAME = Path(__file__).absolute().resolve().parent
+
 
 class EnsembleMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
     def implement_one_task(
@@ -79,8 +85,24 @@ def implement_one_task(
             all_code=workspace.all_codes,
             out_spec=PythonAgentOut.get_spec(),
         )
+
+        if DS_RD_SETTING.spec_enabled:
+            code_spec = workspace.file_dict["spec/ensemble.md"]
+        else:
+            test_code = (
+                Environment(undefined=StrictUndefined)
+                .from_string((DIRNAME / "eval_tests" / "ensemble_test.txt").read_text())
+                .render(
+                    model_names=[
+                        fn[:-3] for fn in workspace.file_dict.keys() if fn.startswith("model_") and "test" not in fn
+                    ]
+                )
+            )
+            code_spec = T("scenarios.data_science.share:component_spec.general").r(
+                spec=T("scenarios.data_science.share:component_spec.Ensemble").r(), test_code=test_code
+            )
         user_prompt = T(".prompts:ensemble_coder.user").r(
-            ensemble_spec=workspace.file_dict["spec/ensemble.md"],
+            code_spec=code_spec,
             latest_code=workspace.file_dict.get("ensemble.py"),
             latest_code_feedback=prev_task_feedback,
         )
 
@@ -1,9 +1,11 @@
 """
-A qualified ensemble implementation should:
-- Successfully run
+Tests for `ensemble_workflow` in ensemble.py
+
+A qualified ensemble_workflow implementation should:
 - Return predictions
 - Have correct shapes for inputs and outputs
 - Use validation data appropriately
+- Generate a scores.csv file
 """
 
 import numpy as np
 
@@ -51,8 +51,8 @@ ensemble_coder:
     {% endif %}
 
   user: |-
-    --------- Ensemble Specification ---------
-    {{ ensemble_spec }}
+    --------- Code Specification ---------
+    {{ code_spec }}
 
     {% if latest_code %}
     --------- Former code ---------
 
@@ -1,6 +1,8 @@
 import json
+from pathlib import Path
 from typing import Dict
 
+from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.components.coder.CoSTEER import CoSTEER
 from rdagent.components.coder.CoSTEER.evaluators import (
     CoSTEERMultiEvaluator,
@@ -22,6 +24,8 @@
 from rdagent.utils.agent.ret import PythonAgentOut
 from rdagent.utils.agent.tpl import T
 
+DIRNAME = Path(__file__).absolute().resolve().parent
+
 
 class FeatureMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
     def implement_one_task(
@@ -64,8 +68,16 @@ def implement_one_task(
             queried_former_failed_knowledge=queried_former_failed_knowledge[0],
             out_spec=PythonAgentOut.get_spec(),
         )
+        code_spec = (
+            workspace.file_dict["spec/feature.md"]
+            if DS_RD_SETTING.spec_enabled
+            else T("scenarios.data_science.share:component_spec.general").r(
+                spec=T("scenarios.data_science.share:component_spec.FeatureEng").r(),
+                test_code=(DIRNAME / "eval_tests" / "feature_test.txt").read_text(),
+            )
+        )
         user_prompt = T(".prompts:feature_coder.user").r(
-            feature_spec=workspace.file_dict["spec/feature.md"],
+            code_spec=code_spec,
             latest_code=workspace.file_dict.get("feature.py"),
             latest_code_feedback=prev_task_feedback,
         )
 
@@ -1,10 +1,5 @@
 """
-A qualified data loader should support following features
-- successfully run
-- len(test) == len(test_ids) == submission length
-- len(train) == len(y)
-
-Please make sure the stdout is rich enough to support informative feedback
+Tests for `feat_eng` in feature.py
 """
 
 import pickle
 
@@ -55,8 +55,8 @@ feature_coder:
     {% endif %}
   
   user: |-
-    --------- Feature Processing Specification ---------
-    {{ feature_spec }}
+    --------- Code Specification ---------
+    {{ code_spec }}
 
     {% if latest_code %}
     --------- Former code ---------
 
@@ -1,5 +1,7 @@
+from pathlib import Path
 from typing import Dict
 
+from rdagent.app.data_science.conf import DS_RD_SETTING
 from rdagent.components.coder.CoSTEER import CoSTEER
 from rdagent.components.coder.CoSTEER.evaluators import (
     CoSTEERMultiEvaluator,
@@ -23,6 +25,8 @@
 from rdagent.utils.agent.ret import PythonBatchEditOut
 from rdagent.utils.agent.tpl import T
 
+DIRNAME = Path(__file__).absolute().resolve().parent
+
 
 class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
     def implement_one_task(
@@ -71,8 +75,16 @@ def implement_one_task(
         #     latest_code=workspace.file_dict.get(f"{target_task.name}.py", None),
         # )
         # We want to use a simpler way to build the user prompt.
+        code_spec = (
+            workspace.file_dict["spec/model.md"]
+            if DS_RD_SETTING.spec_enabled
+            else T("scenarios.data_science.share:component_spec.general").r(
+                spec=T("scenarios.data_science.share:component_spec.Model").r(),
+                test_code=(DIRNAME / "eval_tests" / "model_test.txt").read_text().replace("model01", target_task.name),
+            )
+        )
         user_prompt = T(".prompts:model_coder.user_general").r(
-            model_spec=workspace.file_dict["spec/model.md"],
+            code_spec=code_spec,
             latest_model_code=workspace.get_codes(
                 r"^model_(?!test)\w+\.py$"
             ),  # TODO: If we have high failure rate here, we should clean this step with less information.
 
@@ -98,8 +98,6 @@ def evaluate(
                 task_desc=target_task.get_task_information(),
                 test_code=test_code,
                 code=implementation.file_dict[f"{target_task.name}.py"],
-                scenario=self.scen.get_scenario_all_desc(),
-                spec=implementation.file_dict["spec/model.md"],
                 workflow_stdout=workflow_stdout,
                 workflow_code=implementation.all_codes,
             )
 
@@ -1,3 +1,6 @@
+"""
+Tests for `model_workflow` in model01.py
+"""
 import time
 
 from feature import feat_eng
Original file line number	Diff line number	Diff line change
`@@ -98,8 +98,6 @@ def evaluate(`
`98`	`98`	`task_desc=target_task.get_task_information(),`
`99`	`99`	`test_code=test_code,`
`100`	`100`	`code=implementation.file_dict[f"{target_task.name}.py"],`
`101`		`- scenario=self.scen.get_scenario_all_desc(),`
`102`		`- spec=implementation.file_dict["spec/model.md"],`
`103`	`101`	`workflow_stdout=workflow_stdout,`
`104`	`102`	`workflow_code=implementation.all_codes,`
`105`	`103`	`)`