Skip to content

Commit 1edf881

Browse files
authored
feat: add model removal and adjust some framework logic (#681)
* prune model task * add component_description * add model removal logic to component, hypo, and task gen * fix ci * adjust coder to meet the requirement of model removal * fix and refine the logic of model removal * add model removal logic in model_eval * fix ci * fix ci * prune some unnecessary codes
1 parent e507fa0 commit 1edf881

File tree

9 files changed

+137
-79
lines changed

9 files changed

+137
-79
lines changed

rdagent/components/coder/data_science/model/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ def implement_one_task(
103103
f"{target_task.name}.py"
104104
] != workspace.file_dict.get(f"{target_task.name}.py"):
105105
break
106+
107+
# If the task involves model removal, assume it can only process one model at a time.
108+
if len(batch_edit) == 1 and batch_edit[f"{target_task.name}.py"] == "__DEL__":
109+
break
106110
else:
107111
raise CoderError("Failed to generate a new model code.")
108112

rdagent/components/coder/data_science/model/eval.py

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -59,37 +59,55 @@ def evaluate(
5959
env = get_ds_env()
6060
env.conf.extra_volumes = {f"{DS_RD_SETTING.local_data_path}/sample/{self.scen.competition}": "/kaggle/input"}
6161

62-
fname = "test/model_test.py"
63-
test_code = (
64-
(DIRNAME / "eval_tests" / "model_test.txt").read_text().replace("model01", target_task.name)
65-
) # only check the model changed this time
66-
implementation.inject_files(**{fname: test_code})
67-
stdout = implementation.execute(env=env, entry=f"python {fname}")
62+
if_model_removed = False
6863

69-
if stdout is None:
70-
raise CoderError(
71-
"The execution output contains too many progress bars and results in the LLM's token size exceeding the limit."
72-
)
64+
if f"{target_task.name}.py" in implementation.file_dict:
65+
fname = "test/model_test.py"
66+
test_code = (
67+
(DIRNAME / "eval_tests" / "model_test.txt").read_text().replace("model01", target_task.name)
68+
) # only check the model changed this time
69+
implementation.inject_files(**{fname: test_code})
70+
stdout = implementation.execute(env=env, entry=f"python {fname}")
71+
72+
if stdout is None:
73+
raise CoderError(
74+
"The execution output contains too many progress bars and results in the LLM's token size exceeding the limit."
75+
)
76+
else:
77+
if_model_removed = True
78+
stdout = f"Model {target_task.name} removal succeeded."
7379

7480
if "main.py" in implementation.file_dict:
7581
workflow_stdout = implementation.execute(env=env, entry="python main.py")
7682
workflow_stdout = re.sub(r"=== Start of EDA part ===(.*)=== End of EDA part ===", "", workflow_stdout)
7783
else:
7884
workflow_stdout = None
7985

80-
system_prompt = T(".prompts:model_eval.system").r(
81-
task_desc=target_task.get_task_information(),
82-
test_code=test_code,
83-
code=implementation.file_dict[f"{target_task.name}.py"],
84-
scenario=self.scen.get_scenario_all_desc(),
85-
spec=implementation.file_dict["spec/model.md"],
86-
workflow_stdout=workflow_stdout,
87-
workflow_code=implementation.all_codes,
88-
)
89-
user_prompt = T(".prompts:model_eval.user").r(
90-
stdout=stdout,
91-
workflow_stdout=workflow_stdout,
92-
)
86+
if if_model_removed:
87+
system_prompt = T(".prompts:model_eval_rm.system").r(
88+
task_desc=target_task.get_task_information(),
89+
workflow_stdout=workflow_stdout,
90+
workflow_code=implementation.all_codes,
91+
)
92+
user_prompt = T(".prompts:model_eval_rm.user").r(
93+
stdout=stdout,
94+
workflow_stdout=workflow_stdout,
95+
)
96+
else:
97+
system_prompt = T(".prompts:model_eval.system").r(
98+
task_desc=target_task.get_task_information(),
99+
test_code=test_code,
100+
code=implementation.file_dict[f"{target_task.name}.py"],
101+
scenario=self.scen.get_scenario_all_desc(),
102+
spec=implementation.file_dict["spec/model.md"],
103+
workflow_stdout=workflow_stdout,
104+
workflow_code=implementation.all_codes,
105+
)
106+
user_prompt = T(".prompts:model_eval.user").r(
107+
stdout=stdout,
108+
workflow_stdout=workflow_stdout,
109+
)
110+
93111
return build_cls_from_json_with_retry(
94112
ModelSingleFeedback,
95113
system_prompt=system_prompt,

rdagent/components/coder/data_science/model/exp.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,13 @@ def __init__(
99
self,
1010
name: str,
1111
description: str,
12-
architecture: str = "",
1312
*args,
14-
hyperparameters: Dict[str, str] = {},
15-
model_type: Optional[str] = None,
1613
**kwargs,
1714
) -> None:
18-
self.architecture: str = architecture
19-
self.hyperparameters: str = hyperparameters
20-
self.model_type: str | None = (
21-
model_type # Tabular for tabular model, TimesSeries for time series model, Graph for graph model, XGBoost for XGBoost model
22-
# TODO: More Models Supported
23-
)
2415
super().__init__(name=name, description=description, *args, **kwargs)
2516

2617
def get_task_information(self):
2718
task_desc = f"""name: {self.name}
2819
description: {self.description}
2920
"""
30-
if self.architecture:
31-
task_desc += f"architecture: {self.architecture}\n"
32-
if self.hyperparameters:
33-
task_desc += f"hyperparameters: {self.hyperparameters}\n"
34-
if self.model_type:
35-
task_desc += f"model_type: {self.model_type}\n"
3621
return task_desc

rdagent/components/coder/data_science/model/prompts.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,43 @@ model_eval:
138138
--------- Whole workflow test stdout ---------
139139
{{ workflow_stdout }}
140140
{% endif %}
141+
142+
model_eval_rm:
143+
system: |-
144+
You are a data scientist responsible for evaluating the model removal process.
145+
146+
## Task Description
147+
{{ task_desc }}
148+
149+
{% if workflow_stdout is not none %}
150+
## Whole Workflow Consideration
151+
The model building code is part of the whole workflow. The user has executed the entire pipeline and provided additional stdout.
152+
153+
**Workflow Code:**
154+
```python
155+
{{ workflow_code }}
156+
```
157+
158+
You should evaluate both the model removal test results and the overall workflow results. **Approve the code only if both tests pass.**
159+
{% endif %}
160+
161+
## Evaluation Criteria
162+
You will be given the standard output (`stdout`) from the model removal test and, if applicable, the workflow test.
163+
164+
Please respond with your feedback in the following JSON format and order
165+
```json
166+
{
167+
"execution": "Describe how well the model removal executed, including any errors or issues encountered. Append all error messages and full traceback details without summarizing or omitting any information.",
168+
"return_checking": "Check the generated value, including whether the value is generated, and compare the shape of the model output with the requirement in spec.md.",
169+
"code": "Assess code quality, readability, and adherence to specifications.",
170+
"final_decision": <true/false>
171+
}
172+
```
173+
174+
user: |-
175+
--------- Model removal test stdout ---------
176+
{{ stdout }}
177+
{% if workflow_stdout is not none %}
178+
--------- Whole workflow test stdout ---------
179+
{{ workflow_stdout }}
180+
{% endif %}

rdagent/scenarios/data_science/proposal/exp_gen.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,10 @@ def __str__(self) -> str:
6161
"task_class": FeatureTask,
6262
},
6363
"Model": {
64-
"target_name": "Building model",
64+
"target_name": "Model",
6565
"spec_file": "spec/model.md",
6666
"task_output_format": T(".prompts:output_format.model").r(),
6767
"task_class": ModelTask,
68-
"extra_params": {
69-
"model_type": "Model type not provided",
70-
"architecture": "Model architecture not provided",
71-
"hyperparameters": "Model hyperparameters not provided",
72-
},
73-
"extra_requirement": T(".prompts:extra_requirement.model").r(),
7468
},
7569
"Ensemble": {
7670
"target_name": "Ensemble",
@@ -259,10 +253,6 @@ def _handle_missing_component(
259253
task = task_cls(
260254
name=component if component != "Model" else resp_dict.pop("model_name"),
261255
description=resp_dict.get("description", f"{component} description not provided"),
262-
**{
263-
k: resp_dict.get("extra_params", {}).get(k, v)
264-
for k, v in COMPONENT_TASK_MAPPING[component].get("extra_params", {}).items()
265-
},
266256
)
267257

268258
exp = DSExperiment(pending_tasks_list=[[task]], hypothesis=DSHypothesis(component))
@@ -350,6 +340,12 @@ def gen(self, trace: DSTrace) -> DSExperiment:
350340
scenario=scenario_desc,
351341
sota_exp_desc=sota_exp_desc,
352342
last_exp_diff=last_exp_diff,
343+
component_desc="\n".join(
344+
[
345+
f"[{key}] {value}"
346+
for key, value in T("scenarios.data_science.share:component_description").template.items()
347+
]
348+
),
353349
component_output_format=T(".prompts:output_format.component").r(),
354350
)
355351

@@ -396,12 +392,12 @@ def gen(self, trace: DSTrace) -> DSExperiment:
396392
hypothesis_output_format=T(".prompts:output_format.hypothesis").r(),
397393
task_specification=sota_exp.experiment_workspace.file_dict[component_info["spec_file"]],
398394
task_output_format=component_info["task_output_format"],
399-
extra_requirement=component_info.get("extra_requirement"),
400395
workflow_check=(not component == "Workflow"),
401396
)
402397

403398
user_prompt = T(".prompts:direct_exp_gen.user").r(
404399
targets=component_info["target_name"],
400+
sota_exp_desc=sota_exp_desc,
405401
sota_exp_and_feedback_list_desc=sota_exp_feedback_list_desc,
406402
failed_exp_and_feedback_list_desc=failed_exp_feedback_list_desc,
407403
last_exp_diff=last_exp_diff,

rdagent/scenarios/data_science/proposal/prompts.yaml

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,6 @@ direct_exp_gen:
213213
{% if workflow_check %}"workflow_update": [Partial Response Format 3], {% endif %}
214214
}
215215
216-
{% if extra_requirement %}
217-
{{ extra_requirement }}
218-
{% endif %}
219-
220216
user: |-
221217
# All former successful experiments and their feedbacks, the current SOTA solution is the combination of the best solutions of these trials:
222218
{{ sota_exp_and_feedback_list_desc }}
@@ -226,8 +222,23 @@ direct_exp_gen:
226222
The user has made several hypothesis on this scenario and did several evaluation on them.
227223
{{ failed_exp_and_feedback_list_desc }}
228224
229-
{% if targets == "Building model" %}
225+
{% if targets == "Model" %}
230226
Based on the feedback from previous experiment failures, if the failure was due to exceeding the time limit or memory constraints, start with the smallest model size or choose alternative algorithms or methods with significantly lower time or space complexity instead of using a neural network. You can then iteratively refine and optimize the model in later stages.
227+
228+
Here is the SOTA solution:
229+
{{ sota_exp_desc }}
230+
Pay attention to the **Results** section. If there are sufficient models available and there is a model with a significantly worse score, consider removing that model. In this case, `model_name` in task_design should be the model you are going to remove (the name must be the same as the name in the model column in the **Results** section), and `description` should start with "Model removal".
231+
Otherwise, if the number of available models is insufficient, your task is to first decide whether to:
232+
- Tune an existing model: Select one of the current models for further tuning and improvement.
233+
- Add a new model: Introduce a new model to expand the hypothesis space.
234+
235+
The information of the model is described by the code of workspace.
236+
237+
Make a decision and proceed accordingly:
238+
- If you decide to tune an existing model, select the existing model file and generate a new hypothesis.
239+
- If you decide to add a new model, specify the type of model you would add and generate a new hypothesis related to the new model.
240+
241+
When building the model, if the runtime permits, consider incorporating hyperparameter search methods to improve performance.
231242
{% endif %}
232243
233244
{% endif %}
@@ -258,13 +269,12 @@ component_gen:
258269
system: |-
259270
You are a Kaggle Grandmaster. You are going to provide a solution for a kaggle competition.
260271
261-
Here is the description of the competition scenario:
262-
```
272+
# Here is the description of the competition scenario:
263273
{{ scenario }}
264-
```
265274
266275
# Here is the current best version of implementation:
267276
{{ sota_exp_desc }}
277+
[Notice] Pay attention to the **Results** section. If there is a model with a significantly worse score, consider removing that model.
268278
269279
{% if last_exp_diff %}
270280
# Here are the differences between the latest version of implementation and the current best version of implementation
@@ -274,7 +284,9 @@ component_gen:
274284
275285
You will be provided the feedback for the latest implementation.
276286
277-
Please select the component you are going to improve the latest implementation or sota implementation.
287+
Please select the component you will use to improve the SOTA implementation.
288+
# Here is the brief description of the components you can select:
289+
{{ component_desc }}
278290
279291
Please generate the output in JSON format following the format below:
280292
{{ component_output_format }}
@@ -346,17 +358,7 @@ output_format:
346358
The output should follow JSON format. The schema is as follows:
347359
{
348360
"model_name": "model name, must start with 'model_' and only contain letters, numbers, and underscores",
349-
"description": "A precise and comprehensive description of the model",
350-
"extra_params":
351-
{
352-
"model_type": "The type of the model, e.g., neural network, tree-based model, etc.",
353-
"architecture": "A detailed description of the model's architecture, e.g., neural network layers or tree structures",
354-
"hyperparameters": {
355-
"hyperparameter_name_1": "value of hyperparameter 1",
356-
"hyperparameter_name_2": "value of hyperparameter 2",
357-
"hyperparameter_name_3": "value of hyperparameter 3"
358-
},
359-
},
361+
"description": "A precise and comprehensive description of the model. Start with [Model building/tuning] or [Model removal].",
360362
}
361363
ensemble: |-
362364
Design a specific and detailed ensemble task based on the given hypothesis. The output should be detailed enough to directly implement the corresponding code.

rdagent/scenarios/data_science/scen/prompts.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
scenario_description: |-
22
------Background of the scenario------
3-
{{background}}
3+
{{ background }}
44
55
------ Guidelines for participating in the competition ----
66
Before submitting your results, we have numerous tests ready to check your code. Please ensure your submission is genuine and do not manipulate data or return values just to pass the tests, as this will not lead to successful final results.
77
88
------The expected output & submission format specifications------
9-
{{submission_specifications}}
9+
{{ submission_specifications }}
1010
1111
{% if evaluation is not none %}
1212
------Evaluation------
13-
{{evaluation}}
13+
{{ evaluation }}
1414
{% endif %}
1515
1616
The evaluation metrics used is directed as:
17-
{% if metric_direction %}The metric is better when it is bigger.
18-
{% else %}The metric is better when it is smaller.
17+
{% if metric_direction %} The metric is better when it is bigger.
18+
{% else %} The metric is better when it is smaller.
1919
{% endif %}
2020
2121
{% if eda_output is not none %}
@@ -57,7 +57,7 @@ competition_background: |-
5757
The data type used in this competition is {{ data_type }}.
5858
Briefly, the competition involves: {{ brief_description }}.
5959
The dataset used in this competition is: {{ dataset_description }}.
60-
Your goal in this competition is to: {{target_description }}.
60+
Your goal in this competition is to: {{ target_description }}.
6161
6262
rich_style_description: |-
6363
### {{ name }} Agent: Automated Feature Engineering & Model Tuning Evolution

rdagent/scenarios/data_science/share.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,16 @@ describe: # some template to describe some object
6060
Reason: {{ exp_and_feedback[1].reason }}
6161
{% endfor %}
6262
{% endif %}
63+
64+
65+
component_description:
66+
data_loader: |-
67+
Loads and preprocesses competition data, ensuring proper data types, handling missing values, and providing an exploratory data analysis summary.
68+
feature: |-
69+
Transforms raw data into meaningful features while maintaining shape consistency, avoiding data leakage, and optimizing for model performance.
70+
model: |-
71+
Perform one of three tasks: model building, which develops a model to address the problem; model tuning, which optimizes an existing model for better performance; or model removal, which discards models that do not contribute effectively.
72+
ensemble: |-
73+
Combines predictions from multiple models using ensemble strategies, evaluates their performance, and generates the final test predictions.
74+
workflow: |-
75+
Integrates all pipeline components, from data loading to ensemble prediction, ensuring efficient execution and correct output formatting.

rdagent/utils/agent/tpl.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ BatchEditOut: |-
1212
For example:
1313
Inject the code into the folder. Your file name should always contain the suffix. Your file name keys should be unique to avoid delete or replace conflicts.
1414
{
15-
<file name1>: "<code>", // indicate writing <code> into <file name1> (create new file or replace existing file)
15+
<file name1>: "<code>", // indicate writing <code> into <file name1> (create a new file or update an existing file)
1616
{% if with_del %}
17-
<file name2>: "__DEL__" // indicate removing file name2. When we want to replace a file to a new one, we usually use this
17+
<file name2>: "__DEL__" // indicate removing file name2. When we want to just remove a file or replace a file with a new one, we usually use this
1818
{% else %}
19-
<file name2>(optional): "<code>" // indicate writing <code> into <file name2> (create new file or replace existing file)
19+
<file name2> (optional): "<code>" // indicate writing <code> into <file name2> (create a new file or update an existing file)
2020
{% endif %}
2121
}

0 commit comments

Comments
 (0)