feat: async mechanism for multi-trace (#981)

xuangu-fang · web-flow · commit 9e60c32cf348 · 2025-06-26T15:49:47.000+08:00
* start to work on multi-trace + async

* init ver of async-multi-trace, to test

* add eng-ver log

* complete version of async+ mul-trace

* debug

* fix bug on DS_RD_SETTING.get()

* update

* fix bug + simplify the usage of async in multi-trace

* fix mini bug of arg_name

* Move local_selection into class Experiment & clean the code
diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
@@ -324,6 +324,9 @@ def __init__(
             {}
         )  # TODO: in Kaggle, now sub results are all saved in self.result, remove this in the future.
 
+        # For parallel multi-trace support
+        self.local_selection: tuple[int, ...] | None = None
+
 
 ASpecificExp = TypeVar("ASpecificExp", bound=Experiment)
 
diff --git a/rdagent/scenarios/data_science/dev/feedback.py b/rdagent/scenarios/data_science/dev/feedback.py
@@ -26,6 +26,7 @@ def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> ExperimentFeed
         # 3. 相对sota_exp的改动
         # 4. result 任务的结果
         # 5. sota_exp.result 之前最好的结果
+
         sota_exp = trace.sota_experiment()
         sota_desc = T("scenarios.data_science.share:describe.exp").r(
             exp=sota_exp, heading="SOTA of previous exploration of the scenario"
diff --git a/rdagent/scenarios/data_science/experiment/experiment.py b/rdagent/scenarios/data_science/experiment/experiment.py
@@ -29,3 +29,6 @@ def is_ready_to_run(self) -> bool:
         (so it is different from `trace.next_incomplete_component`.)
         """
         return self.experiment_workspace is not None and "main.py" in self.experiment_workspace.file_dict
+
+    def set_local_selection(self, local_selection: tuple[int, ...]) -> None:
+        self.local_selection = local_selection  # per-experiment selection for parallel multi-trace mode
diff --git a/rdagent/scenarios/data_science/loop.py b/rdagent/scenarios/data_science/loop.py
@@ -142,7 +142,6 @@ def __init__(self, PROP_SETTING: BasePropSetting):
         super(RDLoop, self).__init__()
 
     async def direct_exp_gen(self, prev_out: dict[str, Any]):
-
         # set the SOTA experiment to submit
         sota_exp_to_submit = self.sota_exp_selector.get_sota_exp_to_submit(self.trace)
         self.trace.set_sota_exp_to_submit(sota_exp_to_submit)
@@ -151,7 +150,11 @@ async def direct_exp_gen(self, prev_out: dict[str, Any]):
         selection = self.ckp_selector.get_selection(self.trace)
         # set the current selection for the trace
         self.trace.set_current_selection(selection)
+
+        # In parallel + multi-trace mode, the global "trace.current_selection" set above will not be used;
+        # instead, async_gen() uses the "local_selection" attached to each exp.
         exp = await self.exp_gen.async_gen(self.trace, self)
+
         logger.log_object(exp)
 
         # FIXME: this is for LLM debug webapp, remove this when the debugging is done.
@@ -197,6 +200,11 @@ def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
         - If we come to feedback phase, the previous development steps are successful.
         """
         exp: DSExperiment = prev_out["running"]
+
+        # set the local selection to the trace after feedback
+        if exp.local_selection is not None:
+            self.trace.set_current_selection(exp.local_selection)
+
         if self.trace.next_incomplete_component() is None or DS_RD_SETTING.coder_on_whole_pipeline:
             # we have alreadly completed components in previous trace. So current loop is focusing on a new proposed idea.
             # So we need feedback for the proposal.
@@ -211,19 +219,36 @@ def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
         return feedback
 
     def record(self, prev_out: dict[str, Any]):
-        # set the DAG parent for the trace
-        self.trace.sync_dag_parent_and_hist()
+
+        exp: DSExperiment = None
 
         e = prev_out.get(self.EXCEPTION_KEY, None)
         if e is None:
-            self.trace.hist.append((prev_out["running"], prev_out["feedback"]))
+            exp = prev_out["running"]
+            self.trace.hist.append((exp, prev_out["feedback"]))
+
+            # NOTE: the selection operations below are placed here, rather than outside the if-else block,
+            # to handle the corner case where the trace is reset
+
+            # set the local selection to the trace as global selection, then set the DAG parent for the trace
+            if exp.local_selection is not None:
+                self.trace.set_current_selection(exp.local_selection)
+            self.trace.sync_dag_parent_and_hist()
+
         else:
+            exp: DSExperiment = prev_out["direct_exp_gen"] if isinstance(e, CoderError) else prev_out["coding"]
             self.trace.hist.append(
                 (
-                    prev_out["direct_exp_gen"] if isinstance(e, CoderError) else prev_out["coding"],
+                    exp,
                     ExperimentFeedback.from_exception(e),
                 )
             )
+
+            # set the local selection to the trace as global selection, then set the DAG parent for the trace
+            if exp.local_selection is not None:
+                self.trace.set_current_selection(exp.local_selection)
+            self.trace.sync_dag_parent_and_hist()
+
             if self.trace.sota_experiment() is None:
                 if DS_RD_SETTING.coder_on_whole_pipeline:
                     #  check if feedback is not generated
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/base.py b/rdagent/scenarios/data_science/proposal/exp_gen/base.py
@@ -266,11 +266,12 @@ def last_exp(
     def last_exp_fb(
         self,
         search_type: Literal["all", "ancestors"] = "ancestors",
+        selection: tuple[int, ...] | None = None,
     ) -> tuple[DSExperiment, ExperimentFeedback] | None:
         """
         Access the last experiment and feedback
         """
-        search_list = self.retrieve_search_list(search_type)
+        search_list = self.retrieve_search_list(search_type, selection=selection)
         for exp, ef in search_list[::-1]:
             return exp, ef
         return None
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/merge.py b/rdagent/scenarios/data_science/proposal/exp_gen/merge.py
@@ -218,7 +218,7 @@ def gen(self, trace: DSTrace) -> DSExperiment:
             scenario_desc=scenario_desc,
             sota_exp_desc=sota_exp_desc,
             sota_exp=sota_exp_fb[0] if sota_exp_fb else None,
-            hypothesis=new_hypothesis,
+            hypotheses=[new_hypothesis],
             pipeline=DS_RD_SETTING.coder_on_whole_pipeline,
             failed_exp_feedback_list_desc="",
         )
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/parallel.py b/rdagent/scenarios/data_science/proposal/exp_gen/parallel.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+import asyncio
+from datetime import timedelta
+from typing import TYPE_CHECKING
+
+from rdagent.app.data_science.conf import DS_RD_SETTING
+from rdagent.core.conf import RD_AGENT_SETTINGS
+from rdagent.core.proposal import ExpGen
+from rdagent.log import rdagent_logger as logger
+from rdagent.log.timer import RD_Agent_TIMER_wrapper, RDAgentTimer
+from rdagent.scenarios.data_science.loop import DataScienceRDLoop
+from rdagent.scenarios.data_science.proposal.exp_gen.merge import ExpGen2Hypothesis
+from rdagent.scenarios.data_science.proposal.exp_gen.trace_scheduler import (
+    RoundRobinScheduler,
+    TraceScheduler,
+)
+
+if TYPE_CHECKING:
+    from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
+    from rdagent.scenarios.data_science.proposal.exp_gen.base import DSTrace, Experiment
+    from rdagent.utils.workflow.loop import LoopBase
+
+
+class ParallelMultiTraceExpGen(ExpGen):
+    """
+    An experiment generation strategy that enables parallel multi-trace exploration.
+
+    This generator is designed to work with the "Attribute Injection" model.
+    It uses a TraceScheduler to determine which parent node to expand, and
+    injects this parent context into the experiment object itself.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build the single-experiment generator, the merge generator and the scheduler."""
+        super().__init__(*args, **kwargs)
+        # The underlying generator for creating a single experiment
+        self.exp_gen = DataScienceRDLoop._get_exp_gen(
+            "rdagent.scenarios.data_science.proposal.exp_gen.DSExpGen", self.scen
+        )
+        self.merge_exp_gen = ExpGen2Hypothesis(self.scen)
+        self.trace_scheduler: TraceScheduler = RoundRobinScheduler()
+        self.max_trace_num = DS_RD_SETTING.max_trace_num
+
+    def gen(self, trace: "DSTrace") -> "Experiment":
+        """Always raises NotImplementedError; this generator only supports `async_gen`."""
+        raise NotImplementedError(
+            "ParallelMultiTraceExpGen is designed for async usage, please call async_gen instead."
+        )
+
+    async def async_gen(self, trace: DSTrace, loop: LoopBase) -> DSExperiment:
+        """
+        Poll once per second until an experiment can be generated, then return it.
+
+        Before the merge window (remaining time >= `DS_RD_SETTING.merge_hours`), a
+        parent is selected only once a parallel slot is free, so idle polling never
+        advances the scheduler; the selection is also stored on the experiment.
+        In the merge window, generation starts only when
+        `loop.get_unfinished_loop_cnt(loop.loop_idx)` is below 1.
+
+        NOTE: this mutates `DS_RD_SETTING` and calls `trace.set_current_selection`.
+        """
+        timer: RDAgentTimer = RD_Agent_TIMER_wrapper.timer
+        logger.info(f"Remain time: {timer.remain_time_duration}")
+
+        while True:
+            if timer.remain_time_duration >= timedelta(hours=DS_RD_SETTING.merge_hours):
+                if DS_RD_SETTING.enable_inject_knowledge_at_root:
+                    # enable the knowledge base only while the trace has no history
+                    DS_RD_SETTING.enable_knowledge_base = len(trace.hist) == 0
+
+                if loop.get_unfinished_loop_cnt(loop.loop_idx) < RD_AGENT_SETTINGS.get_max_parallel():
+                    # step 1: select the parent trace to expand. Done only after a slot
+                    # is free, because select_trace() advances the scheduler's state.
+                    if trace.sub_trace_count < self.max_trace_num:
+                        # Policy: if we have fewer traces than our target, start a new one.
+                        local_selection: tuple[int, ...] = trace.NEW_ROOT
+                    else:
+                        # Otherwise, use the scheduler to pick an existing trace to expand.
+                        local_selection = await self.trace_scheduler.select_trace(trace)
+
+                    # set the local selection as the global current selection for the trace
+                    trace.set_current_selection(local_selection)
+                    # step 2: generate the experiment with the local selection
+                    exp = self.exp_gen.gen(trace)
+
+                    # Inject the local selection to the experiment object
+                    exp.set_local_selection(local_selection)
+                    return exp
+
+            else:
+                # enter the merging stage
+                # make sure all the loops are finished
+                if loop.get_unfinished_loop_cnt(loop.loop_idx) < 1:
+                    # disable reset in merging stage
+                    DS_RD_SETTING.coding_fail_reanalyze_threshold = 100000
+                    DS_RD_SETTING.consecutive_errors = 100000
+
+                    leaves: list[int] = trace.get_leaves()
+                    if len(leaves) < 2:
+                        trace.set_current_selection(selection=(-1,))
+                        return self.exp_gen.gen(trace)
+
+                    # select leaves[0], unless is_parent(<sota index>, leaves[1]) holds
+                    selection = (leaves[0],)
+                    if trace.sota_exp_to_submit is not None:
+                        if trace.is_parent(trace.exp2idx(trace.sota_exp_to_submit), leaves[1]):
+                            selection = (leaves[1],)
+                    trace.set_current_selection(selection)
+                    return self.merge_exp_gen.gen(trace)
+
+            await asyncio.sleep(1)
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py b/rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
@@ -802,7 +802,11 @@ def get_all_hypotheses(self, problem_dict: dict, hypothesis_dict: dict) -> list[
             )
         return result
 
-    def gen(self, trace: DSTrace) -> DSExperiment:
+    def gen(
+        self,
+        trace: DSTrace,
+    ) -> DSExperiment:
+
         pipeline = DS_RD_SETTING.coder_on_whole_pipeline
         if not pipeline and (draft_exp := draft_exp_in_decomposition(self.scen, trace)):
             return draft_exp
@@ -839,6 +843,7 @@ def gen(self, trace: DSTrace) -> DSExperiment:
             pipeline=pipeline,
         )
 
+        # NOTE: injecting diverse problems is currently not supported in the parallel + multi-trace mode.
         if DS_RD_SETTING.enable_inject_diverse and len(trace.hist) > 0:
             if len(trace.current_selection) == 0:
                 # start a new sub-trace, and inject diverse problems.
diff --git a/rdagent/scenarios/data_science/proposal/exp_gen/trace_scheduler.py b/rdagent/scenarios/data_science/proposal/exp_gen/trace_scheduler.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+import asyncio
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from rdagent.scenarios.data_science.proposal.exp_gen.base import DSTrace
+
+
+class TraceScheduler(ABC):
+    """
+    An abstract base class for trace scheduling strategies.
+    Determines which active trace to expand next during parallel exploration.
+    """
+
+    @abstractmethod
+    async def select_trace(self, trace: DSTrace) -> tuple[int, ...]:
+        """
+        Selects the next trace to expand.
+
+        Implementations are coroutines, so callers must `await` the result.
+
+        Args:
+            trace: The DSTrace object containing the full experiment history.
+
+        Returns:
+            A tuple representing the selection of the parent node for the new experiment.
+            e.g., (leaf_idx,) for an existing trace, or trace.NEW_ROOT for a new one.
+        """
+        raise NotImplementedError
+
+
+class RoundRobinScheduler(TraceScheduler):
+    """
+    Scheduling strategy that cycles through the trace's current leaves in
+    round-robin order, remembering the leaf it returned last.
+
+    NOTE: no asyncio.Lock is used here, as the kickoff_loop ensures the ExpGen is always sequential, instead of parallel.
+    """
+
+    def __init__(self):
+        # -1 until the first selection is made
+        self._last_selected_leaf_id = -1
+
+    async def select_trace(self, trace: DSTrace) -> tuple[int, ...]:
+        """
+        Return `(leaf,)` for the leaf that follows the previously returned one,
+        or `trace.NEW_ROOT` when the trace has no leaves.
+        """
+        candidates = trace.get_leaves()
+        if not candidates:
+            # This is the very first experiment in a new tree.
+            return trace.NEW_ROOT
+
+        previous = self._last_selected_leaf_id
+        if previous in candidates:
+            # Step past the previous pick, wrapping around at the end.
+            cursor = (candidates.index(previous) + 1) % len(candidates)
+        else:
+            # First selection, or the previous pick is no longer a leaf: restart at 0.
+            # NOTE(review): once that leaf has been expanded this always yields
+            # leaves[0]; confirm later leaves cannot be starved.
+            cursor = 0
+
+        chosen = candidates[cursor]
+        self._last_selected_leaf_id = chosen
+
+        return (chosen,)
diff --git a/rdagent/utils/workflow/loop.py b/rdagent/utils/workflow/loop.py
@@ -136,6 +136,11 @@ def get_semaphore(self, step_name: str) -> asyncio.Semaphore:
         if isinstance(limit := RD_AGENT_SETTINGS.step_semaphore, dict):
             limit = limit.get(step_name, 1)  # default to 1 if not specified
 
+        # NOTE: we assume the record step is always the last step to modify the global environment,
+        # so we set the limit to 1 to avoid race condition
+        if step_name == "record":
+            limit = 1
+
         if step_name not in self.semaphores:
             self.semaphores[step_name] = asyncio.Semaphore(limit)
         return self.semaphores[step_name]

Original file line number	Diff line number	Diff line change
`@@ -218,7 +218,7 @@ def gen(self, trace: DSTrace) -> DSExperiment:`
`218`	`218`	`scenario_desc=scenario_desc,`
`219`	`219`	`sota_exp_desc=sota_exp_desc,`
`220`	`220`	`sota_exp=sota_exp_fb[0] if sota_exp_fb else None,`
`221`		`- hypothesis=new_hypothesis,`
	`221`	`+ hypotheses=[new_hypothesis],`
`222`	`222`	`pipeline=DS_RD_SETTING.coder_on_whole_pipeline,`
`223`	`223`	`failed_exp_feedback_list_desc="",`
`224`	`224`	`)`