|
1 | 1 | import json |
| 2 | +import pickle |
2 | 3 | import re |
3 | 4 | from collections import defaultdict |
4 | 5 | from datetime import timedelta |
|
13 | 14 | from rdagent.core.experiment import FBWorkspace |
14 | 15 | from rdagent.core.proposal import ExperimentFeedback |
15 | 16 | from rdagent.log.storage import FileStorage |
| 17 | +from rdagent.log.utils.folder import get_first_session_file_after_duration |
16 | 18 | from rdagent.scenarios.data_science.experiment.experiment import DSExperiment |
17 | 19 | from rdagent.scenarios.data_science.test_eval import ( |
18 | 20 | MLETestEval, |
19 | 21 | NoTestEvalError, |
20 | 22 | get_test_eval, |
21 | 23 | ) |
22 | 24 | from rdagent.scenarios.kaggle.kaggle_crawler import score_rank |
| 25 | +from rdagent.utils.workflow import LoopBase |
23 | 26 |
|
24 | 27 | test_eval = get_test_eval() |
25 | 28 |
|
@@ -71,31 +74,24 @@ def save_all_grade_info(log_folder): |
71 | 74 | print(f"Error in {log_trace_path}: {e}") |
72 | 75 |
|
73 | 76 |
|
74 | | -def first_li_si_after_one_time(log_path: Path, hours: int = 12) -> tuple[int, int, str]: |
75 | | - """ |
76 | | - Based on the hours, find the stop loop id and step id (the first step after <hours> hours). |
77 | | - Args: |
78 | | - log_path (Path): The path to the log folder (contains many log traces). |
79 | | - hours (int): The number of hours to stat. |
80 | | - Returns: |
81 | | - tuple[int, int, str]: The loop id, step id and function name. |
82 | | - """ |
83 | | - session_path = log_path / "__session__" |
84 | | - max_li = max(int(p.name) for p in session_path.iterdir() if p.is_dir() and p.name.isdigit()) |
85 | | - max_step = max(int(p.name.split("_")[0]) for p in (session_path / str(max_li)).iterdir() if p.is_file()) |
86 | | - rdloop_obj_p = next((session_path / str(max_li)).glob(f"{max_step}_*")) |
| 77 | +def _get_loop_and_fn_after_hours(log_folder: Path, hours: int): |
| 78 | + stop_session_fp = get_first_session_file_after_duration(log_folder, f"{hours}h") |
87 | 79 |
|
88 | | - rdloop_obj = DataScienceRDLoop.load(rdloop_obj_p, do_truncate=False) |
89 | | - loop_trace = rdloop_obj.loop_trace |
90 | | - si2fn = rdloop_obj.steps |
| 80 | + with stop_session_fp.open("rb") as f: |
| 81 | + session_obj: LoopBase = pickle.load(f) |
| 82 | + |
| 83 | + loop_trace = session_obj.loop_trace |
| 84 | + stop_li = max(loop_trace.keys()) |
| 85 | + last_loop = loop_trace[stop_li] |
| 86 | + last_step = last_loop[-1] |
| 87 | + stop_fn = session_obj.steps[last_step.step_idx] |
| 88 | + print(f"Stop Loop: {stop_li=}, {stop_fn=}") |
| 89 | + files = sorted( |
| 90 | + (log_folder / "__session__").glob("*/*_*"), key=lambda f: (int(f.parent.name), int(f.name.split("_")[0])) |
| 91 | + ) |
91 | 92 |
|
92 | | - duration = timedelta(seconds=0) |
93 | | - for li, lts in loop_trace.items(): |
94 | | - for lt in lts: |
95 | | - si = lt.step_idx |
96 | | - duration += lt.end - lt.start |
97 | | - if duration > timedelta(hours=hours): |
98 | | - return li, si, si2fn[si] |
| 93 | + print(f"Max Session: {files[-1:]=}") |
| 94 | + return stop_li, stop_fn |
99 | 95 |
|
100 | 96 |
|
101 | 97 | def summarize_folder(log_folder: Path, hours: int | None = None): |
@@ -133,7 +129,7 @@ def summarize_folder(log_folder: Path, hours: int | None = None): |
133 | 129 | grade_output = None |
134 | 130 |
|
135 | 131 | if hours: |
136 | | - stop_li, stop_si, stop_fn = first_li_si_after_one_time(log_trace_path, hours) |
| 132 | + stop_li, stop_fn = _get_loop_and_fn_after_hours(log_trace_path, hours) |
137 | 133 |
|
138 | 134 | for msg in FileStorage(log_trace_path).iter_msg(): # messages in log trace |
139 | 135 | loop_id, fn = extract_loopid_func_name(msg.tag) |
|
0 commit comments