Skip to content

Commit 2b9427a

Browse files
authored
feat: truncate by time (#863)
* refactor: move session file lookup logic to folder utils module * print more info * lint
1 parent 50ea033 commit 2b9427a

File tree

3 files changed

+96
-24
lines changed

3 files changed

+96
-24
lines changed

rdagent/log/mle_summary.py

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import pickle
23
import re
34
from collections import defaultdict
45
from datetime import timedelta
@@ -13,13 +14,15 @@
1314
from rdagent.core.experiment import FBWorkspace
1415
from rdagent.core.proposal import ExperimentFeedback
1516
from rdagent.log.storage import FileStorage
17+
from rdagent.log.utils.folder import get_first_session_file_after_duration
1618
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
1719
from rdagent.scenarios.data_science.test_eval import (
1820
MLETestEval,
1921
NoTestEvalError,
2022
get_test_eval,
2123
)
2224
from rdagent.scenarios.kaggle.kaggle_crawler import score_rank
25+
from rdagent.utils.workflow import LoopBase
2326

2427
test_eval = get_test_eval()
2528

@@ -71,31 +74,24 @@ def save_all_grade_info(log_folder):
7174
print(f"Error in {log_trace_path}: {e}")
7275

7376

74-
def first_li_si_after_one_time(log_path: Path, hours: int = 12) -> tuple[int, int, str]:
75-
"""
76-
Based on the hours, find the stop loop id and step id (the first step after <hours> hours).
77-
Args:
78-
log_path (Path): The path to the log folder (contains many log traces).
79-
hours (int): The number of hours to stat.
80-
Returns:
81-
tuple[int, int, str]: The loop id, step id and function name.
82-
"""
83-
session_path = log_path / "__session__"
84-
max_li = max(int(p.name) for p in session_path.iterdir() if p.is_dir() and p.name.isdigit())
85-
max_step = max(int(p.name.split("_")[0]) for p in (session_path / str(max_li)).iterdir() if p.is_file())
86-
rdloop_obj_p = next((session_path / str(max_li)).glob(f"{max_step}_*"))
77+
def _get_loop_and_fn_after_hours(log_folder: Path, hours: int):
    """
    Locate the loop id and step function name at which a trace should stop after `hours` hours.

    The session file is chosen by `get_first_session_file_after_duration`; the stop point is the
    last step of the highest loop id recorded in that session's `loop_trace`.

    Args:
        log_folder (Path): The log trace folder (the one that contains `__session__`).
        hours (int): The time budget in hours; passed on as the duration string "<hours>h".
    Returns:
        tuple: The stop loop id and the name of the stop step (looked up in `session_obj.steps`).
    """
    session_file = get_first_session_file_after_duration(log_folder, f"{hours}h")
    with session_file.open("rb") as fh:
        session_obj: LoopBase = pickle.load(fh)

    trace = session_obj.loop_trace
    stop_li = max(trace.keys())
    final_step = trace[stop_li][-1]
    stop_fn = session_obj.steps[final_step.step_idx]
    print(f"Stop Loop: {stop_li=}, {stop_fn=}")

    def _dump_order(fp):
        # Order session dumps by (loop id, step id), both parsed from the path.
        return int(fp.parent.name), int(fp.name.split("_")[0])

    # Debug aid only: show the latest dump available in the folder next to the chosen stop point.
    files = sorted((log_folder / "__session__").glob("*/*_*"), key=_dump_order)
    print(f"Max Session: {files[-1:]=}")
    return stop_li, stop_fn
9995

10096

10197
def summarize_folder(log_folder: Path, hours: int | None = None):
@@ -133,7 +129,7 @@ def summarize_folder(log_folder: Path, hours: int | None = None):
133129
grade_output = None
134130

135131
if hours:
136-
stop_li, stop_si, stop_fn = first_li_si_after_one_time(log_trace_path, hours)
132+
stop_li, stop_fn = _get_loop_and_fn_after_hours(log_trace_path, hours)
137133

138134
for msg in FileStorage(log_trace_path).iter_msg(): # messages in log trace
139135
loop_id, fn = extract_loopid_func_name(msg.tag)

rdagent/log/utils/folder.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""
2+
This module provides some useful functions for working with logger folders.
3+
"""
4+
5+
import pickle
6+
from pathlib import Path
7+
8+
import pandas as pd
9+
10+
from rdagent.utils.workflow import LoopBase
11+
12+
13+
def get_first_session_file_after_duration(log_folder: str | Path, duration: str | pd.Timedelta) -> Path:
14+
log_folder = Path(log_folder)
15+
duration_dt = pd.Timedelta(duration)
16+
# iterate the dump steps in increasing order
17+
files = sorted(
18+
(log_folder / "__session__").glob("*/*_*"), key=lambda f: (int(f.parent.name), int(f.name.split("_")[0]))
19+
)
20+
fp = None
21+
for fp in files:
22+
with fp.open("rb") as f:
23+
session_obj: LoopBase = pickle.load(f)
24+
timer = session_obj.timer
25+
all_duration = timer.all_duration
26+
remain_time_duration = timer.remain_time_duration
27+
if all_duration is None or remain_time_duration is None:
28+
msg = "Timer is not configured"
29+
raise ValueError(msg)
30+
time_spent = all_duration - remain_time_duration
31+
if time_spent >= duration_dt:
32+
break
33+
if fp is None:
34+
msg = f"No session file found after duration {duration}"
35+
raise ValueError(msg)
36+
return fp
37+
38+
39+
def first_li_si_after_one_time(log_path: Path, hours: int = 12) -> tuple[int, int, str]:
40+
"""
41+
Based on the hours, find the stop loop id and step id (the first step after <hours> hours).
42+
Args:
43+
log_path (Path): The path to the log folder (contains many log traces).
44+
hours (int): The number of hours to stat.
45+
Returns:
46+
tuple[int, int, str]: The loop id, step id and function name.
47+
"""
48+
session_path = log_path / "__session__"
49+
max_li = max(int(p.name) for p in session_path.iterdir() if p.is_dir() and p.name.isdigit())
50+
max_step = max(int(p.name.split("_")[0]) for p in (session_path / str(max_li)).iterdir() if p.is_file())
51+
rdloop_obj_p = next((session_path / str(max_li)).glob(f"{max_step}_*"))
52+
53+
rdloop_obj = DataScienceRDLoop.load(rdloop_obj_p, do_truncate=False)
54+
loop_trace = rdloop_obj.loop_trace
55+
si2fn = rdloop_obj.steps
56+
57+
duration = timedelta(seconds=0)
58+
for li, lts in loop_trace.items():
59+
for lt in lts:
60+
si = lt.step_idx
61+
duration += lt.end - lt.start
62+
if duration > timedelta(hours=hours):
63+
return li, si, si2fn[si]
64+
65+
66+
if __name__ == "__main__":
    # Manual smoke check: print the step name recorded last in the first session dumped after 12 hours.
    # NOTE(review): DataScienceRDLoop is not referenced by name below; presumably imported so the
    # pickled loop class is loadable — confirm before removing.
    from rdagent.app.data_science.loop import DataScienceRDLoop

    session_fp = get_first_session_file_after_duration(
        "<path to log aptos2019-blindness-detection>", pd.Timedelta("12h")
    )

    # Keep the path and the file handle under different names (the handle used to shadow the path).
    with session_fp.open("rb") as f:
        session_obj: LoopBase = pickle.load(f)
    loop_trace = session_obj.loop_trace
    last_loop = loop_trace[max(loop_trace.keys())]
    last_step = last_loop[-1]
    # Print the result; as a bare expression it was computed and then discarded.
    print(session_obj.steps[last_step.step_idx])

0 commit comments

Comments
 (0)