@@ -14,6 +14,7 @@ def __init__(self, labels: Dict[str, str]) -> None:
1414 from prometheus_client import Counter , Histogram
1515 self .last_log_time = time .time ()
1616 self .labels = labels
17+ self .metric_prefix = "trtllm_"
1718
1819 self .finish_reason_label = {
1920 MetricsCollector .labelname_finish_reason : "unknown"
@@ -24,12 +25,12 @@ def __init__(self, labels: Dict[str, str]) -> None:
2425 }
2526
2627 self .counter_request_success = Counter (
27- name = "request_success_total" ,
28+ name = self . metric_prefix + "request_success_total" ,
2829 documentation = "Count of successfully processed requests." ,
2930 labelnames = self .labels_with_finished_reason .keys ())
3031
3132 self .histogram_e2e_time_request = Histogram (
32- name = "e2e_request_latency_seconds" ,
33+ name = self . metric_prefix + "e2e_request_latency_seconds" ,
3334 documentation = "Histogram of end to end request latency in seconds." ,
3435 buckets = [
3536 0.3 , 0.5 , 0.8 , 1.0 , 1.5 , 2.0 , 2.5 , 5.0 , 10.0 , 15.0 , 20.0 , 30.0 ,
@@ -38,7 +39,7 @@ def __init__(self, labels: Dict[str, str]) -> None:
3839 labelnames = self .labels .keys ())
3940
4041 self .histogram_time_to_first_token = Histogram (
41- name = "time_to_first_token_seconds" ,
42+ name = self . metric_prefix + "time_to_first_token_seconds" ,
4243 documentation = "Histogram of time to first token in seconds." ,
4344 buckets = [
4445 0.001 , 0.005 , 0.01 , 0.02 , 0.04 , 0.06 , 0.08 , 0.1 , 0.25 , 0.5 ,
@@ -48,7 +49,7 @@ def __init__(self, labels: Dict[str, str]) -> None:
4849 labelnames = self .labels .keys ())
4950
5051 self .histogram_time_per_output_token = Histogram (
51- name = "time_per_output_token_seconds" ,
52+ name = self . metric_prefix + "time_per_output_token_seconds" ,
5253 documentation = "Histogram of time per output token in seconds." ,
5354 buckets = [
5455 0.01 , 0.025 , 0.05 , 0.075 , 0.1 , 0.15 , 0.2 , 0.3 , 0.4 , 0.5 , 0.75 ,
@@ -57,7 +58,7 @@ def __init__(self, labels: Dict[str, str]) -> None:
5758 labelnames = self .labels .keys ())
5859
5960 self .histogram_queue_time_request = Histogram (
60- name = "request_queue_time_seconds" ,
61+ name = self . metric_prefix + "request_queue_time_seconds" ,
6162 documentation =
6263 "Histogram of time spent in WAITING phase for request." ,
6364 buckets = [
0 commit comments