Skip to content

Commit a8c1684

Browse files
alxtkr77 and Alex Toker authored
[Model Monitoring] Add configurable TimescaleDB connection pool timeout (ML-11775) (#9172)
## Summary
- Add new config option `model_endpoint_monitoring.tsdb.connection_pool_timeout` to control the TimescaleDB connection pool timeout
- Default is 120 seconds (previously hardcoded to 30 seconds)
- This allows API requests to wait longer for a database connection under high load

## Changes Made
- Added `connection_pool_timeout` config in `mlrun/config.py` under `model_endpoint_monitoring.tsdb`
- Updated `TimescaleDBConnection.pool` property to use the configurable timeout
- Added unit tests to verify the config is correctly applied

Co-authored-by: Alex Toker <[email protected]>
1 parent 8d12a3d commit a8c1684

File tree

3 files changed: +46 −1 lines changed

mlrun/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -682,6 +682,9 @@
682682
# When True, automatically create/generate database name using system_id if not explicitly
683683
# specified in the connection string. When False, use the database from connection string as-is.
684684
"auto_create_database": True,
685+
# Connection pool timeout in seconds. This is the maximum time to wait for a connection
686+
# from the pool before raising an error.
687+
"connection_pool_timeout": 120,
685688
},
686689
},
687690
"secret_stores": {

mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
from psycopg_pool import ConnectionPool
2424

2525
import mlrun.errors
26+
from mlrun.config import config
2627
from mlrun.model_monitoring.db.tsdb.preaggregate import PreAggregateManager
2728
from mlrun.utils import logger
2829

@@ -127,7 +128,9 @@ def pool(self) -> ConnectionPool:
127128
conninfo=self._dsn,
128129
min_size=self._min_connections,
129130
max_size=self._max_connections,
130-
timeout=30.0,
131+
timeout=float(
132+
config.model_endpoint_monitoring.tsdb.connection_pool_timeout
133+
),
131134
)
132135
return self._pool
133136

tests/model_monitoring/db/tsdb/timescaledb/test_timescaledb_connection.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import psycopg
1818
import pytest
1919

20+
import mlrun.config
2021
import mlrun.errors
2122
from mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection import (
2223
Statement,
@@ -270,3 +271,41 @@ def test_persistent_failure_behavior_verification(
270271
# Verify error message
271272
assert "deadlock persisted after 3 retries" in str(exc_info.value)
272273
assert mock_cursor.execute.call_count == 4 # Initial + 3 retries
274+
275+
276+
class TestTimescaleDBConnectionPoolTimeout:
277+
"""Test connection pool timeout configuration (ML-11775)."""
278+
279+
def test_pool_uses_configured_timeout(self):
280+
"""Test that ConnectionPool is created with timeout from config."""
281+
# Set custom timeout in config
282+
original_timeout = (
283+
mlrun.config.config.model_endpoint_monitoring.tsdb.connection_pool_timeout
284+
)
285+
mlrun.config.config.model_endpoint_monitoring.tsdb.connection_pool_timeout = 90
286+
287+
try:
288+
with patch(
289+
"mlrun.model_monitoring.db.tsdb.timescaledb.timescaledb_connection.ConnectionPool"
290+
) as mock_pool_class:
291+
conn = TimescaleDBConnection(
292+
dsn="postgres://test:test@localhost:5432/test",
293+
max_connections=5,
294+
)
295+
# Access the pool property to trigger pool creation
296+
_ = conn.pool
297+
298+
# Verify ConnectionPool was called with the configured timeout
299+
mock_pool_class.assert_called_once()
300+
call_kwargs = mock_pool_class.call_args.kwargs
301+
assert call_kwargs["timeout"] == 90.0
302+
finally:
303+
# Restore original value
304+
mlrun.config.config.model_endpoint_monitoring.tsdb.connection_pool_timeout = original_timeout
305+
306+
def test_pool_default_timeout_is_120(self):
307+
"""Test that the default connection pool timeout is 120 seconds."""
308+
default_timeout = (
309+
mlrun.config.config.model_endpoint_monitoring.tsdb.connection_pool_timeout
310+
)
311+
assert default_timeout == 120

0 commit comments

Comments
 (0)