Insert performance issue of ORM models with multiple Enums and async engine #10356
-
Describe the bugAdding a second Enum column to an ORM model leads to unpredictable performance regression during the insert commit when running with an async engine and null pool size. This issue is not reproduced with a synchronous engine. SQLAlchemy Version in Use2.0.20 DBAPI (i.e. the database driver)asyncpg 0.28.0 Database Vendor and Major VersionPostgreSQL 15 Python Version3.11.4 Operating systemOSX To Reproduceimport asyncio
import enum
import time
from sqlalchemy import JSON, NullPool
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class EnumOne(enum.Enum):
VALUE = "value"
class EnumTwo(enum.Enum):
VALUE = "value"
class EnumThree(enum.Enum):
VALUE = "value"
class Base(DeclarativeBase):
...
class ModelWithOneEnum(Base):
__tablename__ = "model_with_one_enum"
id: Mapped[int] = mapped_column(primary_key=True)
enum_one: Mapped[EnumOne]
# To demonstrate that it's not about additional column, can be removed
third_column: Mapped[dict] = mapped_column(JSON)
class ModelWithTwoEnums(Base):
__tablename__ = "model_with_two_enums"
id: Mapped[int] = mapped_column(primary_key=True)
enum_one: Mapped[EnumOne]
enum_two: Mapped[EnumTwo]
class ModelWithThreeEnums(Base):
__tablename__ = "model_with_three_enums"
id: Mapped[int] = mapped_column(primary_key=True)
enum_one: Mapped[EnumOne]
enum_two: Mapped[EnumTwo]
enum_three: Mapped[EnumThree]
async def main() -> None:
connection_string = "postgresql+asyncpg://postgres:postgres@localhost:5432/postgres"
setup_engine = create_async_engine(connection_string)
null_pool_engine = create_async_engine(connection_string, poolclass=NullPool)
with_pool_size_engine = create_async_engine(connection_string, pool_size=10)
async with setup_engine.begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await conn.run_sync(Base.metadata.create_all)
async with AsyncSession(null_pool_engine) as session:
session.add(ModelWithOneEnum(enum_one=EnumOne.VALUE, third_column=[1] * 1000))
start_time = time.time()
await session.commit()
print(f"NullPool + ModelWithOneEnum: {time.time() - start_time}")
async with AsyncSession(null_pool_engine) as session:
session.add(ModelWithTwoEnums(enum_one=EnumOne.VALUE, enum_two=EnumTwo.VALUE))
start_time = time.time()
await session.commit()
print(f"NullPool + ModelWithTwoEnums: {time.time() - start_time}")
async with AsyncSession(null_pool_engine) as session:
session.add(
ModelWithThreeEnums(
enum_one=EnumOne.VALUE,
enum_two=EnumTwo.VALUE,
enum_three=EnumThree.VALUE,
)
)
start_time = time.time()
await session.commit()
print(f"NullPool + ModelWithThreeEnums: {time.time() - start_time}")
async with AsyncSession(with_pool_size_engine) as session:
session.add(ModelWithOneEnum(enum_one=EnumOne.VALUE, third_column=[1] * 1000))
start_time = time.time()
await session.commit()
print(f"WithPool + ModelWithOneEnum: {time.time() - start_time}")
async with AsyncSession(with_pool_size_engine) as session:
session.add(ModelWithTwoEnums(enum_one=EnumOne.VALUE, enum_two=EnumTwo.VALUE))
start_time = time.time()
await session.commit()
print(f"WithPool + ModelWithTwoEnums: {time.time() - start_time}")
async with AsyncSession(with_pool_size_engine) as session:
session.add(
ModelWithThreeEnums(
enum_one=EnumOne.VALUE,
enum_two=EnumTwo.VALUE,
enum_three=EnumThree.VALUE,
)
)
start_time = time.time()
await session.commit()
print(f"WithPool + ModelWithThreeEnums: {time.time() - start_time}")
if __name__ == "__main__":
asyncio.run(main())ErrorTimings on my machine: Additional context
Offtop: the reason we have to use the null pool is that we couldn't find another way to make |
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 12 replies
-
|
the asyncpg driver is known to do some unusual additional queries to introspect ENUM datatypes. Might this be an asyncpg issue? SQLAlchemy doesn't do anything differently with the different drivers. |
Beta Was this translation helpful? Give feedback.
-
|
Hi, Like muke said, can you reproduce the same behaviour using psycopg3 used with asyncio? |
Beta Was this translation helpful? Give feedback.
-
|
I opened MagicStack/asyncpg#1078 for them. if they can provide an option to skip this type introspection we can integrate that. |
Beta Was this translation helpful? Give feedback.
the asyncpg driver is known to do some unusual additional queries to introspect ENUM datatypes. Might this be an asyncpg issue? SQLAlchemy doesn't do anything differently with the different drivers.