Skip to content

Serious and weird memory leak when dumping a dictionary containing Python objects and non-English characters #631

@nhancdt2602

Description

@nhancdt2602

What did you do?

Use ujson.dumps to dump a dictionary containing Python objects and non-English characters.

What did you expect to happen?

Memory usage stays stable.

What actually happened?

Memory usage grows without limit.

What versions are you using?

  • OS: 6.5.0-28-generic
  • Python: 3.10.12
  • UltraJSON: 5.9.0

Please include code that reproduces the issue.

from typing import List

import psutil
import ujson
import orjson


class ExampleObject:
    """Plain holder for a document string and a list of item strings.

    Instances are not natively JSON-serializable; the reproduction relies on
    ``__repr__`` to turn them into text.
    """

    def __init__(self, document: str, items: List[str]):
        self.document, self.items = document, items

    def __repr__(self):
        # The "PromptSearchItemMock" label is kept verbatim: it is part of the
        # text that ends up being serialized.
        return "PromptSearchItemMock(document={}, items={})".format(
            self.document, self.items
        )


def _json_fallback_handler(obj):
    """
    Serialize custom datatypes and pass the rest to __structlog__ & repr().
    """
    return repr(obj)


def do_dump_mem_leak_1():
    """Dump three ExampleObject instances holding CJK text with ujson.

    Uses ``ensure_ascii=True`` and the repr fallback handler. This is the
    variant reported to leak memory.
    """
    results = []
    for _ in range(3):
        results.append(ExampleObject(document='生日快乐', items=['computer']))
    payload = {'search_results': results}
    # Reported to make memory grow without limit.
    ujson.dumps(payload, ensure_ascii=True, default=_json_fallback_handler)
    # orjson.dumps(payload, default=_json_fallback_handler)  # Reported to keep memory stable.


def do_dump_no_leak_1():
    """Dump three pre-stringified ExampleObject instances with ujson.

    Each object holds English text and is converted with ``str()`` before
    serialization; ``ensure_ascii=False`` is used. Reported not to leak.
    """
    text = 'study hard, study well, code well, code hard, play hard, play well'
    results = []
    for _ in range(3):
        results.append(str(ExampleObject(document=text, items=['computer'])))
    payload = {'search_results': results}
    ujson.dumps(payload, ensure_ascii=False, default=_json_fallback_handler)


def do_dump_no_leak_2():
    """Dump three ExampleObject instances holding English text with ujson.

    Uses ``ensure_ascii=False`` and the repr fallback handler. Reported not
    to leak.
    """
    text = 'study hard, study well, code well, code hard, play hard, play well'
    results = []
    for _ in range(3):
        results.append(ExampleObject(document=text, items=['computer']))
    payload = {'search_results': results}
    ujson.dumps(payload, ensure_ascii=False, default=_json_fallback_handler)


def do_dump_no_leak_3():
    """Dump three ExampleObject instances holding CJK text with orjson.

    Same payload as the leaking ujson variant, serialized with orjson and the
    repr fallback handler. Reported to keep memory stable.
    """
    results = []
    for _ in range(3):
        results.append(ExampleObject(document='生日快乐', items=['computer']))
    payload = {'search_results': results}
    orjson.dumps(payload, default=_json_fallback_handler)


# psutil process handle; main() reads the resident set size (RSS) from it.
process = psutil.Process()


def main():
    """Run the selected dump scenario repeatedly and print RSS periodically.

    Calls the scenario 100,000,000 times and prints the process RSS in MB on
    every 100,000th iteration, starting with iteration 0. Swap the active
    call below to compare scenarios.
    """
    report_every = 100_000
    iteration = 0
    while iteration < 100_000_000:
        do_dump_mem_leak_1()  # Reported leak: ujson with non-English characters
        # do_dump_no_leak_1()  # No leak reported: ujson, objects pre-converted with str()
        # do_dump_no_leak_2()  # No leak reported: ujson with English characters

        # do_dump_no_leak_3()  # No leak reported: orjson in all cases
        if iteration % report_every == 0:
            rss_mb = process.memory_info().rss / 1024 ** 2
            print(f'it = {iteration}, memory = {rss_mb} MB')
        iteration += 1


# Run the reproduction loop only when this file is executed as a script.
if __name__ == '__main__':
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions