Skip to content

Commit 98f8483

Browse files
committed
gh-129005: Add bytearray._detach, use in readall
This removes the memory overhead of `_pyio.FileIO.readall` relative to `_io.FileIO.readall`, and significantly improves performance:

```bash
# _io.FileIO.readall of a large file
./python -m test -M8g -uall test_largefile -m test.test_largefile.CLargeFileTest.test_large_read
# _pyio.FileIO.readall of a large file
./python -m test -M8g -uall test_largefile -m test.test_largefile.PyLargeFileTest.test_large_read
```

- `_io`: takes ~0.791s and uses ~2GB of RAM
- `_pyio` (current): takes ~1.073s and uses ~4GB of RAM
- `_pyio` with `bytearray._detach`: takes ~0.887s and uses ~2GB of RAM
1 parent 84b02f3 commit 98f8483

4 files changed

Lines changed: 72 additions & 5 deletions

File tree

Lib/_pyio.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1697,7 +1697,7 @@ def readall(self):
16971697
assert len(result) - bytes_read >= 1, \
16981698
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
16991699
result.resize(bytes_read)
1700-
return bytes(result)
1700+
return result._detach()
17011701

17021702
def readinto(self, buffer):
17031703
"""Same as RawIOBase.readinto()."""

Lib/test/test_largefile.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ class TestFileMethods(LargeFileTest):
5656
(i.e. > 2 GiB) files.
5757
"""
5858

59-
# _pyio.FileIO.readall() uses a temporary bytearray then casted to bytes,
60-
# so memuse=2 is needed
61-
@bigmemtest(size=size, memuse=2, dry_run=False)
59+
@bigmemtest(size=size, memuse=1, dry_run=False)
6260
def test_large_read(self, _size):
6361
# bpo-24658: Test that a read greater than 2GB does not fail.
6462
with self.open(TESTFN, "rb") as f:

Objects/bytearrayobject.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,6 +2455,47 @@ bytearray_decode_impl(PyByteArrayObject *self, const char *encoding,
24552455
return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
24562456
}
24572457

2458+
/*[clinic input]
2459+
@critical_section
2460+
bytearray._detach
2461+
2462+
Return the existing storage as a bytes object without copying, and clear the bytearray's storage.
2463+
2464+
On error, the bytearray is left in a valid state, but its buffer may be cleared. If
2465+
there are exports, or the bytearray data is offset, this raises BufferError rather than copying.
2466+
[clinic start generated code]*/
2467+
2468+
static PyObject *
2469+
bytearray__detach_impl(PyByteArrayObject *self)
2470+
/*[clinic end generated code: output=eac03c9d6f8d6230 input=5bb91fd1ce9b77dc]*/
2471+
{
2472+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
2473+
if (self->ob_exports > 0) {
2474+
PyErr_SetString(PyExc_BufferError,
2475+
"Existing exports of data: buffer cannot be detached");
2476+
return NULL;
2477+
}
2478+
if (self->ob_start != self->ob_bytes) {
2479+
PyErr_SetString(PyExc_BufferError,
2480+
"Buffer start offset from bytes: buffer cannot be detached; was the object sliced?");
2481+
return NULL;
2482+
}
2483+
2484+
/* buffer may be overallocated, ensure exact size */
2485+
if (_PyBytes_Resize(&self->ob_bytes_head, PyByteArray_GET_SIZE(self))) {
2486+
return NULL;
2487+
}
2488+
2489+
/* FIXME: Take buffer size? Bytes to copy from? */
2490+
PyObject *new_buffer = PyBytes_FromStringAndSize(NULL, 0);
2491+
if (!new_buffer) {
2492+
return NULL;
2493+
}
2494+
PyObject *old_buffer = self->ob_bytes_head;
2495+
bytearray_set_bytes(self, new_buffer, 0);
2496+
return old_buffer;
2497+
}
2498+
24582499
PyDoc_STRVAR(alloc_doc,
24592500
"B.__alloc__() -> int\n\
24602501
\n\
@@ -2709,6 +2750,7 @@ static PyMethodDef bytearray_methods[] = {
27092750
BYTEARRAY_COPY_METHODDEF
27102751
BYTEARRAY_COUNT_METHODDEF
27112752
BYTEARRAY_DECODE_METHODDEF
2753+
BYTEARRAY__DETACH_METHODDEF
27122754
BYTEARRAY_ENDSWITH_METHODDEF
27132755
{"expandtabs", _PyCFunction_CAST(bytearray_expandtabs),
27142756
METH_FASTCALL|METH_KEYWORDS, stringlib_expandtabs__doc__},

Objects/clinic/bytearrayobject.c.h

Lines changed: 28 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)