Skip to content

Commit b0eeb35

Browse files
habermancopybara-github
authored andcommitted
Fixed Python memory leak in map lookup.
Previously we were allocating memory on the message's arena every time we performed a `map[key]` or `map.get(key)` operation. This is unnecessary, as the key's data is only needed ephemerally, for the duration of the lookup, and we can therefore alias the Python object's string data instead of copying it. This required fixing a bug in the convert.c operation. Previously in the `arena==NULL` case, if the user passes a bytes object instead of a unicode string, the code would return a pointer to a temporary Python object that had already been freed, leading to use-after-free. I fixed this by referencing the bytes object's data directly, and using utf8_range to verify the UTF-8. Fixes: #14571 PiperOrigin-RevId: 578563555
1 parent f083530 commit b0eeb35

4 files changed

Lines changed: 24 additions & 18 deletions

File tree

python/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,5 +203,6 @@ py_extension(
203203
"//upb/util:compare",
204204
"//upb/util:def_to_proto",
205205
"//upb/util:required_fields",
206+
"@utf8_range",
206207
],
207208
)

python/convert.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "upb/message/map.h"
3636
#include "upb/reflection/message.h"
3737
#include "upb/util/compare.h"
38+
#include "utf8_range.h"
3839

3940
// Must be last.
4041
#include "upb/port/def.inc"
@@ -259,20 +260,27 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
259260
}
260261
case kUpb_CType_String: {
261262
Py_ssize_t size;
262-
const char* ptr;
263-
PyObject* unicode = NULL;
264263
if (PyBytes_Check(obj)) {
265-
unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
266-
if (!obj) return false;
264+
// Use the object's bytes if they are valid UTF-8.
265+
char* ptr;
266+
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
267+
if (utf8_range2((const unsigned char*)ptr, size) != 0) {
268+
// Invalid UTF-8. Try to convert the message to a Python Unicode
269+
// object, even though we know this will fail, just to get the
270+
// idiomatic Python error message.
271+
obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
272+
assert(!obj);
273+
return false;
274+
}
275+
*val = PyUpb_MaybeCopyString(ptr, size, arena);
276+
return true;
277+
} else {
278+
const char* ptr;
279+
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
280+
if (PyErr_Occurred()) return false;
281+
*val = PyUpb_MaybeCopyString(ptr, size, arena);
282+
return true;
267283
}
268-
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
269-
if (PyErr_Occurred()) {
270-
Py_XDECREF(unicode);
271-
return false;
272-
}
273-
*val = PyUpb_MaybeCopyString(ptr, size, arena);
274-
Py_XDECREF(unicode);
275-
return true;
276284
}
277285
case kUpb_CType_Message:
278286
PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");

python/google/protobuf/internal/message_test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848

4949
warnings.simplefilter('error', DeprecationWarning)
5050

51-
5251
@_parameterized.named_parameters(('_proto2', unittest_pb2),
5352
('_proto3', unittest_proto3_arena_pb2))
5453
@testing_refleaks.TestCase

python/map.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ int PyUpb_MapContainer_AssignSubscript(PyObject* _self, PyObject* key,
179179
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
180180
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
181181
upb_MessageValue u_key, u_val;
182-
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return -1;
182+
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return -1;
183183

184184
if (val) {
185185
if (!PyUpb_PyToUpb(val, val_f, &u_val, arena)) return -1;
@@ -200,9 +200,8 @@ PyObject* PyUpb_MapContainer_Subscript(PyObject* _self, PyObject* key) {
200200
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
201201
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
202202
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
203-
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
204203
upb_MessageValue u_key, u_val;
205-
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
204+
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
206205
if (!map || !upb_Map_Get(map, u_key, &u_val)) {
207206
map = PyUpb_MapContainer_EnsureReified(_self);
208207
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
@@ -256,9 +255,8 @@ static PyObject* PyUpb_MapContainer_Get(PyObject* _self, PyObject* args,
256255
const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
257256
const upb_FieldDef* key_f = upb_MessageDef_Field(entry_m, 0);
258257
const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
259-
upb_Arena* arena = PyUpb_Arena_Get(self->arena);
260258
upb_MessageValue u_key, u_val;
261-
if (!PyUpb_PyToUpb(key, key_f, &u_key, arena)) return NULL;
259+
if (!PyUpb_PyToUpb(key, key_f, &u_key, NULL)) return NULL;
262260
if (map && upb_Map_Get(map, u_key, &u_val)) {
263261
return PyUpb_UpbToPy(u_val, val_f, self->arena);
264262
}

0 commit comments

Comments
 (0)