-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
Expand file tree
/
Copy pathpickle.py
More file actions
1955 lines (1695 loc) · 69.2 KB
/
pickle.py
File metadata and controls
1955 lines (1695 loc) · 69.2 KB
Edit and raw actions
OlderNewer
1
"""Create portable serialized representations of Python objects.
2
3
See module copyreg for a mechanism for registering custom picklers.
4
See module pickletools source for extensive comments.
5
6
Classes:
7
8
Pickler
9
Unpickler
10
11
Functions:
12
13
dump(object, file)
14
dumps(object) -> string
15
load(file) -> object
16
loads(bytes) -> object
17
18
Misc variables:
19
20
format_version
21
compatible_formats
22
23
"""
24
25
from types import FunctionType
26
from copyreg import dispatch_table
27
from copyreg import _extension_registry, _inverted_registry, _extension_cache
28
from itertools import batched
29
from functools import partial
30
import sys
31
from sys import maxsize
32
from struct import pack, unpack
33
import io
34
import codecs
35
import _compat_pickle
36
37
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
38
"Unpickler", "dump", "dumps", "load", "loads"]
39
40
try:
41
from _pickle import PickleBuffer
42
__all__.append("PickleBuffer")
43
_HAVE_PICKLE_BUFFER = True
44
except ImportError:
45
_HAVE_PICKLE_BUFFER = False
46
47
48
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# Purely informational constants; nothing in this module consults them.
# format_version is the file format we write; compatible_formats lists the
# old format versions we can still read:
#   1.0  original protocol 0
#   1.1  protocol 0 with INST added
#   1.2  original protocol 1
#   1.3  protocol 1 with BINFLOAT added
#   2.0 - 5.0  protocols 2 through 5
format_version = "5.0"
compatible_formats = ["1.0", "1.1", "1.2", "1.3", "2.0", "3.0", "4.0", "5.0"]

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 5
70
71
class PickleError(Exception):
72
"""A common base class for the other pickling exceptions."""
73
pass
74
75
class PicklingError(PickleError):
76
"""This exception is raised when an unpicklable object is passed to the
77
dump() method.
78
79
"""
80
pass
81
82
class UnpicklingError(PickleError):
83
"""This exception is raised when there is a problem unpickling an object,
84
such as a security violation.
85
86
Note that other exceptions may also be raised during unpickling, including
87
(but not necessarily limited to) AttributeError, EOFError, ImportError,
88
and IndexError.
89
90
"""
91
pass
92
93
# An instance of _Stop is raised by Unpickler.load_stop() in response to
94
# the STOP opcode, passing the object that is the result of unpickling.
95
class _Stop(Exception):
96
def __init__(self, value):
97
self.value = value
98
99
# Pickle opcodes. See pickletools.py for extensive docs. The listing
100
# here is in kind-of alphabetical order of 1-character pickle code.
101
# pickletools groups them by purpose.
102
103
MARK = b'(' # push special markobject on stack
104
STOP = b'.' # every pickle ends with STOP
105
POP = b'0' # discard topmost stack item
106
POP_MARK = b'1' # discard stack top through topmost markobject
107
DUP = b'2' # duplicate top stack item
108
FLOAT = b'F' # push float object; decimal string argument
109
INT = b'I' # push integer or bool; decimal string argument
110
BININT = b'J' # push four-byte signed int
111
BININT1 = b'K' # push 1-byte unsigned int
112
LONG = b'L' # push long; decimal string argument
113
BININT2 = b'M' # push 2-byte unsigned int
114
NONE = b'N' # push None
115
PERSID = b'P' # push persistent object; id is taken from string arg
116
BINPERSID = b'Q' # " " " ; " " " " stack
117
REDUCE = b'R' # apply callable to argtuple, both on stack
118
STRING = b'S' # push string; NL-terminated string argument
119
BINSTRING = b'T' # push string; counted binary string argument
120
SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes
121
UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
122
BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
123
APPEND = b'a' # append stack top to list below it
124
BUILD = b'b' # call __setstate__ or __dict__.update()
125
GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
126
DICT = b'd' # build a dict from stack items
127
EMPTY_DICT = b'}' # push empty dict
128
APPENDS = b'e' # extend list on stack by topmost stack slice
129
GET = b'g' # push item from memo on stack; index is string arg
130
BINGET = b'h' # " " " " " " ; " " 1-byte arg
131
INST = b'i' # build & push class instance
132
LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
133
LIST = b'l' # build list from topmost stack items
134
EMPTY_LIST = b']' # push empty list
135
OBJ = b'o' # build & push class instance
136
PUT = b'p' # store stack top in memo; index is string arg
137
BINPUT = b'q' # " " " " " ; " " 1-byte arg
138
LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
139
SETITEM = b's' # add key+value pair to dict
140
TUPLE = b't' # build tuple from topmost stack items
141
EMPTY_TUPLE = b')' # push empty tuple
142
SETITEMS = b'u' # modify dict by adding topmost key+value pairs
143
BINFLOAT = b'G' # push float; arg is 8-byte float encoding
144
145
TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
146
FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py
147
148
# Protocol 2
149
150
PROTO = b'\x80' # identify pickle protocol
151
NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
152
EXT1 = b'\x82' # push object from extension registry; 1-byte index
153
EXT2 = b'\x83' # ditto, but 2-byte index
154
EXT4 = b'\x84' # ditto, but 4-byte index
155
TUPLE1 = b'\x85' # build 1-tuple from stack top
156
TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
157
TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
158
NEWTRUE = b'\x88' # push True
159
NEWFALSE = b'\x89' # push False
160
LONG1 = b'\x8a' # push long from < 256 bytes
161
LONG4 = b'\x8b' # push really big long
162
163
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
164
165
# Protocol 3 (Python 3.x)
166
167
BINBYTES = b'B' # push bytes; counted binary string argument
168
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
169
170
# Protocol 4
171
172
SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
173
BINUNICODE8 = b'\x8d' # push very long string
174
BINBYTES8 = b'\x8e' # push very long bytes string
175
EMPTY_SET = b'\x8f' # push empty set on the stack
176
ADDITEMS = b'\x90' # modify set by adding topmost stack items
177
FROZENSET = b'\x91' # build frozenset from topmost stack items
178
NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments
179
STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks
180
MEMOIZE = b'\x94' # store top of the stack in memo
181
FRAME = b'\x95' # indicate the beginning of a new frame
182
183
# Protocol 5
184
185
BYTEARRAY8 = b'\x96' # push bytearray
186
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
187
READONLY_BUFFER = b'\x98' # make top of stack readonly
188
189
__all__.extend(x for x in dir() if x.isupper() and not x.startswith('_'))
190
191
192
# Data larger than this will be read in chunks, to prevent extreme
193
# overallocation.
194
_MIN_READ_BUF_SIZE = (1 << 20)
195
196
197
class _Framer:
198
199
_FRAME_SIZE_MIN = 4
200
_FRAME_SIZE_TARGET = 64 * 1024
201
202
def __init__(self, file_write):
203
self.file_write = file_write
204
self.current_frame = None
205
206
def start_framing(self):
207
self.current_frame = io.BytesIO()
208
209
def end_framing(self):
210
if self.current_frame and self.current_frame.tell() > 0:
211
self.commit_frame(force=True)
212
self.current_frame = None
213
214
def commit_frame(self, force=False):
215
if self.current_frame:
216
f = self.current_frame
217
if f.tell() >= self._FRAME_SIZE_TARGET or force:
218
data = f.getbuffer()
219
write = self.file_write
220
if len(data) >= self._FRAME_SIZE_MIN:
221
# Issue a single call to the write method of the underlying
222
# file object for the frame opcode with the size of the
223
# frame. The concatenation is expected to be less expensive
224
# than issuing an additional call to write.
225
write(FRAME + pack("<Q", len(data)))
226
227
# Issue a separate call to write to append the frame
228
# contents without concatenation to the above to avoid a
229
# memory copy.
230
write(data)
231
232
# Start the new frame with a new io.BytesIO instance so that
233
# the file object can have delayed access to the previous frame
234
# contents via an unreleased memoryview of the previous
235
# io.BytesIO instance.
236
self.current_frame = io.BytesIO()
237
238
def write(self, data):
239
if self.current_frame:
240
return self.current_frame.write(data)
241
else:
242
return self.file_write(data)
243
244
def write_large_bytes(self, header, payload):
245
write = self.file_write
246
if self.current_frame:
247
# Terminate the current frame and flush it to the file.
248
self.commit_frame(force=True)
249
250
# Perform direct write of the header and payload of the large binary
251
# object. Be careful not to concatenate the header and the payload
252
# prior to calling 'write' as we do not want to allocate a large
253
# temporary bytes object.
254
# We intentionally do not insert a protocol 4 frame opcode to make
255
# it possible to optimize file.read calls in the loader.
256
write(header)
257
write(payload)
258
259
260
class _Unframer:
    """Reads pickle data, transparently handling protocol 4+ framing.

    While a frame is active, reads are served from the in-memory frame
    buffer; outside a frame they go straight to the underlying file.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        # file_tell is accepted for API compatibility but never used here.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* completely; return the number of bytes written."""
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly at a boundary: drop out of framing
                # mode and satisfy the request from the file directly.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                # A partial read mid-frame means the frame was truncated.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        """Return exactly *n* bytes (fewer only at end of input)."""
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted at a boundary; read from the file instead.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self._chunked_file_read(n)

    def readline(self):
        """Return one newline-terminated line."""
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                # Frame exhausted at a boundary; read from the file instead.
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                # A line without a trailing newline means the frame ended
                # in the middle of an opcode argument.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def _chunked_file_read(self, size):
        # Read up to *size* bytes in doubling chunks, starting at
        # _MIN_READ_BUF_SIZE, so a huge (possibly bogus) size prefix does
        # not cause one enormous up-front allocation.
        cursize = min(size, _MIN_READ_BUF_SIZE)
        b = self.file_read(cursize)
        while cursize < size and len(b) == cursize:
            delta = min(cursize, size - cursize)
            b += self.file_read(delta)
            cursize += delta
        return b

    def load_frame(self, frame_size):
        """Read *frame_size* bytes from the file and make them the frame."""
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        data = self._chunked_file_read(frame_size)
        if len(data) < frame_size:
            raise EOFError
        self.current_frame = io.BytesIO(data)
327
328
329
# Tools used for pickling.
330
331
def _getattribute(obj, dotted_path):
332
for subpath in dotted_path:
333
obj = getattr(obj, subpath)
334
return obj
335
336
def whichmodule(obj, name):
    """Find the module an object belongs to.

    *name* is the (possibly dotted) qualified name of *obj*.  Returns the
    module name; raises PicklingError when the object is local, cannot be
    re-imported, or is shadowed by a different object of the same name.
    """
    dotted_path = name.split('.')
    module_name = getattr(obj, '__module__', None)
    if '<locals>' in dotted_path:
        # Objects defined inside a function body cannot be re-imported.
        raise PicklingError(f"Can't pickle local object {obj!r}")
    if module_name is None:
        # No __module__ attribute: scan loaded modules for one that exposes
        # this exact object under *name*.
        # Protect the iteration by using a list copy of sys.modules against dynamic
        # modules that trigger imports of other modules upon calls to getattr.
        for module_name, module in sys.modules.copy().items():
            if (module_name == '__main__'
                    or module_name == '__mp_main__'  # bpo-42406
                    or module is None):
                continue
            try:
                if _getattribute(module, dotted_path) is obj:
                    return module_name
            except AttributeError:
                pass
        module_name = '__main__'

    # Verify that the claimed module really provides this object: import it
    # and walk the dotted path again.
    try:
        __import__(module_name, level=0)
        module = sys.modules[module_name]
    except (ImportError, ValueError, KeyError) as exc:
        raise PicklingError(f"Can't pickle {obj!r}: {exc!s}")
    try:
        if _getattribute(module, dotted_path) is obj:
            return module_name
    except AttributeError:
        raise PicklingError(f"Can't pickle {obj!r}: "
                            f"it's not found as {module_name}.{name}")

    raise PicklingError(
        f"Can't pickle {obj!r}: it's not the same object as {module_name}.{name}")
371
372
def encode_long(x):
    r"""Encode an int as a two's-complement little-endian byte string.

    Zero is special-cased to the empty string, which saves a byte in the
    LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if not x:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(nbytes, byteorder='little', signed=True)
    # The +1 above can yield one redundant sign byte for negative values;
    # drop it when the preceding byte already carries the sign bit.
    if x < 0 and nbytes > 1 and encoded[-1] == 0xff and encoded[-2] & 0x80:
        encoded = encoded[:-1]
    return encoded
401
402
def decode_long(data):
    r"""Inverse of encode_long(): decode a two's-complement little-endian
    byte string into an int.  The empty string decodes to 0.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)
421
422
def _T(obj):
423
cls = type(obj)
424
module = cls.__module__
425
if module in (None, 'builtins', '__main__'):
426
return cls.__qualname__
427
return f'{module}.{cls.__qualname__}'
428
429
430
_NoValue = object()
431
432
# Pickling machinery
433
434
class _Pickler:
435
436
    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 5. It was introduced in Python 3.8, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view.  If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            # Negative means "highest available".
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        # All output goes through the framer so protocol 4+ frames are
        # assembled transparently.
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        # memo maps id(obj) -> (memo key, obj); see memoize().
        self.memo = {}
        self.proto = int(protocol)
        # bin: protocols >= 1 use binary opcodes rather than text ones.
        self.bin = protocol >= 1
        # fast: when nonzero, memoization is disabled (see clear_memo docs
        # in the C implementation); recursive structures will then loop.
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3
492
493
    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects
        are pickled by reference and not by value.  This method is
        useful when re-using picklers.
        """
        self.memo.clear()
502
503
    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly.  This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        # Protocol 2+ streams start with a PROTO opcode and the version.
        if self.proto >= 2:
            self.write(PROTO + pack("<B", self.proto))
        # Protocol 4+ wraps the body in frames.
        if self.proto >= 4:
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
517
518
    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            # "Fast" mode disables memoization entirely.
            return
        assert id(obj) not in self.memo
        idx = len(self.memo)
        self.write(self.put(idx))
        self.memo[id(obj)] = idx, obj
539
540
# Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
541
def put(self, idx):
542
if self.proto >= 4:
543
return MEMOIZE
544
elif self.bin:
545
if idx < 256:
546
return BINPUT + pack("<B", idx)
547
else:
548
return LONG_BINPUT + pack("<I", idx)
549
else:
550
return PUT + repr(idx).encode("ascii") + b'\n'
551
552
# Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
553
def get(self, i):
554
if self.bin:
555
if i < 256:
556
return BINGET + pack("<B", i)
557
else:
558
return LONG_BINGET + pack("<I", i)
559
560
return GET + repr(i).encode("ascii") + b'\n'
561
562
    def save(self, obj, save_persistent_id=True):
        """Serialize *obj*, trying each pickling mechanism in turn:
        persistent id, memo, reducer_override, the type dispatch table,
        dispatch_table, save_global for classes, then __reduce_ex__ /
        __reduce__."""
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        if save_persistent_id:
            pid = self.persistent_id(obj)
            if pid is not None:
                self.save_pers(pid)
                return

        # Check the memo: an already-seen object becomes a GET reference.
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        # reducer_override (a subclass hook, PEP 574) gets first shot.
        reduce = getattr(self, "reducer_override", _NoValue)
        if reduce is not _NoValue:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t, _NoValue)
            if reduce is not _NoValue:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", _NoValue)
                if reduce is not _NoValue:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", _NoValue)
                    if reduce is not _NoValue:
                        rv = reduce()
                    else:
                        raise PicklingError(f"Can't pickle {_T(t)} object")

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        try:
            # Assert that reduce() returned a tuple
            if not isinstance(rv, tuple):
                raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}')

            # Assert that it returned an appropriately sized tuple
            l = len(rv)
            if not (2 <= l <= 6):
                raise PicklingError("tuple returned by __reduce__ "
                                    "must contain 2 through 6 elements")

            # Save the reduce() output and finally memoize the object
            self.save_reduce(obj=obj, *rv)
        except BaseException as exc:
            # Annotate the exception with the object being serialized so
            # nested failures report the full path.
            exc.add_note(f'when serializing {_T(obj)} object')
            raise
635
636
def persistent_id(self, obj):
637
# This exists so a subclass can override it
638
return None
639
640
    def save_pers(self, pid):
        # Save a persistent id reference.
        if self.bin:
            # Binary protocols pickle the id itself, then BINPERSID.
            self.save(pid, save_persistent_id=False)
            self.write(BINPERSID)
        else:
            # Protocol 0 writes the id as an ASCII line after PERSID.
            try:
                self.write(PERSID + str(pid).encode("ascii") + b'\n')
            except UnicodeEncodeError:
                raise PicklingError(
                    "persistent IDs in protocol 0 must be ASCII strings")
651
652
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, *, obj=None):
        # Emit the opcodes for one tuple returned by __reduce__ /
        # __reduce_ex__.  This API is called by some subclasses.

        if not callable(func):
            raise PicklingError(f"first item of the tuple returned by __reduce__ "
                                f"must be callable, not {_T(func)}")
        if not isinstance(args, tuple):
            raise PicklingError(f"second item of the tuple returned by __reduce__ "
                                f"must be a tuple, not {_T(args)}")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            # Keyword-capable object creation: cls.__new__(cls, *args, **kwargs).
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("first argument to __newobj_ex__() has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(f"first argument to __newobj_ex__() "
                                    f"must be {obj.__class__!r}, not {cls!r}")
            if self.proto >= 4:
                # Protocol 4 has a dedicated NEWOBJ_EX opcode.
                try:
                    save(cls)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} class')
                    raise
                try:
                    save(args)
                    save(kwargs)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
                    raise
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3: fold cls and the arguments into a partial
                # and emit a plain REDUCE call.
                func = partial(cls.__new__, cls, *args, **kwargs)
                try:
                    save(func)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} reconstructor')
                    raise
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError("first argument to __newobj__() has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(f"first argument to __newobj__() "
                                    f"must be {obj.__class__!r}, not {cls!r}")
            args = args[1:]
            try:
                save(cls)
            except BaseException as exc:
                exc.add_note(f'when serializing {_T(obj)} class')
                raise
            try:
                save(args)
            except BaseException as exc:
                exc.add_note(f'when serializing {_T(obj)} __new__ arguments')
                raise
            write(NEWOBJ)
        else:
            # Generic case: save the reconstructor and its argument tuple,
            # then apply it with REDUCE.
            try:
                save(func)
            except BaseException as exc:
                exc.add_note(f'when serializing {_T(obj)} reconstructor')
                raise
            try:
                save(args)
            except BaseException as exc:
                exc.add_note(f'when serializing {_T(obj)} reconstructor arguments')
                raise
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive.  In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems, obj)

        if dictitems is not None:
            self._batch_setitems(dictitems, obj)

        if state is not None:
            if state_setter is None:
                try:
                    save(state)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} state')
                    raise
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                try:
                    save(state_setter)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} state setter')
                    raise
                save(obj)  # simple BINGET opcode as obj is already memoized.
                try:
                    save(state)
                except BaseException as exc:
                    exc.add_note(f'when serializing {_T(obj)} state')
                    raise
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj.  We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
806
807
    # Methods below this point are dispatched through the dispatch table

    # Maps a concrete type to the save_* function that serializes it.
    dispatch = {}

    def save_none(self, obj):
        # None has its own dedicated opcode.
        self.write(NONE)
    dispatch[type(None)] = save_none
814
815
def save_bool(self, obj):
816
if self.proto >= 2:
817
self.write(NEWTRUE if obj else NEWFALSE)
818
else:
819
self.write(TRUE if obj else FALSE)
820
dispatch[bool] = save_bool
821
822
    def save_long(self, obj):
        """Pickle an int, picking the smallest suitable opcode."""
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + pack("<B", obj))
                    return
                if obj <= 0xffff:
                    self.write(BININT2 + pack("<H", obj))
                    return
            # Next check for 4-byte signed ints:
            if -0x80000000 <= obj <= 0x7fffffff:
                self.write(BININT + pack("<i", obj))
                return
        if self.proto >= 2:
            # Arbitrary precision: length-prefixed two's-complement bytes.
            encoded = encode_long(obj)
            n = len(encoded)
            if n < 256:
                self.write(LONG1 + pack("<B", n) + encoded)
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
        # Protocol 0/1 text fallbacks; note the trailing 'L' on LONG.
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(INT + repr(obj).encode("ascii") + b'\n')
        else:
            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
    dispatch[int] = save_long
852
853
def save_float(self, obj):
854
if self.bin:
855
self.write(BINFLOAT + pack('>d', obj))
856
else:
857
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
858
dispatch[float] = save_float
859
860
    def _save_bytes_no_memo(self, obj):
        # helper for writing bytes objects for protocol >= 3
        # without memoizing them
        assert self.proto >= 3
        n = len(obj)
        if n <= 0xff:
            # Short form: 1-byte length prefix.
            self.write(SHORT_BINBYTES + pack("<B", n) + obj)
        elif n > 0xffffffff and self.proto >= 4:
            # Too big for a 4-byte length; needs the 8-byte opcode.
            self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            # Large enough to bypass framing and write straight through.
            self._write_large_bytes(BINBYTES + pack("<I", n), obj)
        else:
            self.write(BINBYTES + pack("<I", n) + obj)
874
    def save_bytes(self, obj):
        """Pickle a bytes object."""
        if self.proto < 3:
            # Protocols 0-2 have no bytes opcodes; reduce to a latin-1
            # round-trip through codecs.encode (Python 2 compatible form).
            if not obj:  # bytes object is empty
                self.save_reduce(bytes, (), obj=obj)
            else:
                self.save_reduce(codecs.encode,
                                 (str(obj, 'latin1'), 'latin1'), obj=obj)
            return
        self._save_bytes_no_memo(obj)
        self.memoize(obj)
    dispatch[bytes] = save_bytes
885
886
    def _save_bytearray_no_memo(self, obj):
        # helper for writing bytearray objects for protocol >= 5
        # without memoizing them
        assert self.proto >= 5
        n = len(obj)
        if n >= self.framer._FRAME_SIZE_TARGET:
            # Large payloads bypass framing entirely.
            self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
        else:
            self.write(BYTEARRAY8 + pack("<Q", n) + obj)
895
896
    def save_bytearray(self, obj):
        """Pickle a bytearray."""
        if self.proto < 5:
            # No BYTEARRAY8 before protocol 5; reduce to bytearray(bytes).
            if not obj:  # bytearray is empty
                self.save_reduce(bytearray, (), obj=obj)
            else:
                self.save_reduce(bytearray, (bytes(obj),), obj=obj)
            return
        self._save_bytearray_no_memo(obj)
        self.memoize(obj)
    dispatch[bytearray] = save_bytearray
906
907
    if _HAVE_PICKLE_BUFFER:
        def save_picklebuffer(self, obj):
            """Pickle a PickleBuffer, in-band or out-of-band (PEP 574)."""
            if self.proto < 5:
                raise PicklingError("PickleBuffer can only be pickled with "
                                    "protocol >= 5")
            with obj.raw() as m:
                if not m.contiguous:
                    raise PicklingError("PickleBuffer can not be pickled when "
                                        "pointing to a non-contiguous buffer")
                # The buffer_callback decides whether the data travels
                # inside the stream (in-band) or separately (out-of-band).
                in_band = True
                if self._buffer_callback is not None:
                    in_band = bool(self._buffer_callback(obj))
                if in_band:
                    # Write data in-band
                    # XXX The C implementation avoids a copy here
                    buf = m.tobytes()
                    if m.readonly:
                        self._save_bytes_no_memo(buf)
                    else:
                        self._save_bytearray_no_memo(buf)
                    self.memoize(obj)
                else:
                    # Write data out-of-band: only a placeholder opcode goes
                    # into the stream.
                    self.write(NEXT_BUFFER)
                    if m.readonly:
                        self.write(READONLY_BUFFER)

        dispatch[PickleBuffer] = save_picklebuffer
935
936
    def save_str(self, obj):
        """Pickle a str."""
        if self.bin:
            # surrogatepass keeps lone surrogates round-trippable.
            encoded = obj.encode('utf-8', 'surrogatepass')
            n = len(encoded)
            if n <= 0xff and self.proto >= 4:
                self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
            elif n > 0xffffffff and self.proto >= 4:
                # Too big for a 4-byte length prefix.
                self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
            elif n >= self.framer._FRAME_SIZE_TARGET:
                self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
            else:
                self.write(BINUNICODE + pack("<I", n) + encoded)
        else:
            # Escape what raw-unicode-escape doesn't, but memoize the original.
            tmp = obj.replace("\\", "\\u005c")
            tmp = tmp.replace("\0", "\\u0000")
            tmp = tmp.replace("\n", "\\u000a")
            tmp = tmp.replace("\r", "\\u000d")
            tmp = tmp.replace("\x1a", "\\u001a")  # EOF on DOS
            self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n')
        self.memoize(obj)
    dispatch[str] = save_str
958
959
def save_tuple(self, obj):
960
if not obj: # tuple is empty
961
if self.bin:
962
self.write(EMPTY_TUPLE)
963
else:
964
self.write(MARK + TUPLE)
965
return
966
967
n = len(obj)
968
save = self.save
969
memo = self.memo
970
if n <= 3 and self.proto >= 2:
971
for i, element in enumerate(obj):
972
try:
973
save(element)
974
except BaseException as exc:
975
exc.add_note(f'when serializing {_T(obj)} item {i}')
976
raise
977
# Subtle. Same as in the big comment below.
978
if id(obj) in memo:
979
get = self.get(memo[id(obj)][0])
980
self.write(POP * n + get)
981
else:
982
self.write(_tuplesize2code[n])
983
self.memoize(obj)
984
return
985
986
# proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
987
# has more than 3 elements.
988
write = self.write
989
write(MARK)
990
for i, element in enumerate(obj):
991
try:
992
save(element)
993
except BaseException as exc:
994
exc.add_note(f'when serializing {_T(obj)} item {i}')
995
raise
996
997
if id(obj) in memo:
998
# Subtle. d was not in memo when we entered save_tuple(), so
999
# the process of saving the tuple's elements must have saved
1000
# the tuple itself: the tuple is recursive. The proper action