-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
Expand file tree
/
Copy pathpickle.py
More file actions
1820 lines (1572 loc) · 63.4 KB
/
pickle.py
File metadata and controls
1820 lines (1572 loc) · 63.4 KB
Edit and raw actions
OlderNewer
1
"""Create portable serialized representations of Python objects.
2
3
See module copyreg for a mechanism for registering custom picklers.
4
See module pickletools source for extensive comments.
5
6
Classes:
7
8
Pickler
9
Unpickler
10
11
Functions:
12
13
dump(object, file)
14
dumps(object) -> string
15
load(file) -> object
16
loads(bytes) -> object
17
18
Misc variables:
19
20
__version__
21
format_version
22
compatible_formats
23
24
"""
25
26
from types import FunctionType
27
from copyreg import dispatch_table
28
from copyreg import _extension_registry, _inverted_registry, _extension_cache
29
from itertools import islice
30
from functools import partial
31
import sys
32
from sys import maxsize
33
from struct import pack, unpack
34
import re
35
import io
36
import codecs
37
import _compat_pickle
38
39
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
40
"Unpickler", "dump", "dumps", "load", "loads"]
41
42
try:
43
from _pickle import PickleBuffer
44
__all__.append("PickleBuffer")
45
_HAVE_PICKLE_BUFFER = True
46
except ImportError:
47
_HAVE_PICKLE_BUFFER = False
48
49
50
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# Purely informational; nothing in this module consults these values.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# Highest protocol number this implementation knows how to read.
HIGHEST_PROTOCOL = 5

# Protocol written by default; may lag HIGHEST_PROTOCOL.  Only bump it
# once the oldest still-supported Python release already includes it.
DEFAULT_PROTOCOL = 4
72
73
class PickleError(Exception):
    """Base class shared by all pickling/unpickling exceptions."""
    pass
76
77
class PicklingError(PickleError):
    """Raised when an object cannot be pickled by the dump() machinery."""
    pass
83
84
class UnpicklingError(PickleError):
    """Raised on a problem while unpickling (e.g. a security violation).

    Unpickling may also raise other exceptions, including (but not
    limited to) AttributeError, EOFError, ImportError and IndexError.
    """
    pass
94
95
# An instance of _Stop is raised by Unpickler.load_stop() in response to
96
# the STOP opcode, passing the object that is the result of unpickling.
97
class _Stop(Exception):
98
def __init__(self, value):
99
self.value = value
100
101
# Jython exposes PyStringMap, a dict subclass keyed by strings; on other
# implementations the name is simply None and the dispatch entry is skipped.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
106
107
# Pickle opcodes. See pickletools.py for extensive docs. The listing
108
# here is in kind-of alphabetical order of 1-character pickle code.
109
# pickletools groups them by purpose.
110
111
MARK = b'(' # push special markobject on stack
112
STOP = b'.' # every pickle ends with STOP
113
POP = b'0' # discard topmost stack item
114
POP_MARK = b'1' # discard stack top through topmost markobject
115
DUP = b'2' # duplicate top stack item
116
FLOAT = b'F' # push float object; decimal string argument
117
INT = b'I' # push integer or bool; decimal string argument
118
BININT = b'J' # push four-byte signed int
119
BININT1 = b'K' # push 1-byte unsigned int
120
LONG = b'L' # push long; decimal string argument
121
BININT2 = b'M' # push 2-byte unsigned int
122
NONE = b'N' # push None
123
PERSID = b'P' # push persistent object; id is taken from string arg
124
BINPERSID = b'Q' # " " " ; " " " " stack
125
REDUCE = b'R' # apply callable to argtuple, both on stack
126
STRING = b'S' # push string; NL-terminated string argument
127
BINSTRING = b'T' # push string; counted binary string argument
128
SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes
129
UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
130
BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
131
APPEND = b'a' # append stack top to list below it
132
BUILD = b'b' # call __setstate__ or __dict__.update()
133
GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
134
DICT = b'd' # build a dict from stack items
135
EMPTY_DICT = b'}' # push empty dict
136
APPENDS = b'e' # extend list on stack by topmost stack slice
137
GET = b'g' # push item from memo on stack; index is string arg
138
BINGET = b'h' # " " " " " " ; " " 1-byte arg
139
INST = b'i' # build & push class instance
140
LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
141
LIST = b'l' # build list from topmost stack items
142
EMPTY_LIST = b']' # push empty list
143
OBJ = b'o' # build & push class instance
144
PUT = b'p' # store stack top in memo; index is string arg
145
BINPUT = b'q' # " " " " " ; " " 1-byte arg
146
LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
147
SETITEM = b's' # add key+value pair to dict
148
TUPLE = b't' # build tuple from topmost stack items
149
EMPTY_TUPLE = b')' # push empty tuple
150
SETITEMS = b'u' # modify dict by adding topmost key+value pairs
151
BINFLOAT = b'G' # push float; arg is 8-byte float encoding
152
153
TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
154
FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py
155
156
# Protocol 2
157
158
PROTO = b'\x80' # identify pickle protocol
159
NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
160
EXT1 = b'\x82' # push object from extension registry; 1-byte index
161
EXT2 = b'\x83' # ditto, but 2-byte index
162
EXT4 = b'\x84' # ditto, but 4-byte index
163
TUPLE1 = b'\x85' # build 1-tuple from stack top
164
TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
165
TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
166
NEWTRUE = b'\x88' # push True
167
NEWFALSE = b'\x89' # push False
168
LONG1 = b'\x8a' # push long from < 256 bytes
169
LONG4 = b'\x8b' # push really big long
170
171
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
172
173
# Protocol 3 (Python 3.x)
174
175
BINBYTES = b'B' # push bytes; counted binary string argument
176
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
177
178
# Protocol 4
179
180
SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
181
BINUNICODE8 = b'\x8d' # push very long string
182
BINBYTES8 = b'\x8e' # push very long bytes string
183
EMPTY_SET = b'\x8f' # push empty set on the stack
184
ADDITEMS = b'\x90' # modify set by adding topmost stack items
185
FROZENSET = b'\x91' # build frozenset from topmost stack items
186
NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments
187
STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks
188
MEMOIZE = b'\x94' # store top of the stack in memo
189
FRAME = b'\x95' # indicate the beginning of a new frame
190
191
# Protocol 5
192
193
BYTEARRAY8 = b'\x96' # push bytearray
194
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
195
READONLY_BUFFER = b'\x98' # make top of stack readonly
196
197
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
198
199
200
class _Framer:
201
202
_FRAME_SIZE_MIN = 4
203
_FRAME_SIZE_TARGET = 64 * 1024
204
205
def __init__(self, file_write):
206
self.file_write = file_write
207
self.current_frame = None
208
209
def start_framing(self):
210
self.current_frame = io.BytesIO()
211
212
def end_framing(self):
213
if self.current_frame and self.current_frame.tell() > 0:
214
self.commit_frame(force=True)
215
self.current_frame = None
216
217
def commit_frame(self, force=False):
218
if self.current_frame:
219
f = self.current_frame
220
if f.tell() >= self._FRAME_SIZE_TARGET or force:
221
data = f.getbuffer()
222
write = self.file_write
223
if len(data) >= self._FRAME_SIZE_MIN:
224
# Issue a single call to the write method of the underlying
225
# file object for the frame opcode with the size of the
226
# frame. The concatenation is expected to be less expensive
227
# than issuing an additional call to write.
228
write(FRAME + pack("<Q", len(data)))
229
230
# Issue a separate call to write to append the frame
231
# contents without concatenation to the above to avoid a
232
# memory copy.
233
write(data)
234
235
# Start the new frame with a new io.BytesIO instance so that
236
# the file object can have delayed access to the previous frame
237
# contents via an unreleased memoryview of the previous
238
# io.BytesIO instance.
239
self.current_frame = io.BytesIO()
240
241
def write(self, data):
242
if self.current_frame:
243
return self.current_frame.write(data)
244
else:
245
return self.file_write(data)
246
247
def write_large_bytes(self, header, payload):
248
write = self.file_write
249
if self.current_frame:
250
# Terminate the current frame and flush it to the file.
251
self.commit_frame(force=True)
252
253
# Perform direct write of the header and payload of the large binary
254
# object. Be careful not to concatenate the header and the payload
255
# prior to calling 'write' as we do not want to allocate a large
256
# temporary bytes object.
257
# We intentionally do not insert a protocol 4 frame opcode to make
258
# it possible to optimize file.read calls in the loader.
259
write(header)
260
write(payload)
261
262
263
class _Unframer:
264
265
def __init__(self, file_read, file_readline, file_tell=None):
266
self.file_read = file_read
267
self.file_readline = file_readline
268
self.current_frame = None
269
270
def readinto(self, buf):
271
if self.current_frame:
272
n = self.current_frame.readinto(buf)
273
if n == 0 and len(buf) != 0:
274
self.current_frame = None
275
n = len(buf)
276
buf[:] = self.file_read(n)
277
return n
278
if n < len(buf):
279
raise UnpicklingError(
280
"pickle exhausted before end of frame")
281
return n
282
else:
283
n = len(buf)
284
buf[:] = self.file_read(n)
285
return n
286
287
def read(self, n):
288
if self.current_frame:
289
data = self.current_frame.read(n)
290
if not data and n != 0:
291
self.current_frame = None
292
return self.file_read(n)
293
if len(data) < n:
294
raise UnpicklingError(
295
"pickle exhausted before end of frame")
296
return data
297
else:
298
return self.file_read(n)
299
300
def readline(self):
301
if self.current_frame:
302
data = self.current_frame.readline()
303
if not data:
304
self.current_frame = None
305
return self.file_readline()
306
if data[-1] != b'\n'[0]:
307
raise UnpicklingError(
308
"pickle exhausted before end of frame")
309
return data
310
else:
311
return self.file_readline()
312
313
def load_frame(self, frame_size):
314
if self.current_frame and self.current_frame.read() != b'':
315
raise UnpicklingError(
316
"beginning of a new frame before end of current frame")
317
self.current_frame = io.BytesIO(self.file_read(frame_size))
318
319
320
# Tools used for pickling.
321
322
def _getattribute(obj, name):
323
for subpath in name.split('.'):
324
if subpath == '<locals>':
325
raise AttributeError("Can't get local attribute {!r} on {!r}"
326
.format(name, obj))
327
try:
328
parent = obj
329
obj = getattr(obj, subpath)
330
except AttributeError:
331
raise AttributeError("Can't get attribute {!r} on {!r}"
332
.format(name, obj)) from None
333
return obj, parent
334
335
def whichmodule(obj, name):
336
"""Find the module an object belong to."""
337
module_name = getattr(obj, '__module__', None)
338
if module_name is not None:
339
return module_name
340
# Protect the iteration by using a list copy of sys.modules against dynamic
341
# modules that trigger imports of other modules upon calls to getattr.
342
for module_name, module in sys.modules.copy().items():
343
if (module_name == '__main__'
344
or module_name == '__mp_main__' # bpo-42406
345
or module is None):
346
continue
347
try:
348
if _getattribute(module, name)[0] is obj:
349
return module_name
350
except AttributeError:
351
pass
352
return '__main__'
353
354
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    # For negatives, to_bytes can leave a redundant 0xff sign byte when the
    # preceding byte already carries the sign bit; trim it to stay minimal.
    if (x < 0 and nbytes > 1
            and result[-1] == 0xff and (result[-2] & 0x80) != 0):
        result = result[:-1]
    return result
383
384
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    # int.from_bytes already treats b'' as 0, matching encode_long's
    # special case.
    return int.from_bytes(data, byteorder='little', signed=True)
403
404
405
# Pickling machinery
406
407
class _Pickler:
408
409
def __init__(self, file, protocol=None, *, fix_imports=True,
             buffer_callback=None):
    """This takes a binary file for writing a pickle data stream.

    *protocol* selects the pickle protocol (0 through 5); the default
    is 4 (introduced in Python 3.4, unreadable by earlier versions).
    A negative value selects the highest supported protocol.  Higher
    protocols need more recent Pythons to read the result.

    *file* only needs a write() method accepting a single bytes
    argument: a binary file, an io.BytesIO, or any custom equivalent.

    With *fix_imports* true and *protocol* < 3, Python 3 names are
    mapped to their Python 2 module names so the stream stays readable
    by Python 2.

    *buffer_callback*, if not None, is invoked with each buffer view;
    a false return marks the buffer out-of-band, a true return keeps
    it in-band (inside the pickle stream).  It requires protocol >= 5.
    """
    if protocol is None:
        protocol = DEFAULT_PROTOCOL
    if protocol < 0:
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
    if buffer_callback is not None and protocol < 5:
        raise ValueError("buffer_callback needs protocol >= 5")
    self._buffer_callback = buffer_callback
    try:
        self._file_write = file.write
    except AttributeError:
        raise TypeError("file must have a 'write' attribute")
    self.framer = _Framer(self._file_write)
    self.write = self.framer.write
    self._write_large_bytes = self.framer.write_large_bytes
    self.memo = {}                      # id(obj) -> (memo key, obj)
    self.proto = int(protocol)
    self.bin = protocol >= 1            # binary opcodes available?
    self.fast = 0                       # when true, memoization is disabled
    self.fix_imports = fix_imports and protocol < 3
465
466
def clear_memo(self):
    """Clears the pickler's "memo".

    The memo remembers objects the pickler has already seen, so shared
    and recursive objects pickle by reference rather than by value.
    Clearing it is useful when re-using a pickler instance.
    """
    self.memo.clear()
475
476
def dump(self, obj):
    """Write a pickled representation of obj to the open file."""
    # Mirror _pickle.Pickler.dump()'s error when a subclass skipped
    # calling Pickler.__init__().
    if not hasattr(self, "_file_write"):
        raise PicklingError("Pickler.__init__() was not called by "
                            "%s.__init__()" % (self.__class__.__name__,))
    if self.proto >= 2:
        self.write(PROTO + pack("<B", self.proto))
    if self.proto >= 4:
        self.framer.start_framing()
    self.save(obj)
    self.write(STOP)
    self.framer.end_framing()
490
491
def memoize(self, obj):
    """Store an object in the memo."""
    # self.memo maps id(obj) -> (memo_key, obj).  The key is written to
    # the stream and becomes the index into the Unpickler's memo; the
    # object itself is retained so its id cannot be recycled while
    # pickling is still in progress.
    #
    # Using the current memo length as the key is only a convention --
    # any unique value would do -- but it lets the Unpickler keep its
    # memo as a plain (growable) array indexed by key.
    if self.fast:
        return
    assert id(obj) not in self.memo
    idx = len(self.memo)
    self.write(self.put(idx))
    self.memo[id(obj)] = idx, obj
512
513
def put(self, idx):
    """Return a PUT/BINPUT/LONG_BINPUT/MEMOIZE opcode for memo key *idx*."""
    if self.proto >= 4:
        return MEMOIZE
    if self.bin:
        if idx < 256:
            return BINPUT + pack("<B", idx)
        return LONG_BINPUT + pack("<I", idx)
    return PUT + repr(idx).encode("ascii") + b'\n'
524
525
def get(self, i):
    """Return a GET/BINGET/LONG_BINGET opcode fetching memo key *i*."""
    if self.bin:
        if i < 256:
            return BINGET + pack("<B", i)
        return LONG_BINGET + pack("<I", i)
    return GET + repr(i).encode("ascii") + b'\n'
534
535
def save(self, obj, save_persistent_id=True):
    """Pickle *obj*, choosing the persistent-id hook, the memo, the type
    dispatch table, or the reduce protocol -- in that order."""
    self.framer.commit_frame()

    # Persistent-id hook (defined by subclasses).
    pid = self.persistent_id(obj)
    if pid is not None and save_persistent_id:
        self.save_pers(pid)
        return

    # Seen before?  Emit a GET instead of re-serializing.
    memo_entry = self.memo.get(id(obj))
    if memo_entry is not None:
        self.write(self.get(memo_entry[0]))
        return

    rv = NotImplemented
    reducer = getattr(self, "reducer_override", None)
    if reducer is not None:
        rv = reducer(obj)

    if rv is NotImplemented:
        # Exact-type dispatch table.
        obj_type = type(obj)
        dispatcher = self.dispatch.get(obj_type)
        if dispatcher is not None:
            dispatcher(self, obj)  # call unbound method with explicit self
            return

        # Private dispatch table if any, else copyreg.dispatch_table.
        reducer = getattr(self, 'dispatch_table', dispatch_table).get(obj_type)
        if reducer is not None:
            rv = reducer(obj)
        else:
            # Classes (including ones with a custom metaclass) are saved
            # as globals.
            if issubclass(obj_type, type):
                self.save_global(obj)
                return

            # Prefer __reduce_ex__; fall back to __reduce__.
            reducer = getattr(obj, "__reduce_ex__", None)
            if reducer is not None:
                rv = reducer(self.proto)
            else:
                reducer = getattr(obj, "__reduce__", None)
                if reducer is not None:
                    rv = reducer()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (obj_type.__name__, obj))

    # A string result from reduce() means "save as global".
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return

    # reduce() must otherwise return a tuple of 2..6 elements.
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reducer)
    arity = len(rv)
    if not (2 <= arity <= 6):
        raise PicklingError("Tuple returned by %s must have "
                            "two to six elements" % reducer)

    # Save the reduce() output and finally memoize the object.
    self.save_reduce(obj=obj, *rv)
604
605
def persistent_id(self, obj):
    """Hook for subclasses; the default pickles everything in-band."""
    return None
608
609
def save_pers(self, pid):
    """Emit a persistent-id reference for *pid*."""
    if self.bin:
        # Binary protocols: pickle the id itself, then BINPERSID.
        self.save(pid, save_persistent_id=False)
        self.write(BINPERSID)
    else:
        # Protocol 0 stores the id as an ASCII line.
        try:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
620
621
def save_reduce(self, func, args, state=None, listitems=None,
                dictitems=None, state_setter=None, *, obj=None):
    """Emit opcodes for a reduce-protocol tuple (also called by subclasses)."""
    if not isinstance(args, tuple):
        raise PicklingError("args from save_reduce() must be a tuple")
    if not callable(func):
        raise PicklingError("func from save_reduce() must be callable")

    save = self.save
    write = self.write

    func_name = getattr(func, "__name__", "")
    if self.proto >= 2 and func_name == "__newobj_ex__":
        cls, args, kwargs = args
        if not hasattr(cls, "__new__"):
            raise PicklingError("args[0] from {} args has no __new__"
                                .format(func_name))
        if obj is not None and cls is not obj.__class__:
            raise PicklingError("args[0] from {} args has the wrong class"
                                .format(func_name))
        if self.proto >= 4:
            save(cls)
            save(args)
            save(kwargs)
            write(NEWOBJ_EX)
        else:
            # Protocols 2-3 lack NEWOBJ_EX: fall back to REDUCE on a
            # partial that forwards everything to cls.__new__.
            func = partial(cls.__new__, cls, *args, **kwargs)
            save(func)
            save(())
            write(REDUCE)
    elif self.proto >= 2 and func_name == "__newobj__":
        # The __newobj__ convention lets protocol 2+ emit the compact
        # NEWOBJ opcode while protocols 0/1 still pickle a plain call to
        # the (user-supplied) __newobj__ function.  The expected shape,
        # which pickle cannot verify, is:
        #
        #     def __newobj__(cls, *args):
        #         return cls.__new__(cls, *args)
        #
        # NEWOBJ performs cls.__new__(cls, *args) at unpickling time (see
        # load_newobj); a three-tuple's state item is then applied via
        # __setstate__/BUILD regardless of protocol (see load_build).
        # No standard __newobj__ exists -- you must supply your own; this
        # preserved compatibility with Python 2.2-era unpicklers.
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)

    if obj is not None:
        # If obj turned up in the memo while its args were being saved, it
        # is recursive: discard what was just pushed and GET it instead.
        if id(obj) in self.memo:
            write(POP + self.get(self.memo[id(obj)][0]))
        else:
            self.memoize(obj)

    # Items 4 and 5 of a reduce tuple are iterators supplying list items
    # and (key, value) dict items; they work with every protocol.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        if state_setter is None:
            save(state)
            write(BUILD)
        else:
            # Arrange for state_setter(obj, state) to run at load time
            # instead of load_build: push the setter and its argument
            # tuple, call it with REDUCE, and POP the (ignored) result --
            # the setter works by mutating obj in place.
            save(state_setter)
            save(obj)    # simple BINGET opcode as obj is already memoized
            save(state)
            write(TUPLE2)
            write(REDUCE)
            write(POP)
735
736
# Everything below is reached through this per-type dispatch table,
# keyed by exact type and populated as each save_* method is defined.

dispatch = {}
739
740
def save_none(self, obj):
    """Pickle None as a single NONE opcode."""
    self.write(NONE)
dispatch[type(None)] = save_none
743
744
def save_bool(self, obj):
    """Pickle a bool: dedicated opcodes from proto 2, INT spellings before."""
    if self.proto >= 2:
        self.write(NEWTRUE if obj else NEWFALSE)
    else:
        self.write(TRUE if obj else FALSE)
dispatch[bool] = save_bool
750
751
def save_long(self, obj):
    """Pickle an int using the shortest encoding the protocol allows."""
    if self.bin:
        # Ints that fit a signed 4-byte word have compact fixed-size
        # opcodes; 1- and 2-byte unsigned forms are checked first.
        if obj >= 0:
            if obj <= 0xff:
                self.write(BININT1 + pack("<B", obj))
                return
            if obj <= 0xffff:
                self.write(BININT2 + pack("<H", obj))
                return
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(BININT + pack("<i", obj))
            return
    if self.proto >= 2:
        # Arbitrary precision: two's-complement little-endian payload.
        encoded = encode_long(obj)
        n = len(encoded)
        if n < 256:
            self.write(LONG1 + pack("<B", n) + encoded)
        else:
            self.write(LONG4 + pack("<i", n) + encoded)
        return
    # Protocol 0 text forms.
    if -0x80000000 <= obj <= 0x7fffffff:
        self.write(INT + repr(obj).encode("ascii") + b'\n')
    else:
        self.write(LONG + repr(obj).encode("ascii") + b'L\n')
dispatch[int] = save_long
781
782
def save_float(self, obj):
    """Pickle a float: 8-byte big-endian IEEE-754 in binary, repr in proto 0."""
    if self.bin:
        self.write(BINFLOAT + pack('>d', obj))
    else:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
dispatch[float] = save_float
788
789
def save_bytes(self, obj):
    """Pickle bytes; protocols < 3 round-trip through a latin-1 str."""
    if self.proto < 3:
        if not obj:  # empty bytes
            self.save_reduce(bytes, (), obj=obj)
        else:
            self.save_reduce(codecs.encode,
                             (str(obj, 'latin1'), 'latin1'), obj=obj)
        return
    n = len(obj)
    if n <= 0xff:
        self.write(SHORT_BINBYTES + pack("<B", n) + obj)
    elif n > 0xffffffff and self.proto >= 4:
        self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
    elif n >= self.framer._FRAME_SIZE_TARGET:
        # Large enough to bypass framing entirely.
        self._write_large_bytes(BINBYTES + pack("<I", n), obj)
    else:
        self.write(BINBYTES + pack("<I", n) + obj)
    self.memoize(obj)
dispatch[bytes] = save_bytes
808
809
def save_bytearray(self, obj):
    """Pickle a bytearray; protocols < 5 reduce via the bytearray type."""
    if self.proto < 5:
        if not obj:  # empty bytearray
            self.save_reduce(bytearray, (), obj=obj)
        else:
            self.save_reduce(bytearray, (bytes(obj),), obj=obj)
        return
    n = len(obj)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
    else:
        self.write(BYTEARRAY8 + pack("<Q", n) + obj)
    self.memoize(obj)
dispatch[bytearray] = save_bytearray
823
824
if _HAVE_PICKLE_BUFFER:
    def save_picklebuffer(self, obj):
        """Pickle a PickleBuffer in-band, or out-of-band via the callback."""
        if self.proto < 5:
            raise PicklingError("PickleBuffer can only pickled with "
                                "protocol >= 5")
        with obj.raw() as m:
            if not m.contiguous:
                raise PicklingError("PickleBuffer can not be pickled when "
                                    "pointing to a non-contiguous buffer")
            in_band = True
            if self._buffer_callback is not None:
                in_band = bool(self._buffer_callback(obj))
            if in_band:
                # In-band: serialize the data inside the stream, as bytes
                # (readonly) or bytearray (writable).
                # XXX The C implementation avoids a copy here
                if m.readonly:
                    self.save_bytes(m.tobytes())
                else:
                    self.save_bytearray(m.tobytes())
            else:
                # Out-of-band: only a placeholder goes into the stream.
                self.write(NEXT_BUFFER)
                if m.readonly:
                    self.write(READONLY_BUFFER)

    dispatch[PickleBuffer] = save_picklebuffer
850
851
def save_str(self, obj):
    """Pickle a str: UTF-8 in binary protocols, escaped text in proto 0."""
    if self.bin:
        encoded = obj.encode('utf-8', 'surrogatepass')
        n = len(encoded)
        if n <= 0xff and self.proto >= 4:
            self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
        else:
            self.write(BINUNICODE + pack("<I", n) + encoded)
    else:
        # Protocol 0 is line-oriented: escape characters the raw-unicode-
        # escape codec would mangle or that would break the framing.
        obj = obj.replace("\\", "\\u005c")
        obj = obj.replace("\0", "\\u0000")
        obj = obj.replace("\n", "\\u000a")
        obj = obj.replace("\r", "\\u000d")
        obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
        self.write(UNICODE + obj.encode('raw-unicode-escape') +
                   b'\n')
    self.memoize(obj)
dispatch[str] = save_str
873
874
def save_tuple(self, obj):
    """Pickle a tuple, handling self-referential tuples via the memo."""
    if not obj:  # empty tuple
        if self.bin:
            self.write(EMPTY_TUPLE)
        else:
            self.write(MARK + TUPLE)
        return

    n = len(obj)
    save = self.save
    memo = self.memo
    if n <= 3 and self.proto >= 2:
        for element in obj:
            save(element)
        # Subtle: saving the elements may have pickled the tuple itself
        # (it is recursive).  Drop the pushed elements and GET it instead.
        if id(obj) in memo:
            get = self.get(memo[id(obj)][0])
            self.write(POP * n + get)
        else:
            self.write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0/1, or proto >= 2 with more than 3 elements.
    write = self.write
    write(MARK)
    for element in obj:
        save(element)

    if id(obj) in memo:
        # obj was not memoized when save_tuple() started, so saving its
        # elements must have pickled the tuple itself: it is recursive.
        # Throw away everything pushed since MARK and just GET the
        # already-constructed tuple.  This could have been detected inside
        # the loop, but recursive tuples are rare.
        get = self.get(memo[id(obj)][0])
        if self.bin:
            write(POP_MARK + get)
        else:  # proto 0 -- POP_MARK not available
            write(POP * (n+1) + get)
        return

    # No recursion.
    write(TUPLE)
    self.memoize(obj)

dispatch[tuple] = save_tuple
924
925
def save_list(self, obj):
    """Pickle a list: push an empty list, memoize, then batch-append."""
    if self.bin:
        self.write(EMPTY_LIST)
    else:  # proto 0 -- can't use EMPTY_LIST
        self.write(MARK + LIST)

    # Memoize before appending so recursive lists resolve via GET.
    self.memoize(obj)
    self._batch_appends(obj)

dispatch[list] = save_list
935
936
# Maximum number of elements emitted per APPENDS/SETITEMS run.
_BATCHSIZE = 1000

def _batch_appends(self, items):
    """Append *items* to the list on the stack, batched into APPENDS runs."""
    save = self.save
    write = self.write

    if not self.bin:
        # Protocol 0: one APPEND per element.
        for item in items:
            save(item)
            write(APPEND)
        return

    it = iter(items)
    while True:
        batch = list(islice(it, self._BATCHSIZE))
        n = len(batch)
        if n > 1:
            write(MARK)
            for item in batch:
                save(item)
            write(APPENDS)
        elif n:
            # A single leftover element is cheaper as a bare APPEND.
            save(batch[0])
            write(APPEND)
        # else: batch is empty and we're done
        if n < self._BATCHSIZE:
            return
964
965
def save_dict(self, obj):
    """Pickle a dict: push an empty dict, memoize, then batch-set items."""
    if self.bin:
        self.write(EMPTY_DICT)
    else:  # proto 0 -- can't use EMPTY_DICT
        self.write(MARK + DICT)

    # Memoize before filling so recursive dicts resolve via GET.
    self.memoize(obj)
    self._batch_setitems(obj.items())

dispatch[dict] = save_dict
if PyStringMap is not None:
    dispatch[PyStringMap] = save_dict
977
978
def _batch_setitems(self, items):
979
# Helper to batch up SETITEMS sequences; proto >= 1 only
980
save = self.save
981
write = self.write
982
983
if not self.bin:
984
for k, v in items:
985
save(k)
986
save(v)
987
write(SETITEM)
988
return
989
990
it = iter(items)
991
while True:
992
tmp = list(islice(it, self._BATCHSIZE))
993
n = len(tmp)
994
if n > 1:
995
write(MARK)
996
for k, v in tmp:
997
save(k)
998
save(v)
999
write(SETITEMS)
1000
elif n: