Skip to content

torch.load() and torch.save() of big tensors are slow due to tar #606

@adamlerer

Description

@adamlerer
import torch
import time

t = torch.FloatTensor(1000000000).zero_()
start = time.time();
u = torch.load('foo.pt');
print(time.time() - start)

13.5132961273

The equivalent code in (Lua) Torch takes ~2.5s, compared with the ~13.5s measured above.

`perf top` shows that the time is spent in `copy_user_enhanced_fast_string`, i.e. in the kernel copying data to/from user space,

and if I interrupt the load while it is running, the stack trace is:

.../torch/serialization.pyc in load(f, map_location, pickle_module)
    246         f = open(f, 'rb')
    247     try:
--> 248         return _load(f, map_location, pickle_module)
    249     finally:
    250         if new_fd:

.../torch/serialization.pyc in _load(f, map_location, pickle_module)
    315          mkdtemp() as tmpdir:
    316
--> 317         tar.extract('storages', path=tmpdir)
    318         with open(os.path.join(tmpdir, 'storages'), 'rb', 0) as f:
    319             num_storages = pickle_module.load(f)

/usr/local/gcc-4.9-glibc-2.20/lib/python2.7/tarfile.py in extract(self, member, path)
   2086
   2087         try:
-> 2088             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
   2089         except EnvironmentError, e:
   2090             if self.errorlevel > 0:

/usr/local/gcc-4.9-glibc-2.20/lib/python2.7/tarfile.py in _extract_member(self, tarinfo, targetpath)
   2162
   2163         if tarinfo.isreg():
-> 2164             self.makefile(tarinfo, targetpath)
   2165         elif tarinfo.isdir():
   2166             self.makedir(tarinfo, targetpath)

/usr/local/gcc-4.9-glibc-2.20/lib/python2.7/tarfile.py in makefile(self, tarinfo, targetpath)
   2203         try:
   2204             with bltn_open(targetpath, "wb") as target:
-> 2205                 copyfileobj(source, target)
   2206         finally:
   2207             source.close()

/usr/local/gcc-4.9-glibc-2.20/lib/python2.7/tarfile.py in copyfileobj(src, dst, length)
    263         return
    264     if length is None:
--> 265         shutil.copyfileobj(src, dst)
    266         return
    267

/usr/local/gcc-4.9-glibc-2.20/lib/python2.7/shutil.py in copyfileobj(fsrc, fdst, length)
     50         if not buf:
     51             break
---> 52         fdst.write(buf)
     53
     54 def _samefile(src, dst):

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions