mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
[JIT] Make new zip serialization for torch save/load significantly (~70%) faster (#38379)
Summary:
Before:
```
2020-05-11 18:31:41 INFO Benchmarking 'basic', best of 10 runs (with 1 warmup runs)
{
"Big Tensors Save": {
"mean": 17.8048762,
"median": 17.458917
},
"Big Tensors Load": {
"mean": 3.2556887,
"median": 2.9668495000000004
},
"Small Tensors Save": {
"mean": 4.0381357,
"median": 3.9440125
},
"Small Tensors Load": {
"mean": 5.8792499,
"median": 5.603067
},
"benchmark_run_at": "2020-05-12T01:31:41"
}
```
After:
```
Use zipfile serialization: True
2020-05-12 20:15:32 INFO Benchmarking 'basic', best of 10 runs (with 1 warmup runs)
{
"Big Tensors Save": {
"mean": 4.7534657,
"median": 4.646732
},
"Big Tensors Load": {
"mean": 3.6001919,
"median": 3.493285
},
"Small Tensors Save": {
"mean": 4.1066924,
"median": 4.1219255
},
"Small Tensors Load": {
"mean": 6.3902358,
"median": 6.36977
},
"benchmark_run_at": "2020-05-13T03:15:32"
}
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/38379
Differential Revision: D21779494
Pulled By: voznesenskym
fbshipit-source-id: 694d65029a5b817424d454bd331e285df828c67a
This commit is contained in:
parent
b08a4aaf3b
commit
fce01a9bab
|
|
@ -210,6 +210,8 @@ if(UNIX)
|
|||
endif(HAVE_MALLOC_USABLE_SIZE)
|
||||
endif(UNIX)
|
||||
|
||||
ADD_DEFINITIONS(-DUSE_EXTERNAL_MZCRC)
|
||||
|
||||
if(NOT MSVC)
|
||||
list(APPEND ATen_CPU_DEPENDENCY_LIBS m)
|
||||
endif()
|
||||
|
|
|
|||
33
benchmarks/serialization/simple_measurement.py
Normal file
33
benchmarks/serialization/simple_measurement.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import torch
|
||||
from pyarkbench import Benchmark, Timer, default_args
|
||||
|
||||
use_new = True
|
||||
|
||||
class Basic(Benchmark):
    """Times ``torch.save``/``torch.load`` round trips for a big and a small tensor list."""

    def benchmark(self):
        """Run one timing pass over both workloads.

        Returns:
            dict: maps each measurement label to the ``ms_duration`` attribute
            of the ``Timer`` that wrapped the corresponding call.
        """
        import os  # local import: only needed to clean up the scratch files

        def _timed_round_trip(tensors, path):
            # Time one save and one load of `tensors` through `path`, then
            # delete the scratch file so runs don't litter the working dir.
            try:
                with Timer() as save_timer:
                    torch.save(tensors, path, _use_new_zipfile_serialization=use_new)

                with Timer() as load_timer:
                    torch.load(path)
            finally:
                # Cleanup happens outside both Timer blocks, so it is not
                # part of any reported measurement.
                if os.path.exists(path):
                    os.remove(path)
            return save_timer, load_timer

        # 30 tensors of 200x200 ones.
        big1, big2 = _timed_round_trip(
            [torch.ones(200, 200) for _ in range(30)], "big_tensor.zip")

        # 200 tensors of 10x10 ones.
        small1, small2 = _timed_round_trip(
            [torch.ones(10, 10) for _ in range(200)], "small_tensor.zip")

        return {
            "Big Tensors Save": big1.ms_duration,
            "Big Tensors Load": big2.ms_duration,
            "Small Tensors Save": small1.ms_duration,
            "Small Tensors Load": small2.ms_duration,
        }
|
||||
|
||||
if __name__ == '__main__':
    # Constructor arguments come from pyarkbench's `default_args.bench()`
    # (presumably parsed command-line options -- confirm against pyarkbench).
    bench_args = default_args.bench()
    bench = Basic(*bench_args)

    # Record which serialization format this run measured.
    print("Use zipfile serialization:", use_new)

    results = bench.run()
    bench.print_stats(results, stats=['mean', 'median'])
|
||||
|
|
@ -6,6 +6,7 @@ list(APPEND Caffe2_CPU_SRCS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/crc.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/read_adapter_interface.cc)
|
||||
list(APPEND Caffe2_CPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/miniz-2.0.8)
|
||||
|
||||
|
|
|
|||
14
caffe2/serialize/crc.cc
Normal file
14
caffe2/serialize/crc.cc
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#include "miniz.h"
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/serialize/crc_alt.h"
|
||||
|
||||
extern "C" {
// See: miniz.h
#if defined(USE_EXTERNAL_MZCRC)
// Definition of mz_crc32 supplied by this file when USE_EXTERNAL_MZCRC is
// defined (presumably miniz.h then omits its own implementation -- confirm
// against the vendored miniz.h).
//
// Forwards to crc32_fast, passing the buffer pointer, its length in bytes,
// and the incoming `crc` value, and returns whatever crc32_fast produces
// converted to mz_ulong.
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len) {
  return crc32_fast(ptr, buf_len, crc);
}
#endif
}
|
||||
1332
caffe2/serialize/crc_alt.h
Normal file
1332
caffe2/serialize/crc_alt.h
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -1655,6 +1655,7 @@ if(NOT INTERN_BUILD_MOBILE)
|
|||
endif(HAVE_MALLOC_USABLE_SIZE)
|
||||
endif(UNIX)
|
||||
|
||||
add_definitions(-DUSE_EXTERNAL_MZCRC)
|
||||
add_definitions(-DMINIZ_DISABLE_ZIP_READER_CRC32_CHECKS)
|
||||
|
||||
# Is __thread supported?
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@ class SerializationMixin(object):
|
|||
b = torch.load(data)
|
||||
self.assertTrue(data.was_called('readinto'))
|
||||
|
||||
|
||||
def test_serialization_storage_slice(self):
|
||||
# Generated using:
|
||||
#
|
||||
|
|
@ -542,6 +543,18 @@ class serialization_method(object):
|
|||
def __exit__(self, *args, **kwargs):
|
||||
torch.save = self.torch_save
|
||||
|
||||
class TestBothSerialization(TestCase, SerializationMixin):
    """Checks that both ``torch.save`` formats load back to equal values."""

    def test_serialization_new_format_old_format_compat(self):
        """Save the same tensors with and without the zipfile format and compare the loads."""
        x = [torch.ones(200, 200) for _ in range(30)]
        path = "big_tensor.zip"
        try:
            torch.save(x, path, _use_new_zipfile_serialization=True)
            x_new_load = torch.load(path)
            self.assertEqual(x, x_new_load)

            # Overwrite the same path using the other format.
            torch.save(x, path, _use_new_zipfile_serialization=False)
            x_old_load = torch.load(path)
            self.assertEqual(x_old_load, x_new_load)
        finally:
            # Remove the scratch file even when a save, load or assertion
            # above fails, so a failing run does not leave it behind.
            if os.path.exists(path):
                os.remove(path)
|
||||
|
||||
|
||||
class TestOldSerialization(TestCase, SerializationMixin):
|
||||
# unique_key is necessary because on Python 2.7, if a warning passed to
|
||||
|
|
|
|||
8
third_party/miniz-2.0.8/miniz.c
vendored
8
third_party/miniz-2.0.8/miniz.c
vendored
|
|
@ -6023,8 +6023,8 @@ mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name
|
|||
}
|
||||
|
||||
mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size,
|
||||
mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified,
|
||||
const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
|
||||
mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified,
|
||||
const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
|
||||
{
|
||||
mz_uint16 method = 0, dos_time = 0, dos_date = 0;
|
||||
mz_uint level, ext_attributes = 0, num_alignment_padding_bytes;
|
||||
|
|
@ -6298,8 +6298,8 @@ mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_n
|
|||
}
|
||||
|
||||
if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, extra_size, pComment,
|
||||
comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes,
|
||||
user_extra_data_central, user_extra_data_central_len))
|
||||
comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes,
|
||||
user_extra_data_central, user_extra_data_central_len))
|
||||
return MZ_FALSE;
|
||||
|
||||
pZip->m_total_files++;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user