[JIT] Make new zip serialization for torch save/load significantly (~70%) faster (#38379)

Summary:
Before:
```
2020-05-11 18:31:41 INFO     Benchmarking 'basic', best of 10 runs (with 1 warmup runs)
{
  "Big Tensors Save": {
    "mean": 17.8048762,
    "median": 17.458917
  },
  "Big Tensors Load": {
    "mean": 3.2556887,
    "median": 2.9668495000000004
  },
  "Small Tensors Save": {
    "mean": 4.0381357,
    "median": 3.9440125
  },
  "Small Tensors Load": {
    "mean": 5.8792499,
    "median": 5.603067
  },
  "benchmark_run_at": "2020-05-12T01:31:41"
}
```
After:
```
Use zipfile serialization: True
2020-05-12 20:15:32 INFO     Benchmarking 'basic', best of 10 runs (with 1 warmup runs)
{
  "Big Tensors Save": {
    "mean": 4.7534657,
    "median": 4.646732
  },
  "Big Tensors Load": {
    "mean": 3.6001919,
    "median": 3.493285
  },
  "Small Tensors Save": {
    "mean": 4.1066924,
    "median": 4.1219255
  },
  "Small Tensors Load": {
    "mean": 6.3902358,
    "median": 6.36977
  },
  "benchmark_run_at": "2020-05-13T03:15:32"
}
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/38379

Differential Revision: D21779494

Pulled By: voznesenskym

fbshipit-source-id: 694d65029a5b817424d454bd331e285df828c67a
This commit is contained in:
Michael Voznesensky 2020-05-29 01:54:00 -07:00 committed by Facebook GitHub Bot
parent b08a4aaf3b
commit fce01a9bab
8 changed files with 1400 additions and 4 deletions

View File

@ -210,6 +210,8 @@ if(UNIX)
endif(HAVE_MALLOC_USABLE_SIZE)
endif(UNIX)
ADD_DEFINITIONS(-DUSE_EXTERNAL_MZCRC)
if(NOT MSVC)
list(APPEND ATen_CPU_DEPENDENCY_LIBS m)
endif()

View File

@ -0,0 +1,33 @@
import torch
from pyarkbench import Benchmark, Timer, default_args
use_new = True
class Basic(Benchmark):
    """Round-trip torch.save/torch.load timing for big and small tensor lists."""

    def _time_roundtrip(self, tensors, path):
        # Time the save and the load separately; the list construction
        # happens before the first Timer so allocation is not measured.
        with Timer() as save_timer:
            torch.save(tensors, path, _use_new_zipfile_serialization=use_new)
        with Timer() as load_timer:
            torch.load(path)
        return save_timer.ms_duration, load_timer.ms_duration

    def benchmark(self):
        big_save, big_load = self._time_roundtrip(
            [torch.ones(200, 200) for _ in range(30)], "big_tensor.zip")
        small_save, small_load = self._time_roundtrip(
            [torch.ones(10, 10) for _ in range(200)], "small_tensor.zip")
        return {
            "Big Tensors Save": big_save,
            "Big Tensors Load": big_load,
            "Small Tensors Save": small_save,
            "Small Tensors Load": small_load,
        }
if __name__ == '__main__':
    # Build the benchmark from the standard CLI args, run it, and
    # report mean/median timings for each measured phase.
    suite = Basic(*default_args.bench())
    print("Use zipfile serialization:", use_new)
    timings = suite.run()
    suite.print_stats(timings, stats=['mean', 'median'])

View File

@ -6,6 +6,7 @@ list(APPEND Caffe2_CPU_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/crc.cc
${CMAKE_CURRENT_SOURCE_DIR}/read_adapter_interface.cc)
list(APPEND Caffe2_CPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/miniz-2.0.8)

14
caffe2/serialize/crc.cc Normal file
View File

@ -0,0 +1,14 @@
#include "miniz.h"
#include <iostream>
#include "caffe2/serialize/crc_alt.h"
extern "C" {
// See: miniz.h
#if defined(USE_EXTERNAL_MZCRC)
// When USE_EXTERNAL_MZCRC is defined, miniz calls out to this symbol
// instead of its own CRC32; delegate to crc32_fast from crc_alt.h.
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len) {
  return crc32_fast(ptr, buf_len, crc);
}
#endif
}

1332
caffe2/serialize/crc_alt.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1655,6 +1655,7 @@ if(NOT INTERN_BUILD_MOBILE)
endif(HAVE_MALLOC_USABLE_SIZE)
endif(UNIX)
add_definitions(-DUSE_EXTERNAL_MZCRC)
add_definitions(-DMINIZ_DISABLE_ZIP_READER_CRC32_CHECKS)
# Is __thread supported?

View File

@ -472,6 +472,7 @@ class SerializationMixin(object):
b = torch.load(data)
self.assertTrue(data.was_called('readinto'))
def test_serialization_storage_slice(self):
# Generated using:
#
@ -542,6 +543,18 @@ class serialization_method(object):
def __exit__(self, *args, **kwargs):
torch.save = self.torch_save
class TestBothSerialization(TestCase, SerializationMixin):
    def test_serialization_new_format_old_format_compat(self):
        """Data saved with the new zipfile format and with the legacy
        format must both load back equal to the original tensors."""
        import tempfile  # local: file-level import block not guaranteed to have it

        x = [torch.ones(200, 200) for i in range(30)]
        # Original wrote "big_tensor.zip" into the CWD and only removed it
        # after both asserts passed, leaving a stray file on failure.
        # A TemporaryDirectory guarantees cleanup regardless of outcome.
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, "big_tensor.zip")

            torch.save(x, path, _use_new_zipfile_serialization=True)
            x_new_load = torch.load(path)
            self.assertEqual(x, x_new_load)

            torch.save(x, path, _use_new_zipfile_serialization=False)
            x_old_load = torch.load(path)
            self.assertEqual(x_old_load, x_new_load)
class TestOldSerialization(TestCase, SerializationMixin):
# unique_key is necessary because on Python 2.7, if a warning passed to

View File

@ -6023,8 +6023,8 @@ mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name
}
mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size,
mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified,
const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified,
const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
{
mz_uint16 method = 0, dos_time = 0, dos_date = 0;
mz_uint level, ext_attributes = 0, num_alignment_padding_bytes;
@ -6298,8 +6298,8 @@ mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_n
}
if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, extra_size, pComment,
comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes,
user_extra_data_central, user_extra_data_central_len))
comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes,
user_extra_data_central, user_extra_data_central_len))
return MZ_FALSE;
pZip->m_total_files++;