pytorch/torch/csrc/serialization.cpp
Edward Yang 517c7c9861 Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.

I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.

I used the following script to do the canonicalization:

```
  import subprocess
  import re
  import os.path

  files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
  for fn in files:
      if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
          continue
      if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
          continue
      with open(fn, 'r') as f:
          c = f.read()
      def fmt(p):
          return "#include <{}>".format(p)
      def repl(m):
          p = m.group(1)
          if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
              return fmt(p)
          if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
              return fmt(p)
          for root in ["aten/src", "torch/lib", ""]:
              for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
                  new_p = os.path.relpath(os.path.join(bad_root, p), root)
                  if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
                      return fmt(new_p)
          print("ERROR: ", fn, p)
          return m.group(0)
      new_c = re.sub(r'#include "([^"]+)"', repl, c)
      if new_c != c:
          print(fn)
          with open(fn, 'w') as f:
              f.write(new_c)
```

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849

Reviewed By: dzhulgakov

Differential Revision: D13363445

Pulled By: ezyang

fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-08 19:38:30 -08:00

185 lines
6.3 KiB
C++

#include <torch/csrc/python_headers.h>
#include <system_error>
#include <torch/csrc/THP.h>
#include <torch/csrc/serialization.h>
// Primary templates for a single read / write attempt on a file handle of
// type `io`. They are only declared here; the explicit specializations for
// `int` (file descriptor) and `PyObject*` (Python file-like object) that
// follow provide the definitions.
template <class io>
ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);
template <class io>
ssize_t doPartialWrite(io fildes, void* buf, size_t nbytes);
// Forward declarations of the Python-backed helpers defined later in this
// file, so the PyObject* specializations can call them before their bodies
// appear.
static ssize_t doPartialPythonReadBuffered(PyObject* fildes, void* buf, size_t nbytes);
static ssize_t doPartialPythonReadInto(PyObject* fildes, void* buf, size_t nbytes);
static ssize_t doPartialPythonWrite(PyObject* fildes, void* buf, size_t nbytes);
// File-descriptor flavour of doPartialRead: issues exactly one read() call
// and hands its result (byte count, 0, or a negative value) straight back.
template <>
ssize_t doPartialRead<int>(int fildes, void* buf, size_t nbytes) {
  const ssize_t bytes_read = read(fildes, buf, nbytes);
  return bytes_read;
}
// Python-object flavour of doPartialRead. Dispatches to the readinto()-based
// helper when the object exposes a `readinto` attribute, and to the
// read()-based helper otherwise.
template <>
ssize_t doPartialRead<PyObject*>(PyObject* fildes, void* buf, size_t nbytes) {
  // readinto() is preferred over read() because it is more memory efficient.
  // TODO: Stop calling PyObject_HasAttrString() in a loop on our read loop
  if (PyObject_HasAttrString(fildes, "readinto") != 1) {
    return doPartialPythonReadBuffered(fildes, buf, nbytes);
  }
  return doPartialPythonReadInto(fildes, buf, nbytes);
}
// File-descriptor flavour of doPartialWrite: issues exactly one write() call
// and hands its result (byte count or a negative value) straight back.
template <>
ssize_t doPartialWrite<int>(int fildes, void* buf, size_t nbytes) {
  const ssize_t bytes_written = write(fildes, buf, nbytes);
  return bytes_written;
}
// Python-object flavour of doPartialWrite: delegates the single write attempt
// to the Python-backed helper and returns whatever it reports.
template <>
ssize_t doPartialWrite<PyObject*>(PyObject* fildes, void* buf, size_t nbytes) {
  const ssize_t bytes_written = doPartialPythonWrite(fildes, buf, nbytes);
  return bytes_written;
}
static inline bool isUnsupportedOperation() {
THPObjectPtr io(PyImport_ImportModule("io"));
if (!io) throw python_error();
THPObjectPtr exception(PyObject_GetAttrString(io, "UnsupportedOperation"));
if (!exception) throw python_error();
return PyErr_ExceptionMatches(exception.get());
}
// Call Python fildes.read(nbytes) and copy the returned bytes into buf.
//
// fildes:     Python file-like object providing a read(n) method.
// buf:        destination buffer with room for at least raw_nbytes bytes.
// raw_nbytes: upper bound on the number of bytes the caller wants; a single
//             call requests at most 2^18 of them.
//
// Returns: the number of bytes copied into buf, or 0 on EOF.
// Throws:  python_error if read() raises or does not return a bytes object
//          (str on Python 2); an AT_ERROR exception if read() hands back
//          more data than was requested.
static inline ssize_t doPartialPythonReadBuffered(PyObject* fildes, void* buf, size_t raw_nbytes) {
  // If we request a large amount of data, f.read() will internally try to
  // allocate a buffer of that size. This is counterproductive, because
  // it's not the buffer we ultimately want to write the data into. Read
  // less than that and avoid allocating too much extra memory.
  // TODO: Maybe 260 KB is a bit small...
  const size_t nbytes = std::min<size_t>(raw_nbytes, 262144u); // 2^18 (~260 KB)

  // The "i" format unit consumes a C int from the variadic arguments, so pass
  // exactly an int (nbytes is capped at 2^18 above and therefore fits).
  THPObjectPtr r(PyObject_CallMethod(fildes, "read", "i", static_cast<int>(nbytes)));
  if (!r) throw python_error();

  // read output is String (Python 2) / Bytes (Python 3). The *AsStringAndSize
  // accessors verify the object type and raise TypeError instead of letting
  // us read through an object of the wrong type.
  char* py_buf = nullptr;
  Py_ssize_t size = 0;
#if PY_MAJOR_VERSION >= 3
  const int status = PyBytes_AsStringAndSize(r.get(), &py_buf, &size);
#else
  const int status = PyString_AsStringAndSize(r.get(), &py_buf, &size);
#endif
  if (status != 0) throw python_error();

  // we read EOF
  if (size == 0) {
    return 0;
  }

  // A misbehaving file object could return more than we asked for; copying
  // that would run past the end of the caller's buffer.
  if (static_cast<size_t>(size) > nbytes) {
    AT_ERROR("read(): file object returned ", size, " bytes, but at most ",
             nbytes, " were requested");
  }

  // Slurp it into the buffer we actually want
  memcpy(buf, py_buf, size);
  return size;
}
// Either does fildes.readinto(buf) or fildes.write(buf), exposing the raw
// memory at buf to Python without copying it first.
//
// fildes:  Python file-like object.
// buf:     memory to read into / write from.
// nbytes:  size of that memory in bytes.
// is_read: true  -> call fildes.readinto(view) (view is writable),
//          false -> call fildes.write(view).
//
// Returns: the count reported by the Python method. For reads, falls back to
//          doPartialPythonReadBuffered() when readinto() raises
//          io.UnsupportedOperation.
// Throws:  python_error if the view cannot be created, if the Python call
//          raises any other exception, or if its return value cannot be
//          converted to a ssize_t (for example when it is None).
static inline ssize_t doPartialPythonIO(PyObject* fildes, void* buf, size_t nbytes, bool is_read) {
#if PY_MAJOR_VERSION >= 3
  auto rw_flag = is_read ? PyBUF_WRITE : PyBUF_READ;
  THPObjectPtr memview(PyMemoryView_FromMemory(
      reinterpret_cast<char*>(buf), nbytes, rw_flag));
#else
  THPObjectPtr memview(PyBuffer_FromReadWriteMemory(buf, nbytes));
#endif
  if (!memview) throw python_error();

  // Keep the method names in mutable arrays: binding a string literal to a
  // non-const char* is ill-formed in C++11, while Python 2's
  // PyObject_CallMethod still takes a non-const char*.
  char readinto_name[] = "readinto";
  char write_name[] = "write";
  char* method = is_read ? readinto_name : write_name;

  THPObjectPtr r(PyObject_CallMethod(fildes, method, "O", memview.get()));
  if (r) {
    const ssize_t count = PyLong_AsSsize_t(r.get());
    // -1 doubles as the conversion-failure sentinel; surface the Python error
    // here instead of handing callers a bogus negative byte count.
    if (count == -1 && PyErr_Occurred()) throw python_error();
    return count;
  }

  // fildes.readinto can raise UnsupportedOperation so fall back to fildes.read.
  if (is_read && isUnsupportedOperation()) {
    PyErr_Clear();
    return doPartialPythonReadBuffered(fildes, buf, nbytes);
  }
  throw python_error();
}
// Read side of doPartialPythonIO: performs fildes.readinto(buf) and returns
// the count it reports.
static ssize_t doPartialPythonReadInto(PyObject* fildes, void* buf, size_t nbytes) {
  constexpr bool kIsRead = true;
  return doPartialPythonIO(fildes, buf, nbytes, kIsRead);
}
// Write side of doPartialPythonIO: performs fildes.write(buf) and returns
// the count it reports.
static ssize_t doPartialPythonWrite(PyObject* fildes, void* buf, size_t nbytes) {
  constexpr bool kIsRead = false;
  return doPartialPythonIO(fildes, buf, nbytes, kIsRead);
}
// Fills raw_buf with EXACTLY nbytes bytes taken from fildes, issuing as many
// partial reads as that takes. Raises an error if a read fails, or if EOF is
// reached before all nbytes have arrived.
template <typename io>
void doRead(io fildes, void* raw_buf, size_t nbytes) {
  // we read in 1GB blocks to avoid bugs on Mac OS X Lion
  // see https://github.com/pytorch/pytorch/issues/1031 for more details
  constexpr size_t kMaxChunk = 1073741824;

  char* cursor = static_cast<char*>(raw_buf);
  while (nbytes != 0) {
    errno = 0; // doPartialRead may not set errno
    const ssize_t r = doPartialRead(fildes, cursor, std::min<size_t>(nbytes, kMaxChunk));

    if (r == 0) {
      // EOF; the shortfall (if any) is reported after the loop.
      break;
    }

    if (r > 0) {
      cursor += r;
      // This is guaranteed by POSIX, but I just want to be double-sure
      // to not underflow a signed integer.
      AT_ASSERT(static_cast<size_t>(r) <= nbytes);
      nbytes -= r;
      continue;
    }

    // r < 0: the partial read failed; errno says why.
    const int err = errno;
    AT_ASSERTM(err != 0, "read(): impossible! r < 0, but no errno was set");
    AT_ASSERTM(err != EAGAIN, "read(): non-blocking fd ", fildes,
               " read EAGAIN; cowardly refusing to spin-wait");
    if (err != EINTR) {
      AT_ERROR("read(): fd ", fildes, " failed with ", strerror(err));
    }
    // Interrupted by a signal: go around again and retry the same read.
  }

  if (nbytes != 0) {
    AT_ERROR("unexpected EOF, expected ", nbytes, " more bytes. The file might be corrupted.");
  }
}
// Pushes all nbytes bytes of raw_buf out to fildes, issuing as many partial
// writes as that takes. Raises an error if a write fails.
template <typename io>
void doWrite(io fildes, void* raw_buf, size_t nbytes) {
  // we write in 1GB blocks to avoid bugs on Mac OS X Lion
  // see https://github.com/pytorch/pytorch/issues/1031 for more details
  constexpr size_t kMaxChunk = 1073741824;

  char* cursor = static_cast<char*>(raw_buf);
  while (nbytes != 0) {
    errno = 0; // doPartialWrite may not set errno
    const ssize_t r = doPartialWrite(fildes, cursor, std::min<size_t>(nbytes, kMaxChunk));

    if (r >= 0) {
      cursor += r;
      AT_ASSERT(static_cast<size_t>(r) <= nbytes);
      nbytes -= r;
      continue;
    }

    // r < 0: the partial write failed; errno says why.
    const int err = errno;
    AT_ASSERTM(err != 0, "write(): impossible! r < 0, but no errno was set");
    AT_ASSERTM(err != EAGAIN, "write(): non-blocking fd ", fildes,
               " read EAGAIN; cowardly refusing to spin-wait");
    if (err != EINTR) {
      AT_ERROR("write(): fd ", fildes, " failed with ", strerror(err));
    }
    // Interrupted by a signal: go around again and retry the same write.
  }
}
#include <torch/csrc/generic/serialization.cpp>
#include <TH/THGenerateAllTypes.h>
#include <torch/csrc/generic/serialization.cpp>
#include <TH/THGenerateHalfType.h>