pytorch/torch/lib/THD/base/DataChannel.hpp
Edward Yang 517c7c9861 Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>.
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
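
For example (an illustration added here, not part of the original commit message; the exact pre-change spelling of the include is an assumption), a sibling-header include in this file would go from a quote-style include resolved relative to the file's own directory to an angle-bracket include rooted at torch/lib:

```
// before: resolved relative to torch/lib/THD/base/
#include "ChannelUtils.hpp"

// after: rooted at torch/lib
#include <THD/base/ChannelUtils.hpp>
```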

I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.

I used the following script to do the canonicalization:

```
import subprocess
import re
import os.path

# Restrict the rewrite to C/C++/CUDA sources (and .in templates) under aten/ and torch/.
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
    if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cuh', '.cc']):
        continue
    if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
        continue
    with open(fn, 'r') as f:
        c = f.read()

    def fmt(p):
        return "#include <{}>".format(p)

    def repl(m):
        p = m.group(1)
        # System and well-known third-party headers keep their path and are
        # just switched to angle brackets.
        if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
            return fmt(p)
        # Headers already given relative to a known project root also keep
        # their path and just switch to angle brackets.
        if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
            return fmt(p)
        # Otherwise, re-root the path at aten/src, torch/lib, or the repository
        # root, resolving it against the including file's directory or one of
        # the legacy include roots.
        for root in ["aten/src", "torch/lib", ""]:
            for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
                new_p = os.path.relpath(os.path.join(bad_root, p), root)
                if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
                    return fmt(new_p)
        print("ERROR: ", fn, p)
        return m.group(0)

    new_c = re.sub(r'#include "([^"]+)"', repl, c)
    if new_c != c:
        print(fn)
        with open(fn, 'w') as f:
            f.write(new_c)
```

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849

Reviewed By: dzhulgakov

Differential Revision: D13363445

Pulled By: ezyang

fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-08 19:38:30 -08:00

#pragma once
#include <THD/base/ChannelType.h>
#include <THD/base/ChannelUtils.hpp>
#include <THD/base/DataChannel.h>
#include <THD/base/Scalar.hpp>
#include <THD/base/init_methods/InitMethod.hpp>
#include <ATen/ATen.h>
#include <unordered_map>
#include <utility>
#include <vector>
MAKE_HASHABLE(THDReduceOp, static_cast<int>(t));
MAKE_HASHABLE(thd::RPCType, static_cast<char>(t));
MAKE_HASHABLE(at::ScalarType, static_cast<int>(t));
namespace thd {

struct DataChannel {
  struct Request {
    Request(){};
    virtual ~Request(){};

    // Checks if the request has completed. Non-blocking operation.
    virtual bool isCompleted() = 0;
    // Waits until the request completes. Blocking operation.
    virtual void wait() = 0;
  };

  struct Group {
    Group();
    /*
     * Constructs a `Group` from the provided `ranks` and checks that all
     * ranks are in the range [0, `max_rank`].
     *
     * The `ranks` vector should map new (group) ranks to old (global) ranks,
     * e.g. ranks = {[0] = 6, [1] = 2} means that 0 and 1 are the new ranks in
     * the group and 6 and 2 are the corresponding global ranks.
     */
    Group(std::vector<rank_type> ranks, rank_type max_rank);
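
    // Illustration (hypothetical usage, not part of the original header):
    //
    //   Group g({6, 2}, /*max_rank=*/7);
    //
    // builds a group where group rank 0 maps to global rank 6 and group rank 1
    // maps to global rank 2, so g.size() == 2.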
    virtual ~Group();

    rank_type size() const;

    /*
     * In contrast to `getGroupRank`, this function throws `std::logic_error`
     * when the rank is not a member of this group.
     */
    rank_type mustGetGroupRank(rank_type global_rank) const;
    std::pair<rank_type, bool> getGroupRank(rank_type global_rank) const;

    /*
     * In contrast to `getGlobalRank`, this function throws `std::logic_error`
     * when the provided `group_rank` is not in the range of the group.
     */
    rank_type mustGetGlobalRank(rank_type group_rank) const;
    std::pair<rank_type, bool> getGlobalRank(rank_type group_rank) const;
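
    // Illustration (hypothetical, not part of the original header): for the
    // group above, g.mustGetGroupRank(2) == 1, while g.getGroupRank(5) returns
    // a pair whose bool (presumably "found") is false instead of throwing; the
    // must* variants throw std::logic_error for ranks outside the group.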
   private:
    // maps new group ranks to old ranks (global ranks)
    std::vector<rank_type> _new2old;

    // maps old ranks (global ranks) to new group ranks
    std::unordered_map<rank_type, rank_type> _old2new;
  };

  DataChannel(){};
  virtual ~DataChannel(){};

  virtual bool init() = 0;

  /**
   * This is required for the NCCL backend: since DataChannel is a static
   * object, it cannot otherwise be destroyed before CUDA is unloaded.
   */
  virtual void destroy() = 0;

  virtual rank_type getRank() = 0;
  virtual rank_type getNumProcesses() = 0;

  /**
   * All-gathers inputs from multiple GPUs; each Tensor in the input vector
   * should be on a separate GPU.
   *
   * Also note that the output vector is a 1D vector (flattened from 2D),
   * with size input.size() * world_size.
   *
   * For instance, rank i's input[k] tensor ends up in
   * output[i * input.size() + k].
   */
  virtual void allGather(
      std::vector<at::Tensor>& output,
      std::vector<at::Tensor>& input,
      THDGroup groupId = THDGroupWORLD) = 0;
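
  // Illustration (hypothetical numbers, not part of the original header): with
  // world_size == 2 and input.size() == 3, output must hold 2 * 3 == 6
  // tensors, and rank 1's input[2] is gathered into output[1 * 3 + 2], i.e.
  // output[5].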
  virtual void allGather(
      std::vector<at::Tensor>& output,
      at::Tensor& input,
      THDGroup group_id = THDGroupWORLD) = 0;

  virtual void gather(
      std::vector<at::Tensor>& output,
      at::Tensor& input,
      rank_type dst_rank,
      THDGroup group_id = THDGroupWORLD) = 0;

  virtual void scatter(
      std::vector<at::Tensor>& input,
      at::Tensor& output,
      rank_type src_rank,
      THDGroup group_id = THDGroupWORLD) = 0;

  // All reduce multiple GPUs on a number of nodes
  virtual void allReduce(
      std::vector<at::Tensor>& data,
      THDReduceOp operation,
      THDGroup group_id = THDGroupWORLD) = 0;

  virtual void allReduce(
      at::Tensor& data,
      THDReduceOp operation,
      THDGroup group_id = THDGroupWORLD) = 0;

  /**
   * Reduce multiple GPUs on a number of nodes
   * data[0]'s GPU in dstRank will receive the result
   */
  virtual void reduce(
      std::vector<at::Tensor>& data,
      THDReduceOp operation,
      rank_type dstRank,
      THDGroup groupId = THDGroupWORLD) = 0;

  virtual void reduce(
      at::Tensor& data,
      THDReduceOp operation,
      rank_type dst_rank,
      THDGroup group_id = THDGroupWORLD) = 0;

  /**
   * Broadcast multiple GPUs on a number of nodes
   * data[0]'s GPU in srcRank will be the source to broadcast
   */
  virtual void broadcast(
      std::vector<at::Tensor>& data,
      rank_type srcRank,
      THDGroup groupId = THDGroupWORLD) = 0;

  virtual void broadcast(
      at::Tensor& data,
      rank_type src_rank,
      THDGroup group_id = THDGroupWORLD) = 0;

  virtual void send(Scalar& value, rank_type src_rank) = 0;
  virtual void send(at::Tensor& data, rank_type dst_rank) = 0;
  virtual void receive(Scalar& value, rank_type src_rank) = 0;
  virtual rank_type receive(at::Tensor& data) = 0; // receive from any source
  virtual void receive(at::Tensor& data, rank_type src_rank) = 0;
  virtual Request* isend(at::Tensor& data, rank_type dst_rank) = 0;
  virtual Request* ireceive(at::Tensor& data, rank_type src_rank) = 0;

  virtual void barrier(THDGroup group_id = THDGroupWORLD) = 0;

  virtual THDGroup newGroup(const std::vector<rank_type>& ranks) = 0;
  virtual void clearGroupCache(THDGroup group_id = THDGroupWORLD) = 0;

  static DataChannel* newChannel(
      THDChannelType type,
      std::string init_method,
      int world_size,
      std::string group_name,
      int rank);
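
  // Illustration (hypothetical usage, not part of the original header); the
  // channel type constant and the tcp:// init string are assumptions about
  // the THD backends, not taken from this file:
  //
  //   DataChannel* channel = DataChannel::newChannel(
  //       THDChannelTCP, "tcp://127.0.0.1:29500", /*world_size=*/2,
  //       /*group_name=*/"", /*rank=*/0);
  //   channel->init();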
};
} // namespace thd