mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
    if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
        continue
    if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
        continue
    with open(fn, 'r') as f:
        c = f.read()
    def fmt(p):
        return "#include <{}>".format(p)
    def repl(m):
        p = m.group(1)
        if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
            return fmt(p)
        if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
            return fmt(p)
        for root in ["aten/src", "torch/lib", ""]:
            for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
                new_p = os.path.relpath(os.path.join(bad_root, p), root)
                if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
                    return fmt(new_p)
        print("ERROR: ", fn, p)
        return m.group(0)
    new_c = re.sub(r'#include "([^"]+)"', repl, c)
    if new_c != c:
        print(fn)
        with open(fn, 'w') as f:
            f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
122 lines
3.7 KiB
C++
122 lines
3.7 KiB
C++
#include <torch/csrc/utils/tensor_flatten.h>
|
|
|
|
#include <map>
|
|
#include <unordered_map>
|
|
|
|
namespace torch { namespace utils {
|
|
|
|
using namespace at;
|
|
|
|
// Partitions `tensors` into groups of tensors that share the same type,
// cutting a group off once its accumulated byte size reaches `size_limit`.
//
// tensors      - the tensors to bucket, visited in order.
// size_limit   - byte threshold at which a bucket is emitted.
// fine_grained - when true, a single running byte total is kept across all
//                types; once it reaches `size_limit`, every pending per-type
//                group is emitted (one result entry per type) and the total is
//                reset. When false, each type keeps its own running total in
//                `TensorGroup::size`, and only that type's group is emitted
//                when its total reaches `size_limit`.
//
// Returns the emitted groups, followed by whatever groups were still pending
// when the input was exhausted. Note that in fine-grained mode this function
// does not accumulate into the per-group `size` field.
std::vector<TensorGroup> take_tensors(
    TensorList tensors,
    size_t size_limit,
    bool fine_grained) {
  std::vector<TensorGroup> results;
  // an overapproximation, but at least we won't have to copy stuff around
  results.reserve(tensors.size());
  // Pending (not yet emitted) group for each type, keyed by the type's ID.
  std::map<TypeID, TensorGroup> groups;
  // Running byte total across all pending groups; only used when fine_grained.
  size_t cur_group_size = 0;

  for (const auto & tensor : tensors) {
    auto& type = tensor.type();
    size_t tensor_size;
    if (type.is_sparse()) {
      // A sparse tensor's footprint is its indices plus its values. Each part
      // must be measured with its own element size: the two tensors are
      // queried separately and need not share an element width.
      const auto& indices = tensor._indices();
      const auto& values = tensor._values();
      tensor_size = indices.numel() * indices.type().elementSizeInBytes() +
                    values.numel() * values.type().elementSizeInBytes();
    } else {
      tensor_size = tensor.numel() * type.elementSizeInBytes();
    }

    auto& type_group = groups[type.ID()];
    type_group.tensors.push_back(tensor);

    if (fine_grained) {
      cur_group_size += tensor_size;
      // Regardless the type, the current total size exceeds the limit
      if (cur_group_size >= size_limit) {
        // Spill all types to separate groups in results
        for (auto& entry : groups) {
          auto& group = entry.second;
          results.emplace_back(std::move(group));
        }
        cur_group_size = 0;
        groups.clear();
      }
    } else {
      type_group.size += tensor_size;
      if (type_group.size >= size_limit) {
        // Move the full group into results and leave a fresh, empty group
        // behind in the map for subsequent tensors of this type.
        results.emplace_back();
        std::swap(results.back(), type_group);
      }
    }
  }
  // End case. Look for any remaining groups and return them.
  for (auto& entry : groups) {
    auto& group = entry.second;
    // In per-type mode a zero-size entry is a group that was just swapped out
    // (or only ever held zero-byte tensors); skip it.
    if (!fine_grained && group.size == 0) {
      continue;
    }
    results.emplace_back(std::move(group));
  }
  return results;
}
|
|
|
|
void reorder_tensors_like(std::vector<Tensor>& tensors, TensorList order) {
|
|
AT_ASSERT(tensors.size() == order.size());
|
|
std::unordered_map<at::Type*, std::vector<size_t>> type_indices;
|
|
for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i)
|
|
type_indices[&tensors[i].type()].push_back(i);
|
|
|
|
std::unordered_map<at::Type*, size_t> type_used;
|
|
std::vector<Tensor> ordered_tensors;
|
|
ordered_tensors.reserve(tensors.size());
|
|
for (auto & tmpl_tensor : order) {
|
|
auto * type = &tmpl_tensor.type();
|
|
auto & indices = type_indices[type];
|
|
auto & used = type_used[type];
|
|
ordered_tensors.push_back(tensors[indices[used++]]);
|
|
}
|
|
std::swap(tensors, ordered_tensors);
|
|
}
|
|
|
|
namespace {
|
|
|
|
at::Tensor get_indices(const at::Tensor& t) {
|
|
return t._indices();
|
|
}
|
|
|
|
at::Tensor get_values(const at::Tensor& t) {
|
|
return t._values();
|
|
}
|
|
|
|
}
|
|
|
|
// Flattens a list of tensors into a pair of flat tensors: the first built from
// every tensor's `_indices()`, the second from every tensor's `_values()`.
// Each half is produced by flatten_dense_tensors over the per-tensor parts.
std::pair<at::Tensor, at::Tensor> flatten_sparse_tensors(at::TensorList tensors) {
  // Indices are gathered and flattened first, then values.
  const auto per_tensor_indices = fmap(tensors, &get_indices);
  auto flat_indices = flatten_dense_tensors(per_tensor_indices);

  const auto per_tensor_values = fmap(tensors, &get_values);
  auto flat_values = flatten_dense_tensors(per_tensor_values);

  return std::pair<at::Tensor, at::Tensor>(flat_indices, flat_values);
}
|
|
|
|
std::vector<at::Tensor> unflatten_sparse_tensors(
|
|
const at::Tensor& flat_indices, const at::Tensor& flat_values,
|
|
at::TensorList tensors) {
|
|
if (tensors.size() == 0) return {};
|
|
|
|
auto indices = unflatten_dense_tensors(flat_indices, fmap(tensors, &get_indices));
|
|
auto values = unflatten_dense_tensors(flat_values, fmap(tensors, &get_values));
|
|
|
|
std::vector<at::Tensor> outputs;
|
|
outputs.reserve(tensors.size());
|
|
for (size_t i = 0, num_tensors = tensors.size(); i < num_tensors; ++i) {
|
|
auto &ref_t = tensors[i];
|
|
auto t = at::_sparse_coo_tensor_unsafe(indices[i], values[i], ref_t.sizes());
|
|
outputs.emplace_back(t._coalesced_(ref_t.is_coalesced()));
|
|
}
|
|
return outputs;
|
|
}
|
|
|
|
|
|
}}
|