mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-06 12:20:52 +01:00
Summary: By default, TorchScript execution is single threaded and uses the caller's thread pool. For the use case of distributed inference, we hope there is a way to customize the behavior where the interpreter in torch script can be executed in other places. This diff allows an explicit taskLauncher for torchscript interpreter. Pull Request resolved: https://github.com/pytorch/pytorch/pull/46865 Test Plan: unit test is passed. fbshipit-source-id: 1d7b003926c0d1f8facc53206efb960cff8897ac Fixes #{issue number} Reviewed By: houseroad Differential Revision: D24616102 Pulled By: garroud fbshipit-source-id: 79202b62f92d0b0baf72e4bf7aa3f05e0da91d59
72 lines
2.4 KiB
C++
72 lines
2.4 KiB
C++
#include <gtest/gtest.h>

#include <mutex>
#include <utility>

#include "test/cpp/jit/test_utils.h"

#include "torch/csrc/jit/runtime/graph_executor.h"
#include "torch/jit.h"
#include "torch/script.h"
#include "torch/torch.h"

|
namespace torch {
|
|
namespace jit {
|
|
|
|
TEST(GraphExecutorTest, Basic_CUDA) {
  // Shapes for a single LSTM cell evaluated on the GPU.
  constexpr int batch_size = 4;
  constexpr int input_size = 256;

  int hidden_size = 2 * input_size;

  // Random inputs, initial states, and weights, all on CUDA.
  auto lstm_input = at::randn({batch_size, input_size}, at::kCUDA);
  auto hidden_state = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto cell_state = at::randn({batch_size, hidden_size}, at::kCUDA);
  auto weights_input = t_def(at::randn({4 * hidden_size, input_size}, at::kCUDA));
  auto weights_hidden =
      t_def(at::randn({4 * hidden_size, hidden_size}, at::kCUDA));

  // Run the scripted LSTM graph through a GraphExecutor.
  auto lstm_graph = build_lstm();
  GraphExecutor executor(lstm_graph, "");
  auto run_stack =
      createStack({lstm_input, hidden_state, cell_state, weights_input, weights_hidden});
  executor.run(run_stack);

  // The graph produces exactly two outputs: (hy, cy).
  ASSERT_EQ(run_stack.size(), 2);

  // Compare against the eager-mode reference implementation.
  at::Tensor expected_hy, expected_cy;
  std::tie(expected_hy, expected_cy) =
      lstm(lstm_input, hidden_state, cell_state, weights_input, weights_hidden);
  ASSERT_TRUE(almostEqual(run_stack[0].toTensor(), expected_hy));
  ASSERT_TRUE(almostEqual(run_stack[1].toTensor(), expected_cy));
}
|
|
|
|
TEST(GraphExecutorTest, runAsync_executor) {
  /*
    TODO: there are some problem with C++ parsing script program involving
    fork. Use the test module below for now.
    issue about this: github.com/pytorch/pytorch/issues/46368
    The test module file is generated by following:
      class DemoModule(torch.nn.Module):
        def forward(self):
          r1 = torch.jit.fork(torch.mm, torch.rand(100,100),torch.rand(100,100))
          r2 = torch.jit.fork(torch.mm, torch.rand(100,100),torch.rand(100,100))
          return r1.wait() + r2.wait()
      demo = DemoModule()
      torch.jit.save(torch.jit.script(demo), 'test_interpreter_async.pt')
  */
  // Locate the serialized test module sitting next to this source file.
  std::string filePath(__FILE__);
  auto testModelFile = filePath.substr(0, filePath.find_last_of("/\\") + 1);
  testModelFile.append("test_interpreter_async.pt");
  auto module = load(testModelFile);
  auto graph = module.get_method("forward").graph();
  GraphExecutor graphExecutor(graph, "");

  // Counts tasks handed to the custom launcher; guarded by mtx because the
  // launcher may be invoked from multiple interpreter threads.
  auto asyncCounter = 0;
  std::mutex mtx;
  // A dummy executor which actually uses at::launch, but bumps a counter so
  // we can verify the interpreter routed work through the supplied launcher.
  auto launcher = [&](std::function<void()> f) {
    {
      // RAII lock: released even if ++ were to throw, unlike manual
      // lock()/unlock().
      std::lock_guard<std::mutex> guard(mtx);
      ++asyncCounter;
    }
    at::launch(std::move(f));
  };

  std::vector<IValue> stack;
  stack.push_back(module._ivalue());
  // wait() synchronizes with all forked subtasks, so reading asyncCounter
  // afterwards without the lock is safe.
  graphExecutor.runAsync(stack, launcher)->wait();
  // forward() forks two subtasks, so the launcher must have been used.
  ASSERT_TRUE(asyncCounter > 0);
}
|
|
|
|
} // namespace jit
|
|
} // namespace torch
|