pytorch/torch/csrc/distributed/rpc/script_call.cpp
Theodor Arsenij 7684044b71 Add size check before calling .back() in rpc/script_call.cpp (#94297)
Hi!

I've been fuzzing different pytorch modules, and found a crash inside one of them.

Specifically, I'm talking about a module that processes `script_call` rpc requests and a function `ScriptCall::fromIValues(std::vector<at::IValue>& ivalues)`.

Running the crash file from the reproduction steps below causes a crash when `ivalues.back()` is called at [script_call.cpp:90](https://github.com/pytorch/pytorch/blob/abc54f9314/torch/csrc/distributed/rpc/script_call.cpp#L90). The crash occurs because the vector `ivalues` is empty.

All tests were performed on this pytorch version: [abc54f9314](https://github.com/pytorch/pytorch/commit/abc54f9314)

The provided patch checks if there are enough elements in the ivalues vector.

### How to reproduce

1. To reproduce the crash, use provided docker: [Dockerfile](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch)

2. Build the container: `docker build -t oss-sydr-fuzz-pytorch-reproduce .`

3. Copy crash file to the current directory:

    - [crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4.zip](https://github.com/pytorch/pytorch/files/10674059/crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4.zip)

4. Run the container: ``docker run --privileged --network host -v `pwd`:/homedir --rm -it oss-sydr-fuzz-pytorch-reproduce /bin/bash``

5. Inside the container, execute the binary: `/message_deserialize_fuzz /homedir/crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4`

After execution completes you will see this stacktrace:

```asan
AddressSanitizer:DEADLYSIGNAL
=================================================================
==57==ERROR: AddressSanitizer: SEGV on unknown address (pc 0x0000008e7b19 bp 0x7ffd2fdded70 sp 0x7ffd2fddec40 T0)
==57==The signal is caused by a READ memory access.
==57==Hint: this fault was caused by a dereference of a high value address (see register values below).  Disassemble the provided pc to learn which register was used.
    #0 0x8e7b19 in c10::IValue::isString() const /pytorch_fuzz/aten/src/ATen/core/ivalue.h:639:27
    #1 0x8e7b19 in c10::IValue::toStringRef[abi:cxx11]() const /pytorch_fuzz/aten/src/ATen/core/ivalue_inl.h:2179:3
    #2 0xe04fb58 in torch::distributed::rpc::ScriptCall::fromIValues(std::vector<c10::IValue, std::allocator<c10::IValue> >&) /pytorch_fuzz/torch/csrc/distributed/rpc/script_call.cpp:90:53
    #3 0xe0511f0 in torch::distributed::rpc::ScriptCall::fromMessage(torch::distributed::rpc::Message const&) /pytorch_fuzz/torch/csrc/distributed/rpc/script_call.cpp:133:10
    #4 0xe0ff71e in torch::distributed::rpc::deserializeRequest(torch::distributed::rpc::Message const&) /pytorch_fuzz/torch/csrc/distributed/rpc/utils.cpp:102:14
    #5 0x602a41 in LLVMFuzzerTestOneInput /message_deserialize_fuzz.cc:192:27
    #6 0x52ce61 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:611:15
    #7 0x516d7c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:324:6
    #8 0x51cacb in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:860:9
    #9 0x546062 in main /llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
    #10 0x7f41e42a8082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082)
    #11 0x51169d in _start (/message_deserialize_fuzz+0x51169d)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /pytorch_fuzz/aten/src/ATen/core/ivalue.h:639:27 in c10::IValue::isString() const
==57==ABORTING
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/94297
Approved by: https://github.com/ezyang
2023-04-29 00:26:35 +00:00

161 lines
4.8 KiB
C++

#include <torch/csrc/distributed/rpc/rpc_agent.h>
#include <torch/csrc/distributed/rpc/script_call.h>
#include <torch/csrc/jit/serialization/pickle.h>
namespace torch {
namespace distributed {
namespace rpc {
// Prefix substituted for ATEN_PREFIX_ when a builtin operator name is
// serialized (aten::add -> torch.ops.aten.add); fromIValues() uses it to
// recognize builtin operator calls.
const std::string ScriptCall::BUILTIN_OP_NAMESPACE_ = "torch.ops.aten.";
// Prefix every serializable builtin operator schema name must start with.
const std::string ScriptCall::ATEN_PREFIX_ = "aten::";
// Builds a call to a builtin operator.
//
// op:    the operator to invoke.
// stack: the call arguments; ownership is transferred to the new object.
//
// `stack` is a named rvalue reference, which makes it an lvalue inside the
// constructor. It has to be moved explicitly, otherwise the whole argument
// vector is copied even though the caller already gave it up.
ScriptCall::ScriptCall(
    std::shared_ptr<Operator> op,
    std::vector<at::IValue>&& stack)
    : op_(std::move(op)), stack_(std::move(stack)), isAsyncExecution_(false) {}
// Builds a call to a TorchScript function identified by its qualified name.
//
// qualifiedName:    name of the TorchScript function to invoke.
// stack:            the call arguments; ownership is transferred to the new
//                   object.
// isAsyncExecution: flag stored with the call and serialized by toIValues().
//
// `stack` is a named rvalue reference, which makes it an lvalue inside the
// constructor. It has to be moved explicitly, otherwise the whole argument
// vector is copied even though the caller already gave it up.
ScriptCall::ScriptCall(
    const c10::QualifiedName& qualifiedName,
    std::vector<at::IValue>&& stack,
    const bool isAsyncExecution)
    : qualifiedName_(qualifiedName),
      stack_(std::move(stack)),
      isAsyncExecution_(isAsyncExecution) {}
// True when this call targets a builtin operator (op_ holds a value).
bool ScriptCall::hasOp() const {
  return op_.has_value();
}
// Returns the builtin operator of this call.
// op_ is dereferenced without a check, so callers should test hasOp() first.
std::shared_ptr<Operator> ScriptCall::op() const {
return *op_;
}
// True when this call targets a TorchScript function (qualifiedName_ holds a
// value).
bool ScriptCall::hasQualifiedName() const {
  return qualifiedName_.has_value();
}
// Returns the qualified name of the TorchScript function of this call.
// qualifiedName_ is dereferenced without a check, so callers should test
// hasQualifiedName() first.
const c10::QualifiedName& ScriptCall::qualifiedName() const {
return *qualifiedName_;
}
// Read-only access to the call arguments.
const std::vector<at::IValue>& ScriptCall::stack() const {
return stack_;
}
// Mutable access to the call arguments; changes made through the returned
// reference modify this object's stack directly.
std::vector<at::IValue>& ScriptCall::stackRef() {
return stack_;
}
// Appends the serialized form of this call to `ivalues`.
//
// Layout of the appended values:
//   builtin operator:     <stack...>, <schema string>, <torch.ops.aten.* name>
//   TorchScript function: <stack...>, <isAsyncExecution>, <qualified name>
//
// The last appended element is always the name; fromIValues() relies on this
// to tell the two kinds of call apart.
//
// Fails a TORCH_CHECK when both an operator and a qualified name are set, or
// when the operator name is not a plain "aten::<name>", and fails a
// TORCH_INTERNAL_ASSERT when neither is set. Values appended before a failing
// check are left in `ivalues`.
void ScriptCall::toIValues(std::vector<at::IValue>& ivalues) const {
  // The call arguments always come first.
  ivalues.insert(ivalues.end(), stack_.begin(), stack_.end());

  if (hasOp()) {
    TORCH_CHECK(
        !hasQualifiedName(),
        "It is builtin operator call, qualifiedName_ should not be set.");
    const auto& schema = (*op_)->schema();
    // TODO: replace this with a real overload_name when FunctionSchema supports
    // that.
    ivalues.emplace_back(toString(schema));

    // The name must contain exactly one "::" and begin with ATEN_PREFIX_.
    std::string builtinName = schema.name();
    const bool hasSingleSeparator =
        builtinName.find("::") == builtinName.rfind("::");
    TORCH_CHECK(
        hasSingleSeparator && builtinName.rfind(ATEN_PREFIX_) == 0,
        "Unexpected operator name ",
        builtinName);
    // aten::add -> torch.ops.aten.add
    builtinName.replace(0, ATEN_PREFIX_.length(), BUILTIN_OP_NAMESPACE_);
    ivalues.emplace_back(std::move(builtinName));
    return;
  }

  if (hasQualifiedName()) {
    ivalues.emplace_back(isAsyncExecution());
    TORCH_CHECK(
        !hasOp(),
        "It is TorchScript function call, operator should not be set.");
    ivalues.emplace_back(qualifiedName_->qualifiedName());
    return;
  }

  TORCH_INTERNAL_ASSERT(
      false,
      "Either builtin operator or TorchScript function name should be set.");
}
// Rebuilds a ScriptCall from the layout produced by toIValues().
//
// ivalues: serialized call. The two trailing elements (name plus either the
//          schema string or the async flag) are popped off, and the remaining
//          elements are moved into the returned object as its stack, so the
//          vector is consumed by this call.
//
// Returns the reconstructed call. Fails a TORCH_INTERNAL_ASSERT when fewer
// than two elements are supplied; the IValue accessors and matchOperator()
// report their own errors when an element has the wrong type or no operator
// matches the schema.
std::unique_ptr<ScriptCall> ScriptCall::fromIValues(
    std::vector<at::IValue>& ivalues) {
  TORCH_INTERNAL_ASSERT(
      ivalues.size() > 1,
      "At least 2 IValues are required to build a ScriptCall.");
  // Last element in the vector is always qualifiedName for both
  // builtin operator and TorchScript function.
  // If the qualifiedName is not a builtin operator name, then treat it
  // as TorchScript function name.
  //
  // Take a copy: toStringRef() refers to storage owned by the IValue, and
  // that IValue is popped (and possibly destroyed) before the name is used
  // for the last time.
  const std::string qualifiedName = ivalues.back().toStringRef();
  ivalues.pop_back();

  if (qualifiedName.rfind(BUILTIN_OP_NAMESPACE_) == 0) {
    // The schema string is only needed while matching, so it is read before
    // its IValue is removed from the vector.
    auto op = matchOperator(ivalues.back().toStringRef());
    // remove str_schema from ivalues
    ivalues.pop_back();
    return std::make_unique<ScriptCall>(std::move(op), std::move(ivalues));
  }

  const bool isAsyncExecution = ivalues.back().toBool();
  ivalues.pop_back();
  return std::make_unique<ScriptCall>(
      c10::QualifiedName(qualifiedName),
      std::move(ivalues),
      isAsyncExecution);
}
// Serializes this call into a SCRIPT_CALL message: the values produced by
// toIValues() are wrapped in a tuple and pickled, and the tensor table filled
// in by the pickler is attached to the message next to the payload.
c10::intrusive_ptr<Message> ScriptCall::toMessageImpl() && {
  std::vector<IValue> serialized;
  toIValues(serialized);

  std::vector<torch::Tensor> tensors;
  const IValue packed(c10::ivalue::Tuple::create(std::move(serialized)));
  auto pickled = jit::pickle(packed, &tensors);

  return c10::make_intrusive<Message>(
      std::move(pickled), std::move(tensors), MessageType::SCRIPT_CALL);
}
// Deserializes a ScriptCall from `message`: the payload is unpickled with the
// current RPC agent's type resolver and the message's tensors, the resulting
// tuple's elements are copied into a vector, and fromIValues() builds the call
// from that vector.
// NOTE(review): the payload presumably arrives from a remote peer, so the
// unpickled structure should not be assumed to be well-formed -- confirm that
// every caller tolerates the errors raised here and in fromIValues().
std::unique_ptr<ScriptCall> ScriptCall::fromMessage(const Message& message) {
  const auto& bytes = message.payload();
  auto unpickled = jit::unpickle(
      static_cast<const char*>(bytes.data()),
      bytes.size(),
      *RpcAgent::getCurrentRpcAgent()->getTypeResolver(),
      message.tensors());
  // `unpickled` stays alive until fromIValues() returns.
  auto elements = unpickled.toTupleRef().elements().vec();
  return fromIValues(elements);
}
// Finds the registered operator whose schema prints exactly as `str_schema`.
//
// str_schema: textual schema, as written by toIValues().
//
// Returns the matching operator; fails a TORCH_CHECK when no operator
// registered under the schema's symbol has an identical schema string.
std::shared_ptr<Operator> ScriptCall::matchOperator(
    const std::string& str_schema) {
  // TODO: This is a temporary solution. We should pass enough information to
  // allow deterministically matched to one operator.
  // extract symbol from the schema
  auto schema = torch::jit::parseSchema(str_schema);
  auto symbol = at::Symbol::fromQualString(schema.name());
  // Iterate by const reference: copying each shared_ptr just to compare its
  // schema costs refcount updates per candidate for nothing.
  for (const auto& op : torch::jit::getAllOperatorsFor(symbol)) {
    if (toString(op->schema()) == str_schema) {
      return op;
    }
  }
  TORCH_CHECK(false, "Cannot find matching operator for schema ", str_schema);
}
} // namespace rpc
} // namespace distributed
} // namespace torch