mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Hi! I've been fuzzing different PyTorch modules and found a crash inside one of them.

Specifically, the crash is in the module that processes `script_call` RPC requests, in the function `ScriptCall::fromIValues(std::vector<at::IValue>& ivalues)`.

Running this test case causes a crash that occurs when `ivalues.back()` is called at [script_call.cpp:90](https://github.com/pytorch/pytorch/blob/abc54f9314/torch/csrc/distributed/rpc/script_call.cpp#L90). The crash occurs because the vector `ivalues` is empty.

All tests were performed on this PyTorch version: [abc54f9314](https://github.com/pytorch/pytorch/commit/abc54f9314)

The provided patch checks that there are enough elements in the `ivalues` vector.

### How to reproduce

1. To reproduce the crash, use the provided Docker setup: [Dockerfile](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch)
2. Build the container: `docker build -t oss-sydr-fuzz-pytorch-reproduce .`
3. Copy the crash file to the current directory:
   - [crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4.zip](https://github.com/pytorch/pytorch/files/10674059/crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4.zip)
4. Run the container: ``docker run --privileged --network host -v `pwd`:/homedir --rm -it oss-sydr-fuzz-pytorch-reproduce /bin/bash``
5. Execute the binary: `/message_deserialize_fuzz /homedir/crash-9f76d4e37a2391136a4ce07d47269db1e063e4b4`

After execution completes you will see this stack trace:

```asan
AddressSanitizer:DEADLYSIGNAL
=================================================================
==57==ERROR: AddressSanitizer: SEGV on unknown address (pc 0x0000008e7b19 bp 0x7ffd2fdded70 sp 0x7ffd2fddec40 T0)
==57==The signal is caused by a READ memory access.
==57==Hint: this fault was caused by a dereference of a high value address (see register values below). Disassemble the provided pc to learn which register was used.
    #0 0x8e7b19 in c10::IValue::isString() const /pytorch_fuzz/aten/src/ATen/core/ivalue.h:639:27
    #1 0x8e7b19 in c10::IValue::toStringRef[abi:cxx11]() const /pytorch_fuzz/aten/src/ATen/core/ivalue_inl.h:2179:3
    #2 0xe04fb58 in torch::distributed::rpc::ScriptCall::fromIValues(std::vector<c10::IValue, std::allocator<c10::IValue> >&) /pytorch_fuzz/torch/csrc/distributed/rpc/script_call.cpp:90:53
    #3 0xe0511f0 in torch::distributed::rpc::ScriptCall::fromMessage(torch::distributed::rpc::Message const&) /pytorch_fuzz/torch/csrc/distributed/rpc/script_call.cpp:133:10
    #4 0xe0ff71e in torch::distributed::rpc::deserializeRequest(torch::distributed::rpc::Message const&) /pytorch_fuzz/torch/csrc/distributed/rpc/utils.cpp:102:14
    #5 0x602a41 in LLVMFuzzerTestOneInput /message_deserialize_fuzz.cc:192:27
    #6 0x52ce61 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:611:15
    #7 0x516d7c in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:324:6
    #8 0x51cacb in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:860:9
    #9 0x546062 in main /llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
    #10 0x7f41e42a8082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082)
    #11 0x51169d in _start (/message_deserialize_fuzz+0x51169d)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /pytorch_fuzz/aten/src/ATen/core/ivalue.h:639:27 in c10::IValue::isString() const
==57==ABORTING
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94297
Approved by: https://github.com/ezyang
161 lines
4.8 KiB
C++
161 lines
4.8 KiB
C++
#include <torch/csrc/distributed/rpc/rpc_agent.h>
|
|
#include <torch/csrc/distributed/rpc/script_call.h>
|
|
#include <torch/csrc/jit/serialization/pickle.h>
|
|
|
|
namespace torch {
|
|
namespace distributed {
|
|
namespace rpc {
|
|
|
|
// Prefix that toIValues() writes in front of a builtin operator name
// (e.g. "aten::add" is sent as "torch.ops.aten.add") and that fromIValues()
// looks for to recognise a builtin operator call.
const std::string ScriptCall::BUILTIN_OP_NAMESPACE_ = "torch.ops.aten.";
// Prefix that toIValues() requires on the schema name of a builtin operator.
const std::string ScriptCall::ATEN_PREFIX_ = "aten::";
|
|
|
|
// Builds a call to a builtin operator.
//   op    - the operator to invoke.
//   stack - the arguments for the call; its contents are moved into this
//           object.
// Async execution is always disabled for calls built this way.
ScriptCall::ScriptCall(
    std::shared_ptr<Operator> op,
    std::vector<at::IValue>&& stack)
    // `stack` is a *named* rvalue reference and therefore an lvalue
    // expression: without std::move the member would be copy-constructed.
    : op_(std::move(op)),
      stack_(std::move(stack)),
      isAsyncExecution_(false) {}
|
|
|
|
// Builds a call to a TorchScript function.
//   qualifiedName    - name of the function to invoke (copied).
//   stack            - the arguments for the call; its contents are moved
//                      into this object.
//   isAsyncExecution - flag stored as-is and returned by isAsyncExecution().
ScriptCall::ScriptCall(
    const c10::QualifiedName& qualifiedName,
    std::vector<at::IValue>&& stack,
    const bool isAsyncExecution)
    // `stack` is a *named* rvalue reference and therefore an lvalue
    // expression: without std::move the member would be copy-constructed.
    : qualifiedName_(qualifiedName),
      stack_(std::move(stack)),
      isAsyncExecution_(isAsyncExecution) {}
|
|
|
|
bool ScriptCall::hasOp() const {
|
|
return op_ ? true : false;
|
|
}
|
|
|
|
std::shared_ptr<Operator> ScriptCall::op() const {
|
|
return *op_;
|
|
}
|
|
|
|
bool ScriptCall::hasQualifiedName() const {
|
|
return qualifiedName_ ? true : false;
|
|
}
|
|
|
|
// Returns the TorchScript function name held by this call.
// qualifiedName_ is dereferenced without any check here, so callers are
// expected to consult hasQualifiedName() first.
const c10::QualifiedName& ScriptCall::qualifiedName() const {
  const c10::QualifiedName& name = *qualifiedName_;
  return name;
}
|
|
|
|
const std::vector<at::IValue>& ScriptCall::stack() const {
|
|
return stack_;
|
|
}
|
|
|
|
std::vector<at::IValue>& ScriptCall::stackRef() {
|
|
return stack_;
|
|
}
|
|
|
|
// Appends the flattened form of this call to `ivalues`: first a copy of every
// stack entry, then two trailing entries that identify the callee.
//   builtin operator:     ..., <schema string>, "torch.ops.aten.<name>"
//   TorchScript function: ..., <isAsyncExecution flag>, <qualified name>
// Fails a TORCH_CHECK if both an operator and a qualified name are set, or if
// the operator name is not of the form "aten::<name>"; fails a
// TORCH_INTERNAL_ASSERT if neither is set.
void ScriptCall::toIValues(std::vector<at::IValue>& ivalues) const {
  ivalues.insert(ivalues.end(), stack_.begin(), stack_.end());

  if (hasOp()) {
    TORCH_CHECK(
        !hasQualifiedName(),
        "It is builtin operator call, qualifiedName_ should not be set.");
    // TODO: replace this with a real overload_name when FunctionSchema supports
    // that.
    const auto& opSchema = (*op_)->schema();
    ivalues.emplace_back(toString(opSchema));

    // The operator name must contain exactly one "::" and the last
    // occurrence of the aten prefix must sit at position 0.
    auto builtinName = opSchema.name();
    const bool singleSeparator =
        builtinName.find("::") == builtinName.rfind("::");
    const bool prefixAtStart = builtinName.rfind(ATEN_PREFIX_) == 0;
    TORCH_CHECK(
        singleSeparator && prefixAtStart,
        "Unexpected operator name ",
        builtinName);

    // aten::add -> torch.ops.aten.add
    builtinName.replace(0, ATEN_PREFIX_.length(), BUILTIN_OP_NAMESPACE_);
    ivalues.emplace_back(std::move(builtinName));
    return;
  }

  if (hasQualifiedName()) {
    ivalues.emplace_back(isAsyncExecution());
    TORCH_CHECK(
        !hasOp(),
        "It is TorchScript function call, operator should not be set.");
    ivalues.emplace_back(qualifiedName_->qualifiedName());
    return;
  }

  TORCH_INTERNAL_ASSERT(
      false,
      "Either builtin operator or TorchScript function name should be set.");
}
|
|
|
|
std::unique_ptr<ScriptCall> ScriptCall::fromIValues(
|
|
std::vector<at::IValue>& ivalues) {
|
|
TORCH_INTERNAL_ASSERT(
|
|
ivalues.size() > 1,
|
|
"At least 2 IValues are required to build a ScriptCall.");
|
|
|
|
// Last element in the vector is always qualifiedName for both
|
|
// builitin operator and TorchScript function
|
|
// If the qualifiedName is not a builtin operator name, then treat it
|
|
// as TorchScript function name
|
|
const std::string& qualifiedName = ivalues.back().toStringRef();
|
|
|
|
if (qualifiedName.rfind(BUILTIN_OP_NAMESPACE_) == 0) {
|
|
ivalues.pop_back();
|
|
const std::string& str_schema = ivalues.back().toStringRef();
|
|
auto op = matchOperator(str_schema);
|
|
|
|
ivalues.pop_back();
|
|
// remove str_schema from ivalues
|
|
return std::make_unique<ScriptCall>(op, std::move(ivalues));
|
|
} else {
|
|
ivalues.pop_back();
|
|
bool isAsyncExecution = ivalues.back().toBool();
|
|
ivalues.pop_back();
|
|
return std::make_unique<ScriptCall>(
|
|
c10::QualifiedName(qualifiedName),
|
|
std::move(ivalues),
|
|
isAsyncExecution);
|
|
}
|
|
}
|
|
|
|
// Serializes this call into an RPC message of type SCRIPT_CALL.
// The entries produced by toIValues() are wrapped in a tuple and pickled; the
// tensor table passed to jit::pickle is forwarded to the message together
// with the pickled payload.
c10::intrusive_ptr<Message> ScriptCall::toMessageImpl() && {
  std::vector<IValue> flattened;
  toIValues(flattened);

  std::vector<torch::Tensor> tensors;
  auto pickled = jit::pickle(
      c10::ivalue::Tuple::create(std::move(flattened)), &tensors);

  return c10::make_intrusive<Message>(
      std::move(pickled), std::move(tensors), MessageType::SCRIPT_CALL);
}
|
|
|
|
std::unique_ptr<ScriptCall> ScriptCall::fromMessage(const Message& message) {
|
|
auto payload = static_cast<const char*>(message.payload().data());
|
|
auto payload_size = message.payload().size();
|
|
auto value = jit::unpickle(
|
|
payload,
|
|
payload_size,
|
|
*RpcAgent::getCurrentRpcAgent()->getTypeResolver(),
|
|
message.tensors());
|
|
|
|
auto values = value.toTupleRef().elements().vec();
|
|
return fromIValues(values);
|
|
}
|
|
|
|
std::shared_ptr<Operator> ScriptCall::matchOperator(
|
|
const std::string& str_schema) {
|
|
// TODO: This is a temporary solution. We should pass enough information to
|
|
// allow deterministically matched to one operator.
|
|
|
|
// extract symbol from the schema
|
|
auto schema = torch::jit::parseSchema(str_schema);
|
|
auto symbol = at::Symbol::fromQualString(schema.name());
|
|
|
|
for (auto op : torch::jit::getAllOperatorsFor(symbol)) {
|
|
if (toString(op->schema()) == str_schema) {
|
|
return op;
|
|
}
|
|
}
|
|
|
|
TORCH_CHECK(false, "Cannot find matching operator for schema ", str_schema);
|
|
}
|
|
|
|
} // namespace rpc
|
|
} // namespace distributed
|
|
} // namespace torch
|