Remove -Wno-unused-variable from utils.cmake (take 2) (#75538)

Summary:
[Comment](https://github.com/pytorch/pytorch/pull/62445/files#r680132022) claims the flag was added for consistency with the top-level CMakeLists.txt, but `-Wno-unused-variable` is not mentioned there.

Fix the violations in 50+ files that were added in the interim, either by removing unused variables or by decorating the code with `C10_UNUSED` when a local variable is likely there to extend an object's lifetime until the end of the block.
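
For illustration, here is a minimal sketch of the `C10_UNUSED` pattern that most of the hunks below follow. The macro definition is a simplified stand-in for the real one in `c10/macros/Macros.h` (which expands to a compiler-specific "unused" attribute, roughly the modern `[[maybe_unused]]`), and `register_thing`/`repeat_three_times` are made-up names for the example:

```cpp
#include <initializer_list>

// Simplified stand-in for the real macro in c10/macros/Macros.h.
#if defined(__GNUC__) || defined(__clang__)
#define C10_UNUSED __attribute__((__unused__))
#else
#define C10_UNUSED
#endif

// Hypothetical registration helper: callers keep the return value only so
// the side effect runs once at static-initialization time.
static int register_thing() {
  // ... register kernels/classes here ...
  return 0;
}

namespace {
// Without the annotation, -Wunused-variable fires because `registered` is
// never read; the variable still exists for its side effect and lifetime.
static C10_UNUSED auto registered = register_thing();
} // namespace

void repeat_three_times() {
  // The same annotation works for loop variables that only drive the trip
  // count, as in the c10::irange loops changed below.
  for (C10_UNUSED const auto i : {0, 1, 2}) {
    // ... body that never reads `i` ...
  }
}
```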

The suppressed warning caused a preventable revert in https://github.com/pytorch/pytorch/pull/72633#issuecomment-1092300787.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/75538

Reviewed By: anjali411

Differential Revision: D35747333

Pulled By: malfet

fbshipit-source-id: 3fc5828e44a4c05ba0e89e92613e6ebbdb260626
(cherry picked from commit c179fba21cfa2a0093fad50ccad5a22dd7cff52c)
Authored by Nikita Shulga on 2022-04-20 10:35:16 -07:00, committed by PyTorch MergeBot
parent 29b004be7a
commit f6c275f55d
53 changed files with 43 additions and 122 deletions

View File

@ -372,7 +372,7 @@ ivalue::TupleTypeFactory<TupleType>::fallback(const Type& type) {
for (const auto& elem : dyn.arguments().elems) {
types.emplace_back(elem.ty);
if (const auto& name = elem.label) {
fields.emplace_back(*elem.label);
fields.emplace_back(*name);
}
}
if (const auto& name = dyn.name()) {

View File

@ -1833,7 +1833,7 @@ DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple_hx_typ
namespace {
static auto ensure_linear_params_registered = register_linear_params();
static C10_UNUSED auto ensure_linear_params_registered = register_linear_params();
static auto cell_params_base_registry =
torch::selective_class_<CellParamsBase>("rnn", TORCH_SELECTIVE_CLASS("CellParamsBase"))

View File

@ -71,7 +71,7 @@ int register_linear_params() {
}
namespace {
static auto linear_params = register_linear_params();
static C10_UNUSED auto linear_params = register_linear_params();
} // namespace
}} // namespace ao::sparse

View File

@ -554,9 +554,9 @@ int register_embedding_params() {
namespace {
static auto conv2d_params = register_conv_params<2>();
static auto conv3d_params = register_conv_params<3>();
static auto linear_params = register_linear_params();
static auto embedding_params = register_embedding_params();
static C10_UNUSED auto conv2d_params = register_conv_params<2>();
static C10_UNUSED auto conv3d_params = register_conv_params<3>();
static C10_UNUSED auto linear_params = register_linear_params();
static C10_UNUSED auto embedding_params = register_embedding_params();
} // namespace

View File

@ -187,7 +187,6 @@ Descriptor::Set dispatch_prologue(
const Shader::Descriptor& shader_descriptor,
const Shader::WorkGroup& local_work_group_size) {
Context* const context = api::context();
const GPU gpu = context->gpu();
Descriptor& descriptor = context->descriptor();
Pipeline& pipeline = context->pipeline();
Shader& shader = context->shader();

View File

@ -199,7 +199,6 @@ Tensor cat_height(const TensorList tensors, vTensor& v_output) {
Tensor cat(
const at::TensorList tensors,
const int64_t dim) {
const auto norm_dim = normalize_dim(dim, 4);
TORCH_CHECK(
tensors.size() > 0,
"Vulkan cat expects at least one tensor");

View File

@ -348,6 +348,7 @@ _ScopeGuard<T> ScopeGuard(T f) {
stats.field.groupName.c_str(), \
__caffe_event_value_, \
##__VA_ARGS__); \
(void)__caffe_event_value_; \
}
#define CAFFE_DURATION(stats, field, ...) \

View File

@ -286,9 +286,6 @@ NO_GRADIENT(BooleanMaskLengths);
} // namespace
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
const float minf = -1.0f * std::numeric_limits<float>::infinity();
// Template this on a functor object so we can generate different
// implementations at compile time and have a better chance of inlining
template <typename Functor>

View File

@ -155,7 +155,7 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
col_buffer->Resize(buffer_shape);
T* col_buffer_data = col_buffer->template mutable_data<T>();
// Im2col, followed by gemm.
for (const auto image_id : c10::irange(N)) {
for (C10_UNUSED const auto image_id : c10::irange(N)) {
for (const auto group_id : c10::irange(group_)) {
DeformableIm2col(
Xdata + group_id * input_offset,
@ -342,7 +342,7 @@ bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
}
for (const auto image_id : c10::irange(N)) {
for (C10_UNUSED const auto image_id : c10::irange(N)) {
for (const auto group_id : c10::irange(group_)) {
math::Gemm<T, Context>(
CblasTrans,

View File

@ -62,7 +62,7 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
const int64_t num_bounds_per_group,
const int64_t num_group) {
const T* start = bounds;
for (const auto i : c10::irange(num_group)) {
for (C10_UNUSED const auto i : c10::irange(num_group)) {
if (!std::is_sorted(start, start + num_bounds_per_group)) {
return false;
}

View File

@ -36,7 +36,7 @@ void Decode(
}
int sz = output->numel();
for (const auto i : c10::irange(sz)) {
for (C10_UNUSED const auto i : c10::irange(sz)) {
DCHECK_LE(*code_ptr, cb_size);
*out_ptr++ = cb_ptr[*code_ptr++];
}

View File

@ -229,8 +229,8 @@ void ROIAlignForward(
for (const auto pw : c10::irange(pooled_width)) {
vector<int32_t> acc_buffer(channels, 0);
for (const auto iy : c10::irange(roi_bin_grid_h)) {
for (const auto ix : c10::irange(roi_bin_grid_w)) {
for (C10_UNUSED const auto iy : c10::irange(roi_bin_grid_h)) {
for (C10_UNUSED const auto ix : c10::irange(roi_bin_grid_w)) {
PreCalc pc = pre_calc[pre_calc_index];
const uint8_t* data_1 = offset_bottom_data + channels * pc.pos1;

View File

@ -216,7 +216,7 @@ static void Im2ColNHWC(
T* data_col_temp =
data_col + h * width_col * kernel_h * kernel_w * channels;
int w_pad = -pad_l;
for (const auto w : c10::irange(width_col)) {
for (C10_UNUSED const auto w : c10::irange(width_col)) {
int r = 0;
for (int ih = h_pad; ih < h_pad + dkernel_h; ih += dilation_h, ++r) {
int s = 0;

View File

@ -18,16 +18,11 @@
namespace caffe2 {
// Constants for user tracepoints
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
static constexpr int SDT_NONBLOCKING_OP = 0;
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
static constexpr int SDT_BLOCKING_OP = 1;
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
static constexpr uint64_t SDT_TIMEOUT = (uint64_t)-1;
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
static constexpr uint64_t SDT_ABORT = (uint64_t)-2;
// NOLINTNEXTLINE(clang-diagnostic-unused-const-variable)
static constexpr uint64_t SDT_CANCEL = (uint64_t)-3;
C10_UNUSED static constexpr int SDT_NONBLOCKING_OP = 0;
C10_UNUSED static constexpr int SDT_BLOCKING_OP = 1;
C10_UNUSED static constexpr uint64_t SDT_TIMEOUT = (uint64_t)-1;
C10_UNUSED static constexpr uint64_t SDT_ABORT = (uint64_t)-2;
C10_UNUSED static constexpr uint64_t SDT_CANCEL = (uint64_t)-3;
BlobsQueue::BlobsQueue(
Workspace* ws,
@ -66,8 +61,7 @@ bool BlobsQueue::blockingRead(
float timeout_secs) {
Timer readTimer;
auto keeper = this->shared_from_this();
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
const auto& name = name_.c_str();
C10_UNUSED const auto& name = name_.c_str();
CAFFE_SDT(queue_read_start, name, (void*)this, SDT_BLOCKING_OP);
std::unique_lock<std::mutex> g(mutex_);
auto canRead = [this]() {
@ -76,7 +70,6 @@ bool BlobsQueue::blockingRead(
};
// Decrease queue balance before reading to indicate queue read pressure
// is being increased (-ve queue balance indicates more reads than writes)
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, queue_balance, -1);
if (timeout_secs > 0) {
std::chrono::milliseconds timeout_ms(int(timeout_secs * 1000));
@ -99,17 +92,14 @@ bool BlobsQueue::blockingRead(
CAFFE_ENFORCE(inputs.size() >= result.size());
for (const auto i : c10::irange(result.size())) {
auto bytes = BlobStat::sizeBytes(*result[i]);
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, queue_dequeued_bytes, bytes, i);
using std::swap;
swap(*(inputs[i]), *(result[i]));
}
CAFFE_SDT(queue_read_end, name, (void*)this, writer_ - reader_);
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, queue_dequeued_records);
++reader_;
cv_.notify_all();
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, read_time_ns, readTimer.NanoSeconds());
return true;
}
@ -117,8 +107,7 @@ bool BlobsQueue::blockingRead(
bool BlobsQueue::tryWrite(const std::vector<Blob*>& inputs) {
Timer writeTimer;
auto keeper = this->shared_from_this();
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
const auto& name = name_.c_str();
C10_UNUSED const auto& name = name_.c_str();
CAFFE_SDT(queue_write_start, name, (void*)this, SDT_NONBLOCKING_OP);
std::unique_lock<std::mutex> g(mutex_);
if (!canWrite()) {
@ -127,11 +116,9 @@ bool BlobsQueue::tryWrite(const std::vector<Blob*>& inputs) {
}
// Increase queue balance before writing to indicate queue write pressure is
// being increased (+ve queue balance indicates more writes than reads)
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, queue_balance, 1);
DCHECK(canWrite());
doWrite(inputs);
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, write_time_ns, writeTimer.NanoSeconds());
return true;
}
@ -139,13 +126,11 @@ bool BlobsQueue::tryWrite(const std::vector<Blob*>& inputs) {
bool BlobsQueue::blockingWrite(const std::vector<Blob*>& inputs) {
Timer writeTimer;
auto keeper = this->shared_from_this();
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
const auto& name = name_.c_str();
C10_UNUSED const auto& name = name_.c_str();
CAFFE_SDT(queue_write_start, name, (void*)this, SDT_BLOCKING_OP);
std::unique_lock<std::mutex> g(mutex_);
// Increase queue balance before writing to indicate queue write pressure is
// being increased (+ve queue balance indicates more writes than reads)
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, queue_balance, 1);
cv_.wait(g, [this]() { return closing_ || canWrite(); });
if (!canWrite()) {
@ -154,7 +139,6 @@ bool BlobsQueue::blockingWrite(const std::vector<Blob*>& inputs) {
}
DCHECK(canWrite());
doWrite(inputs);
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
CAFFE_EVENT(stats_, write_time_ns, writeTimer.NanoSeconds());
return true;
}
@ -178,8 +162,7 @@ bool BlobsQueue::canWrite() {
void BlobsQueue::doWrite(const std::vector<Blob*>& inputs) {
auto& result = queue_[writer_ % queue_.size()];
CAFFE_ENFORCE(inputs.size() >= result.size());
// NOLINTNEXTLINE(clang-diagnostic-unused-variable)
const auto& name = name_.c_str();
C10_UNUSED const auto& name = name_.c_str();
for (const auto i : c10::irange(result.size())) {
using std::swap;
swap(*(inputs[i]), *(result[i]));

View File

@ -449,7 +449,6 @@ function(torch_compile_options libname)
-Wall
-Wextra
-Wno-unused-parameter
-Wno-unused-variable
-Wno-unused-function
-Wno-unused-result
-Wno-unused-local-typedefs

View File

@ -5051,7 +5051,6 @@ Tensor group_norm_jvp(
Tensor group_norm_mean_jvp(
const Tensor& input_t, const Tensor& mean_p, int64_t groups) {
int64_t N = input_t.size(0);
int64_t C = input_t.size(1);
std::array<int64_t, 3> view_shape = {1, N * groups, N ? -1 : 1};
auto input_t_reshaped = input_t.view(view_shape);
return input_t_reshaped.mean({2}, false).view_as(mean_p);
@ -5062,7 +5061,6 @@ Tensor group_norm_invstd_jvp(
const Tensor& mean_p, const Tensor& invstd_p,
int64_t groups) {
int64_t N = input_p.size(0);
int64_t C = input_p.size(1);
std::vector<int64_t> view_shape = {1, N * groups, N ? -1 : 1};

View File

@ -328,7 +328,6 @@ struct KinetoThreadLocalState : public ProfilerThreadLocalStateBase {
// one uint64_t variable as key.
std::unordered_map<uint64_t, libkineto::GenericTraceActivity*>
tidSeq2activity;
uint64_t fwd_bwd_link_id = 1;
for (const auto idx : c10::irange(cpu_trace->activities.size())) {
auto& kineto_event = kineto_events_[idx];
@ -603,7 +602,6 @@ void pushProfilingCallbacks(const std::unordered_set<at::RecordScope>& scopes) {
if (!state_ptr) {
return nullptr;
}
const auto& config = state_ptr->config();
auto corr_id = next_correlation_id();
torch::profiler::impl::kineto::pushCorrelationId(corr_id);
return state_ptr->record_queue_.getSubqueue()->begin_op(fn, corr_id);

View File

@ -2836,8 +2836,9 @@ void ProcessGroupGloo::monitoredBarrier(
waitLoop(sendWorkMap);
auto elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - startTime);
using namespace std::chrono;
C10_UNUSED auto elapsedTime = duration_cast<milliseconds>(
steady_clock::now() - startTime);
}
void ProcessGroupGloo::setSequenceNumberForGroup() {

View File

@ -31,8 +31,6 @@ constexpr const char* const kNCCLAbortedCommStoreKey = "NCCLABORTEDCOMM";
namespace {
constexpr int kBytes = 8;
// RAII helper class to manage NCCL group API and CUDA free mutex.
// The destructor is allowed to throw since this helper class only
// manages group and lock lifetimes.
@ -440,10 +438,6 @@ void ProcessGroupNCCL::WorkNCCL::synchronizeInternal(
// In case of blocking, wait for the operation to complete.
if (blockingWait_) {
// Use the passed in timeout if provided, otherwise use the default
// opTimeout for each WorkNCCL object.
std::chrono::milliseconds workTimeout =
timeout == kNoTimeout ? opTimeout_ : timeout;
// Wait for the operation to complete.
while (!isCompleted()) {
if (timedOut()) {

View File

@ -1492,7 +1492,6 @@ TensorView* gather(
". Padding right: ",
pad_right);
const auto out_stop_offset = inp_stop_offset.value() + extent_adjustment;
Val* out_axis_dim = nullptr;
out_root_domains.push_back(IrBuilder::create<IterDomain>(
FusionGuard::getCurFusion()->zeroVal(),
inp_axis->extent(),

View File

@ -938,14 +938,13 @@ class CudaKernelGenerator : private OptOutConstDispatch {
indent() << genMmaOp(mma, true) << "(reinterpret_cast<Array<"
<< mma->out()->getDataType().value() << ","
<< getOutputRegisterSize(mma->options().macro) << ","
<< getOutputRegisterSize(mma->options().macro) << ">*>"
<< getOutputRegisterSize(options.macro) << ","
<< getOutputRegisterSize(options.macro) << ">*>"
<< "(&" << gen(uop->out()) << "));\n";
}
void handle(const MmaOp* mma) final {
auto options = mma->options();
auto in_a = mma->inA()->as<kir::TensorIndex>();
auto out = mma->out()->as<kir::TensorIndex>();
indent() << genMmaOp(mma) << "(\n";
indent() << kTab << "reinterpret_cast<Array<"
@ -967,7 +966,6 @@ class CudaKernelGenerator : private OptOutConstDispatch {
void handle(const BroadcastOp* stmt) final {
TORCH_INTERNAL_ASSERT(stmt->out()->isA<kir::TensorIndex>());
const auto tensor_index = stmt->out()->as<kir::TensorIndex>();
const ParallelTypeBitmap parallel_types =
kernel_->summary().broadcast_parallel_types.at(stmt);
@ -1313,7 +1311,6 @@ class CudaKernelGenerator : private OptOutConstDispatch {
TORCH_INTERNAL_ASSERT(rop->isFused());
const auto out = rop->out()->as<kir::TensorIndex>();
const auto domain = out->view()->domain();
const auto data_type = rop->out()->dtype();
const auto op_type = rop->getReductionOpType();
@ -1384,11 +1381,6 @@ class CudaKernelGenerator : private OptOutConstDispatch {
parallel_types.hasBID(),
"GridBroadcast needs to be used with a broadcast op that is parallelized with the BID parallel types");
const auto out = bop->out()->as<kir::TensorIndex>();
const auto domain = out->view()->domain();
const auto data_type = bop->out()->dtype();
TORCH_INTERNAL_ASSERT(
grop->broadcast_buffer()->buffer()->isA<TensorView>());
TORCH_INTERNAL_ASSERT(grop->sync_buffer()->buffer()->isA<TensorView>());
@ -1499,7 +1491,6 @@ class CudaKernelGenerator : private OptOutConstDispatch {
TORCH_INTERNAL_ASSERT(wop->isFused());
const auto out = wop->out()->as<kir::TensorIndex>();
const auto domain = out->view()->domain();
const auto data_type = wop->outAvg()->dtype();
const auto index_type = wop->outN()->dtype();

View File

@ -548,7 +548,7 @@ FusionExecutor::GlobalBuffers FusionExecutor::allocGlobalVals(
FUSER_PERF_SCOPE("FusionExecutor::AllocGlobalVals");
GlobalBuffers global_buffers;
const auto kernel = lowered_->kernel();
const auto& kernel_summary = lowered_->kernel()->summary();
const auto& kernel_summary = kernel->summary();
for (auto alloc : kernel_summary.global_allocations) {
TORCH_INTERNAL_ASSERT(
alloc->buffer()->isA<TensorView>(),

View File

@ -611,9 +611,6 @@ void validateAlignedVectorizedTensors(
// Verify extents of aligned vectorized tensors
for (const auto& vec_info : kernel->summary().vectorized_set_info) {
auto in_tv = vec_info.producer_tv;
auto out_tv = vec_info.consumer_tv;
if (vec_info.vectorized_leaf_id->getParallelType() ==
ParallelType::Vectorize) {
validateAlignedVectorizeExtents(vec_info, expr_eval);

View File

@ -2303,7 +2303,7 @@ void separateNestedViews(Node* cuda_fusion_group) {
auto parent = parent_value->node();
auto grandparent_value = parent->input(0);
auto grandparent = grandparent_value->node();
C10_UNUSED auto grandparent = grandparent_value->node();
// Before: gp -> x -> n
// After: gp -> x / gp -> n

View File

@ -814,8 +814,6 @@ indexMapFromTV(
kir::ForLoop* alloc_loop,
bool as_consumer,
kir::ForLoop* double_buffer_loop = nullptr) {
const auto gpu_lower = GpuLower::current();
bool within_alloc = false;
if (alloc_loop == nullptr) {
within_alloc = true;

View File

@ -142,7 +142,7 @@ void Scope::insert(size_t pos, Expr* expr) {
void Scope::erase(std::vector<Expr*>::const_iterator pos) {
// Remove the scope of the expr if this is the scope
auto expr = *pos;
C10_UNUSED auto expr = *pos;
exprs_.erase(pos);
}

View File

@ -58,9 +58,6 @@ class AllocationInserter : public kir::ExprMutator {
// Fills info.buffer, info.alloc_pos, info.init_for_loop,
// info.init_place_before, info.alloc_for_loop, info.alloc_place_before
void fillAllocationInformation(AllocationInformation& info, Expr* expr) {
size_t alloc_pos = 0;
kir::ForLoop* init_for_loop = nullptr;
size_t fl_idx_next = 0;
auto loop_alloc_info =
loop_utils::getAllocInformation(info.buffer, for_loops_);

View File

@ -215,8 +215,6 @@ class DoubleBufferLoopCloner : public kir::IrVisitor {
}
void handle(kir::ForLoop* fl) final {
const auto gpu_lower = GpuLower::current();
kir::ForLoop* cloned_loop = fl == double_buffer_loop_
? cloned_top_level_loop_
: IrBuilder::create<kir::ForLoop>(fl);

View File

@ -326,8 +326,6 @@ void HaloInfo::insertToInheritanceMap(
void HaloInfo::initializeFromRootAxisInfo(IterDomain* id) {
TORCH_INTERNAL_ASSERT(hasRootAxisInfo(id));
auto gpu_lower = GpuLower::current();
const auto& halo_info = getRootAxisInfo(id);
auto halo_width = halo_info.width();
@ -350,8 +348,6 @@ void HaloInfo::setHaloWidth(IterDomain* id, int halo_width) {
// Propagate extent information from root axes to descendants
void HaloInfo::build(TensorDomain* td) {
auto gpu_lower = GpuLower::current();
auto exprs = DependencyCheck::getAllExprsBetween(
{td->getMaybeRFactorDomain().begin(), td->getMaybeRFactorDomain().end()},
{td->domain().begin(), td->domain().end()});

View File

@ -251,7 +251,7 @@ void OptOutMutator::mutate(MmaOp* mma) {
auto container = mma->container();
auto options = mma->options();
container->removeExpr(mma);
auto new_mma =
C10_UNUSED auto new_mma =
IrBuilder::create<MmaOp>(container, out, in_a, in_b, init, options);
}
@ -357,7 +357,7 @@ void OptOutMutator::mutate(Split* s) {
auto container = s->container();
auto inner_split = s->innerSplit();
container->removeExpr(s);
auto new_node = IrBuilder::create<Split>(
C10_UNUSED auto new_node = IrBuilder::create<Split>(
container, ot, inr, in, fact, inner_split, start_offset, stop_offset);
}
@ -373,7 +373,7 @@ void OptOutMutator::mutate(Merge* m) {
auto container = m->container();
container->removeExpr(m);
auto new_node = IrBuilder::create<Merge>(container, ot, otr, in);
C10_UNUSED auto new_node = IrBuilder::create<Merge>(container, ot, otr, in);
}
void OptOutMutator::mutate(kir::Allocate*) {

View File

@ -93,8 +93,6 @@ void ParallelDimensionMap::populateDimensionMapWithSingleCASet(
const std::unordered_set<IterDomain*>& dom_set) {
TORCH_INTERNAL_ASSERT(dom_set.size() == 1);
const auto gpu_lower = GpuLower::current();
// pt is used by only one concrete domain
auto id = *dom_set.begin();
auto it = constant_extent_map_.find(id);
@ -119,8 +117,6 @@ void ParallelDimensionMap::populateDimensionMapWithMultipleCASet(
const std::unordered_set<IterDomain*>& dom_set) {
TORCH_INTERNAL_ASSERT(dom_set.size() > 1);
const auto gpu_lower = GpuLower::current();
bool all_equal = true;
// Use nullptr to signal it's not initialied yet
Val* known_dimension = nullptr;

View File

@ -1369,8 +1369,6 @@ class IrParser {
REGISTER_PARSE_RULE(
ptr_op,
{
auto fusion = FusionGuard::getCurFusion();
// TODO: handle channels last
MemoryFormat format;
std::list<Val*> list_val;

View File

@ -8,7 +8,6 @@ namespace fuser {
namespace cuda {
void PartialSplitMap::build(Fusion* fusion) {
const auto gpu_lower = GpuLower::current();
auto used_vals = ir_utils::allTvs(fusion);
for (auto tv : ir_utils::filterByType<TensorView>(used_vals)) {

View File

@ -444,7 +444,6 @@ void UnswitchPredicate::predicateOn(Expr* tv_expr) {
auto ref_pred_info = Index::getReferenceRootPredicates(
out_tv, for_loops_, unrolled_loop_, false);
const ReferenceTensor& reference = ref_pred_info.second;
// If RootPredicateInfo has a static predicate that is more
// restrictive than the current one, replace the current with the

View File

@ -109,7 +109,6 @@ bool canValidateIsInnerDim(
} else if (auto merge = dynamic_cast<Merge*>(expr)) {
// Might consider just rejecting merge.
auto outer = merge->outer();
auto inner = merge->inner();
if (outer->isBroadcast()) {
return false;
}

View File

@ -808,7 +808,6 @@ PersistentBufferSizeReturn persistentBufferSize(
std::vector<bool> persistent_mask(all_buffers.size(), false);
for (auto buffer_i : c10::irange(persistent_buffers.size())) {
auto buffer = all_buffers[buffer_i];
persistent_mask[buffer_i] = true;
}
@ -855,7 +854,6 @@ PersistentBufferSizeReturn persistentBufferSize(
int64_t max_persistence_size = 0;
int64_t max_proj_persistence_size = 0;
for (const auto& entry : scoped_persistence_factor) {
auto val = entry.first;
auto active_buffers = entry.second;
auto persistent_buffer_size = masked_dot_product(
persistent_mask, active_buffers, persistent_buffer_sizes);

View File

@ -254,7 +254,6 @@ class NaiveTypePropagator {
}
case aten::_batch_norm_impl_index_backward:
case aten::native_batch_norm_backward: {
int grad_input_index = 1;
int weight_index = -1;
int mask_index = -1;
if (node->kind() ==
@ -486,7 +485,6 @@ class NaiveTypePropagator {
TORCH_CHECK(
hasTypeAndDevice(in_type),
"Type and device propagation has failed, or was not provided enough information.");
const auto in_scalar_type = in_type->scalarType();
const auto in_device = in_type->device();
const auto cuda_enabled = constant_as<bool>(node->input(1));
const auto cpu_enabled = constant_as<bool>(node->input(2));

View File

@ -5543,7 +5543,6 @@ std::vector<Function*> CompilationUnit::define(
void eraseListLiterals(std::shared_ptr<Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);
Node* n = nullptr;
for (auto next_node = it.next(); next_node != nullptr;) {
Node* node = next_node;

View File

@ -543,6 +543,7 @@ void IRParser::parse() {
TORCH_INTERNAL_ASSERT(dtype);
auto options = at::TensorOptions(*device).dtype(*dtype);
auto t = n->t_(attr::value, at::empty_strided(*sizes, *strides, options));
(void)t;
}
}

View File

@ -264,8 +264,7 @@ c10::impl::GenericList Function::run(
const c10::IValue& input = inputs[i];
const auto& spec = input_specs_[i];
const auto& input_tensor = input.toTensor();
TORCH_CHECK(
input_specs_[i].validate(input_tensor), "Invalid input at pos: ", i);
TORCH_CHECK(spec.validate(input_tensor), "Invalid input at pos: ", i);
args[i] = input_tensor.data_ptr();
}
offset += inputs.size();

View File

@ -42,8 +42,7 @@ struct OldOpsReplacerWithUpgraders {
get_operator_version_map().find(schema_name.value());
if (version_entry != get_operator_version_map().end()) {
const auto& entry = version_entry->second;
auto upgrader_entry =
findUpgrader(version_entry->second, current_version);
auto upgrader_entry = findUpgrader(entry, current_version);
if (!upgrader_entry.has_value()) {
if (!isOpSymbolCurrent(schema_name.value(), current_version)) {
TORCH_INTERNAL_ASSERT(

View File

@ -619,7 +619,7 @@ std::vector<SSArgument> getNodeInputShapes(Node* n, const AliasDb& db) {
for (size_t node_index = 0; node_index < n->inputs().size(); ++node_index) {
auto type = n->input(node_index)->type();
if (auto tt = type->castRaw<TensorType>()) {
if (type->castRaw<TensorType>()) {
input_shapes.push_back(tensorShapeArg(n->input(node_index)));
continue;
}

View File

@ -92,7 +92,7 @@ void RunDecompositions(Block* block) {
void RunDecompositions(std::shared_ptr<Graph> g) {
RunDecompositions(g->block());
for (const auto _ : c10::irange(2)) {
for (C10_UNUSED const auto _ : c10::irange(2)) {
PeepholeOptimize(g, /*disable_shape_peephole*/ true);
ConstantPropagation(g);
}

View File

@ -120,7 +120,6 @@ FusionStrategy setFusionStrategy(FusionStrategy& strategy) {
}
static std::atomic<size_t> num_profiled_runs{kDefaultNumProfiledRuns};
static std::atomic<size_t> bailout_depth{kDefaultBailoutDepth};
std::atomic<bool>& getProfilingMode() {
return profiling_mode;

View File

@ -92,7 +92,6 @@ bool isUnsupportedOp(Node* node) {
bool canEnableStaticRuntime(const std::shared_ptr<torch::jit::Graph>& graph) {
// check for sub-blocks
bool can_support = true;
bool has_blocks = false;
for (auto* node : graph->block()->nodes()) {
const auto kind = node->kind();
if (kind == prim::Constant) {

View File

@ -407,7 +407,6 @@ void StandardMemoryPlanner::deallocateManagedTensors() {
for (auto& ms : managed_tensors_) {
const auto& tensors = ms.group();
size_t max = ms.maxTensorSize();
auto tensor_idx = 0;
for (auto& tensor : tensors) {
const auto& storage = tensor->storage();
size_t current_size = compute_aligned_tensor_size(storage.nbytes());

View File

@ -149,7 +149,6 @@ std::vector<std::pair<BufPtr, BufPtr>> AllocBufsWithMemReuse(
}
auto start = std::get<0>(buf_ranges.at(buf));
auto end = std::get<1>(buf_ranges.at(buf));
// Release memory for buffers whose liveness range ends before the creation
// time of this buf.

View File

@ -1281,7 +1281,6 @@ void TensorExprKernel::bindConstant(const torch::jit::Value* v) {
}
auto const_tensor = toIValue(v)->toTensor();
auto scalar_type = c10::typeMetaToScalarType(const_tensor.options().dtype());
const auto& tt = v->type()->expect<TensorType>();
auto sizes = const_tensor.sizes();
std::vector<ExprHandle> te_sizes;
te_sizes.reserve(sizes.size());

View File

@ -1743,8 +1743,7 @@ int nnc_lowerings_lazy_registration() {
} // namespace
NNCLoweringFunction getStandardLoweringFor(const std::string& schema_str) {
// NOLINTNEXTLINE
static const int once = nnc_lowerings_lazy_registration();
C10_UNUSED static const int once = nnc_lowerings_lazy_registration();
const auto& lowerings = getNNCLoweringRegistry();
if (auto l = lowerings.find(parseSchema(schema_str))) {
return *l;

View File

@ -259,8 +259,6 @@ Tensor computeDequantizeExternalCall(
}
const BufHandle& qx = c10::get<BufHandle>(inputs[0]);
const double qscale = immQScale(qx);
const int64_t qzero = immQZero(qx);
const int64_t qdtype = (int64_t)immQDType(qx);
BufHandle ResultBuf("dequantize", outputShape, dtype);

View File

@ -168,7 +168,7 @@ torch::lazy::BackendDataPtr TSBackendImpl::CreateDataPlaceholder(
std::vector<torch::lazy::ComputationPtr> TSBackendImpl::Compile(
std::vector<torch::lazy::ComputationPtr> instances) const {
for (const auto& instance : instances) {
auto ts_computation =
C10_UNUSED auto ts_computation =
static_cast<torch::lazy::TSComputation*>(instance.get());
}
return instances;

View File

@ -72,7 +72,6 @@ std::list<std::pair<at::RecordFunctionHandle, int>> flattenOpIdList(c10::List<c1
}
std::list<std::pair<at::RecordFunctionHandle, int>> getInputTensorOpIds(const at::RecordFunction& fn) {
int num_inputs = fn.inputs().size();
std::pair<at::RecordFunctionHandle, int> undefined_op_pair(0,-1);
std::list<std::pair<at::RecordFunctionHandle, int>> input_producer_ops_;
auto state_ptr = NVTXThreadLocalState::getTLS();

View File

@ -35,7 +35,7 @@ ApproximateClockToUnixTimeConverter::measurePair() {
ApproximateClockToUnixTimeConverter::time_pairs
ApproximateClockToUnixTimeConverter::measurePairs() {
static constexpr auto n_warmup = 5;
for (const auto _ : c10::irange(n_warmup)) {
for (C10_UNUSED const auto _ : c10::irange(n_warmup)) {
getApproximateTime();
steady_clock_t::now();
}