diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_deserialize.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_deserialize.cpp
index 26515a1b304..2b0e2f6785b 100644
--- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_deserialize.cpp
+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_deserialize.cpp
@@ -106,7 +106,7 @@ c10::intrusive_ptr PackedLinearWeight::deserialize(
         std::get(serialized),
         weight_zero_points,
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQInt8));
+        at::device(c10::kCPU).dtype(c10::kQInt8));
   }
 
   const at::Tensor loaded_weight_values =
diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
index 196e711da7e..bda1984d620 100644
--- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
@@ -46,7 +46,7 @@ LinearPackedSerializationType PackedLinearWeight::unpack() {
         scales,
         zero_points,
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQInt8));
+        at::device(c10::kCPU).dtype(c10::kQInt8));
   }
 
   int8_t* weight_ptr_int8 =
@@ -100,7 +100,7 @@ LinearPackedSerializationType PackedLinearWeightQnnp::unpack() {
         scales,
         zero_points,
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQInt8));
+        at::device(c10::kCPU).dtype(c10::kQInt8));
   }
 
   int8_t* weight_ptr_int8 =
diff --git a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
index c9c09cf2464..f62db511b36 100644
--- a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
+++ b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
@@ -32,16 +32,16 @@ std::tuple> PackedLinearWeight::unpack() {
         {N, K}, at::device(c10::kCPU).dtype(c10::kQInt8), w_scale[0], w_zp[0]);
   } else if (q_scheme == c10::kPerChannelAffine) {
     auto scales = at::from_blob(
-        w_scale.data(), w_scale.size(), device(c10::kCPU).dtype(c10::kFloat));
+        w_scale.data(), w_scale.size(), at::device(c10::kCPU).dtype(c10::kFloat));
     auto zero_points = at::from_blob(
-        w_zp.data(), w_zp.size(), device(c10::kCPU).dtype(c10::kInt));
+        w_zp.data(), w_zp.size(), at::device(c10::kCPU).dtype(c10::kInt));
 
     weight_origin = at::_empty_per_channel_affine_quantized(
         {N, K},
         scales.toType(c10::kDouble),
         zero_points.toType(c10::kLong),
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQInt8));
+        at::device(c10::kCPU).dtype(c10::kQInt8));
   }
 
   int8_t* weight_ptr_int8 =
@@ -81,7 +81,7 @@ std::tuple> PackedLinearWeightsQnnp::
   auto scales = at::from_blob(
       weight_scales_data,
       w_scales.sizes()[0] - kPaddingChannels,
-      device(c10::kCPU).dtype(c10::kFloat));
+      at::device(c10::kCPU).dtype(c10::kFloat));
   at::Tensor zero_points = at::empty(
       w_zero_points.size() - kPaddingChannels,
       at::device(c10::kCPU).dtype(c10::kLong));
@@ -93,7 +93,7 @@ std::tuple> PackedLinearWeightsQnnp::
             scales,
             zero_points.toType(c10::kLong),
             0, // The output channel axis is 0
-            device(c10::kCPU).dtype(c10::kQInt8))
+            at::device(c10::kCPU).dtype(c10::kQInt8))
             .contiguous();
   } else {
     TORCH_INTERNAL_ASSERT(false, "Unsupported quantization scheme.");
diff --git a/aten/src/ATen/native/quantized/cpu/qconv.cpp b/aten/src/ATen/native/quantized/cpu/qconv.cpp
index 9acdd0937c8..98e9a5af8a9 100644
--- a/aten/src/ATen/native/quantized/cpu/qconv.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qconv.cpp
@@ -448,7 +448,7 @@ at::Tensor PackedConvWeight::apply_impl(
   at::Tensor output = kSpatialDim == 2 ?
       at::_empty_affine_quantized(
           output_shape,
-          device(c10::kCPU)
+          at::device(c10::kCPU)
              .dtype(c10::kQUInt8)
              .memory_format(c10::MemoryFormat::ChannelsLast),
           output_scale,
@@ -460,7 +460,7 @@ at::Tensor PackedConvWeight::apply_impl(
           output_shape[2],
           output_shape[3],
           output_shape[4],
-          device(c10::kCPU).dtype(c10::kQUInt8),
+          at::device(c10::kCPU).dtype(c10::kQUInt8),
           output_scale,
           output_zero_point);
   at::Tensor buffer =
@@ -1225,7 +1225,7 @@ at::Tensor PackedConvWeightsOnednn::apply_impl(
   ideep::dims dst_dims = ideep::dims({output_sizes.cbegin(), output_sizes.cend()});
   at::Tensor output = at::_empty_affine_quantized(
       dst_dims,
-      device(c10::kCPU)
+      at::device(c10::kCPU)
          .dtype(c10::kQUInt8)
          .memory_format(kSpatialDim == 2 ?
              c10::MemoryFormat::ChannelsLast :
@@ -1593,7 +1593,7 @@ static at::Tensor _quantized_convolution_onednn(
       accum.value() :
       at::empty(
         dst_dims,
-        device(c10::kCPU)
+        at::device(c10::kCPU)
            .dtype(fp32_output ? c10::kFloat : (bfloat16_output ? c10::kBFloat16 : c10::kByte))
            .memory_format(kSpatialDim == 2 ?
                c10::MemoryFormat::ChannelsLast :
diff --git a/aten/src/ATen/native/quantized/cpu/qconv_unpack_impl.cpp b/aten/src/ATen/native/quantized/cpu/qconv_unpack_impl.cpp
index 83273f979e9..9ade8d3f212 100644
--- a/aten/src/ATen/native/quantized/cpu/qconv_unpack_impl.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qconv_unpack_impl.cpp
@@ -37,7 +37,7 @@ std::tuple> PackedConvWeight<
     unpacked_weights = kSpatialDim == 2 ?
         at::_empty_affine_quantized(
             {output_channels, C_per_G, kernel_h, kernel_w},
-            device(c10::kCPU)
+            at::device(c10::kCPU)
                .dtype(c10::kQInt8)
                .memory_format(c10::MemoryFormat::ChannelsLast),
             w_scale[0],
@@ -50,7 +50,7 @@ std::tuple> PackedConvWeight<
             kernel_d,
             kernel_h,
             kernel_w,
-            device(c10::kCPU).dtype(c10::kQInt8),
+            at::device(c10::kCPU).dtype(c10::kQInt8),
            w_scale[0],
            w_zp[0]);
   } else if (q_scheme == c10::kPerChannelAffine) {
@@ -58,16 +58,16 @@ std::tuple> PackedConvWeight<
         !transpose(),
         "Per Channel Quantization is currently disabled for transposed conv");
     auto scales = at::from_blob(
-        w_scale.data(), w_scale.size(), device(c10::kCPU).dtype(c10::kFloat));
+        w_scale.data(), w_scale.size(), at::device(c10::kCPU).dtype(c10::kFloat));
     auto zero_points = at::from_blob(
-        w_zp.data(), w_zp.size(), device(c10::kCPU).dtype(c10::kInt));
+        w_zp.data(), w_zp.size(), at::device(c10::kCPU).dtype(c10::kInt));
     unpacked_weights = kSpatialDim == 2 ?
         at::_empty_per_channel_affine_quantized(
             {output_channels, C_per_G, kernel_h, kernel_w},
             scales.toType(c10::kDouble),
             zero_points.toType(c10::kLong),
             0, /* The output channel axis is 0 */
-            device(c10::kCPU).dtype(c10::kQInt8),
+            at::device(c10::kCPU).dtype(c10::kQInt8),
             c10::MemoryFormat::ChannelsLast)
         : at::native::fbgemm_utils::
             MakeEmptyPerChannelAffineQuantizedChannelsLast3dTensor(
@@ -76,7 +76,7 @@ std::tuple> PackedConvWeight<
                 kernel_d,
                 kernel_h,
                 kernel_w,
-                device(c10::kCPU).dtype(c10::kQInt8),
+                at::device(c10::kCPU).dtype(c10::kQInt8),
                 scales.toType(c10::kDouble),
                 zero_points.toType(c10::kLong));
   } else {
diff --git a/aten/src/ATen/native/quantized/cpu/qembeddingbag_unpack.cpp b/aten/src/ATen/native/quantized/cpu/qembeddingbag_unpack.cpp
index 7c023ffd56a..10e152aa0f1 100644
--- a/aten/src/ATen/native/quantized/cpu/qembeddingbag_unpack.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qembeddingbag_unpack.cpp
@@ -44,9 +44,9 @@ at::Tensor PackedEmbeddingBagWeight::unpack() {
       num_elem_per_byte};
 
   auto scales = at::from_blob(
-      w_scale.data(), w_scale.size(), device(c10::kCPU).dtype(c10::kFloat));
+      w_scale.data(), w_scale.size(), at::device(c10::kCPU).dtype(c10::kFloat));
   auto zero_points = at::from_blob(
-      w_zp.data(), w_zp.size(), device(c10::kCPU).dtype(c10::kFloat));
+      w_zp.data(), w_zp.size(), at::device(c10::kCPU).dtype(c10::kFloat));
 
   auto output_columns = output_shape[1];
   uint8_t* output_data = nullptr;
@@ -58,7 +58,7 @@ at::Tensor PackedEmbeddingBagWeight::unpack() {
         scales.toType(c10::kFloat),
         zero_points.toType(c10::kFloat),
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQUInt8));
+        at::device(c10::kCPU).dtype(c10::kQUInt8));
     output_data = static_cast(weight_origin.data_ptr());
   } else {
     // We create empty qtensor with the full output shape, and dtype set to
@@ -69,7 +69,7 @@ at::Tensor PackedEmbeddingBagWeight::unpack() {
         scales.toType(c10::kFloat),
         zero_points.toType(c10::kFloat),
         0, // The output channel axis is 0
-        device(c10::kCPU).dtype(c10::kQUInt4x2));
+        at::device(c10::kCPU).dtype(c10::kQUInt4x2));
     output_data = static_cast(weight_origin.data_ptr());
   }
 
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
index f1a81269a44..10f34a685f3 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
@@ -1015,7 +1015,7 @@ static at::Tensor linear_int8_with_onednn_weight(
       other.value() :
       at::empty(
         dst_dims,
-        device(c10::kCPU)
+        at::device(c10::kCPU)
            .dtype(fp32_output ? c10::kFloat : (bf16_output ? c10::kBFloat16 : c10::kByte))
       );
   if (output.numel() == 0) {
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
index 5db6a6e14c4..e2d5278d579 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -652,7 +652,7 @@ static at::Tensor linear_dynamic_fp16_with_onednn_weight(
   std::vector dst_dims = {M, N};
   at::Tensor output = at::empty(
         dst_dims,
-        device(c10::kCPU)
+        at::device(c10::kCPU)
            .dtype(c10::kFloat)
       );
   if (output.numel() == 0) {
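Note (not part of the patch): every hunk above makes the same mechanical change, qualifying the unqualified `device(c10::kCPU)` helper as `at::device(c10::kCPU)` so the `TensorOptions` builder no longer relies on the `at::` names being pulled into scope. A minimal sketch of the pattern, assuming a standard libtorch/ATen build; the standalone `main()` is illustrative only:

```cpp
// Minimal sketch (assumption: a standard libtorch/ATen build; this file and
// main() are illustrative and not part of the patch above).
#include <ATen/ATen.h>

int main() {
  // at::device(...) returns a c10::TensorOptions; chaining .dtype(...) (and,
  // where needed, .memory_format(...)) refines it -- the builder used in
  // every hunk of the diff.
  at::TensorOptions opts = at::device(c10::kCPU).dtype(c10::kFloat);

  // The options object is then handed to a factory function, as in the diff.
  at::Tensor t = at::empty({2, 3}, opts);

  // The unqualified device(...) call the patch replaces resolves to the same
  // helper, but only compiles where the at:: names are already in scope.
  return t.numel() == 6 ? 0 : 1;
}
```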