mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 00:21:07 +01:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/26125 We already had some optimization implementation using AVX2 to improve quantized kernel performance. In this diff, we want to enable the runtime dispatch. Test Plan: Sandcastle build and test Also test with a python binary calling into vectorized op. torch.__config__.show() PyTorch built with: - GCC 4.2 - clang 8.0.20181009 - Intel(R) Math Kernel Library Version 2017.0.3 Product Build 20170413 for Intel(R) 64 architecture applications - Intel(R) MKL-DNN v0.18.1 (Git Hash N/A) - OpenMP 1 - **CPU capability usage: AVX2** - Build settings: Reviewed By: jamesr66a Differential Revision: D17337251 fbshipit-source-id: 8e22d10011a12a4eaf54cea3485353eb1811d828
53 lines
1.6 KiB
C++
53 lines
1.6 KiB
C++
#include <gtest/gtest.h>
|
|
|
|
#include <torch/torch.h>
|
|
#include <ATen/native/Pow.h>
|
|
#include <torch/types.h>
|
|
#include <torch/utils.h>
|
|
#include <test/cpp/api/support.h>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <type_traits>
|
|
#include <cstdlib>
|
|
|
|
using namespace at;
|
|
using namespace torch::test;
|
|
|
|
// Fixture for CPU-capability dispatch tests. Inherits SeedingFixture,
// which presumably re-seeds the global RNG before each test for
// determinism — see test/cpp/api/support.h to confirm.
struct DispatchTest : torch::test::SeedingFixture {};
|
|
|
|
// Checks that integer tensor pow produces correct results when the CPU
// capability is pinned to AVX2 via the ATEN_CPU_CAPABILITY env var.
TEST_F(DispatchTest, TestAVX2) {
  // Set the override BEFORE any ATen op runs: the dispatcher reads
  // ATEN_CPU_CAPABILITY lazily and caches the result at first dispatch,
  // so setting it after torch::tensor() (which already invokes kernels)
  // may have no effect.
  // NOTE(review): the capability is latched process-wide, so only the
  // first test to trigger dispatch truly exercises its override —
  // confirm against ATen's DispatchStub implementation.
  setenv("ATEN_CPU_CAPABILITY", "avx2", 1);
  const std::vector<int> ints{1, 2, 3, 4};
  // Expected element-wise self-powers: 1^1, 2^2, 3^3, 4^4.
  const std::vector<int> expected{1, 4, 27, 256};
  const auto vals_tensor = torch::tensor(ints);
  const auto pows_tensor = torch::tensor(ints);
  const auto actual_pow_avx2 = vals_tensor.pow(pows_tensor);
  for (std::size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], actual_pow_avx2[i].item<int>());
  }
}
|
|
|
|
// Checks that integer tensor pow produces correct results when the CPU
// capability is pinned to AVX via the ATEN_CPU_CAPABILITY env var.
TEST_F(DispatchTest, TestAVX) {
  // Set the override BEFORE any ATen op runs: the dispatcher reads
  // ATEN_CPU_CAPABILITY lazily and caches the result at first dispatch,
  // so setting it after torch::tensor() (which already invokes kernels)
  // may have no effect.
  // NOTE(review): the capability is latched process-wide, so only the
  // first test to trigger dispatch truly exercises its override —
  // confirm against ATen's DispatchStub implementation.
  setenv("ATEN_CPU_CAPABILITY", "avx", 1);
  const std::vector<int> ints{1, 2, 3, 4};
  // Expected element-wise self-powers: 1^1, 2^2, 3^3, 4^4.
  const std::vector<int> expected{1, 4, 27, 256};
  const auto vals_tensor = torch::tensor(ints);
  const auto pows_tensor = torch::tensor(ints);
  const auto actual_pow_avx = vals_tensor.pow(pows_tensor);
  for (std::size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], actual_pow_avx[i].item<int>());
  }
}
|
|
|
|
// Checks that integer tensor pow produces correct results when the CPU
// capability is forced to the scalar (non-vectorized) default kernels.
TEST_F(DispatchTest, TestDefault) {
  // Set the override BEFORE any ATen op runs: the dispatcher reads
  // ATEN_CPU_CAPABILITY lazily and caches the result at first dispatch,
  // so setting it after torch::tensor() (which already invokes kernels)
  // may have no effect.
  // NOTE(review): the capability is latched process-wide, so only the
  // first test to trigger dispatch truly exercises its override —
  // confirm against ATen's DispatchStub implementation.
  setenv("ATEN_CPU_CAPABILITY", "default", 1);
  const std::vector<int> ints{1, 2, 3, 4};
  // Expected element-wise self-powers: 1^1, 2^2, 3^3, 4^4.
  const std::vector<int> expected{1, 4, 27, 256};
  const auto vals_tensor = torch::tensor(ints);
  const auto pows_tensor = torch::tensor(ints);
  const auto actual_pow_default = vals_tensor.pow(pows_tensor);
  for (std::size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], actual_pow_default[i].item<int>());
  }
}
|