pytorch/test/cpp/api/dispatch.cpp
Lingyi Liu 09296c34a4 Add the build for runtime dispatch for AVX, AVX2 instruction set (#26125)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26125

We already have optimized implementations of the quantized kernels using AVX2. This diff enables runtime dispatch so the appropriate kernel is selected based on the CPU's instruction-set capability.

Test Plan:
Sandcastle build and test

Also test with a python binary calling into vectorized op.

torch.__config__.show()
PyTorch built with:
  - GCC 4.2
  - clang 8.0.20181009
  - Intel(R) Math Kernel Library Version 2017.0.3 Product Build 20170413 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v0.18.1 (Git Hash N/A)
  - OpenMP 1
  - **CPU capability usage: AVX2**
  - Build settings:

Reviewed By: jamesr66a

Differential Revision: D17337251

fbshipit-source-id: 8e22d10011a12a4eaf54cea3485353eb1811d828
2020-03-10 15:32:57 -07:00

53 lines
1.6 KiB
C++

#include <gtest/gtest.h>
#include <torch/torch.h>
#include <ATen/native/Pow.h>
#include <torch/types.h>
#include <torch/utils.h>
#include <test/cpp/api/support.h>
#include <iostream>
#include <vector>
#include <type_traits>
#include <cstdlib>
using namespace at;
using namespace torch::test;
// Fixture for the CPU-capability dispatch tests below. Derives from
// SeedingFixture (test/cpp/api/support.h), which presumably seeds the
// RNG before each test for determinism -- confirm against support.h.
struct DispatchTest : torch::test::SeedingFixture {};
// Checks elementwise integer pow when ATEN_CPU_CAPABILITY requests the
// AVX2 kernel: [1,2,3,4] ^ [1,2,3,4] == [1,4,27,256].
// NOTE(review): setenv runs after the tensors are built but before pow;
// this assumes the capability override is read at dispatch time -- confirm,
// since the dispatch stub may cache the capability on first kernel use.
TEST_F(DispatchTest, TestAVX2) {
  const std::vector<int> values{1, 2, 3, 4};
  const std::vector<int> expected{1, 4, 27, 256};
  const auto base_tensor = torch::tensor(values);
  const auto exponent_tensor = torch::tensor(values);
  setenv("ATEN_CPU_CAPABILITY", "avx2", 1);
  const auto pow_avx2 = base_tensor.pow(exponent_tensor);
  for (size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], pow_avx2[i].item<int>());
  }
}
// Checks elementwise integer pow when ATEN_CPU_CAPABILITY requests the
// AVX kernel: [1,2,3,4] ^ [1,2,3,4] == [1,4,27,256].
// NOTE(review): setenv runs after the tensors are built but before pow;
// this assumes the capability override is read at dispatch time -- confirm,
// since the dispatch stub may cache the capability on first kernel use.
TEST_F(DispatchTest, TestAVX) {
  const std::vector<int> values{1, 2, 3, 4};
  const std::vector<int> expected{1, 4, 27, 256};
  const auto base_tensor = torch::tensor(values);
  const auto exponent_tensor = torch::tensor(values);
  setenv("ATEN_CPU_CAPABILITY", "avx", 1);
  const auto pow_avx = base_tensor.pow(exponent_tensor);
  for (size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], pow_avx[i].item<int>());
  }
}
// Checks elementwise integer pow when ATEN_CPU_CAPABILITY selects the
// non-vectorized default kernel: [1,2,3,4] ^ [1,2,3,4] == [1,4,27,256].
// NOTE(review): setenv runs after the tensors are built but before pow;
// this assumes the capability override is read at dispatch time -- confirm,
// since the dispatch stub may cache the capability on first kernel use.
TEST_F(DispatchTest, TestDefault) {
  const std::vector<int> values{1, 2, 3, 4};
  const std::vector<int> expected{1, 4, 27, 256};
  const auto base_tensor = torch::tensor(values);
  const auto exponent_tensor = torch::tensor(values);
  setenv("ATEN_CPU_CAPABILITY", "default", 1);
  const auto pow_default = base_tensor.pow(exponent_tensor);
  for (size_t i = 0; i < expected.size(); ++i) {
    ASSERT_EQ(expected[i], pow_default[i].item<int>());
  }
}