[vulkan] glsl shaders relaxed precision mode to cmake option (#43076)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/43076

Test Plan: Imported from OSS

Reviewed By: AshkanAliabadi

Differential Revision: D23143354

Pulled By: IvanKobzarev

fbshipit-source-id: 7b3ead1e63cf8acf6e8e547080a8ead7a2db994b
This commit is contained in:
Ivan Kobzarev 2020-09-16 12:45:08 -07:00 committed by Facebook GitHub Bot
parent e9c6449b46
commit 6debe825be
20 changed files with 175 additions and 60 deletions

View File

@ -201,6 +201,7 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
option(USE_VULKAN "Use Vulkan GPU backend" OFF)
option(USE_VULKAN_WRAPPER "Use Vulkan wrapper" ON)
option(USE_VULKAN_SHADERC_RUNTIME "Use Vulkan Shader compilation runtime(Needs shaderc lib)" OFF)
option(USE_VULKAN_RELAXED_PRECISION "Use Vulkan relaxed precision(mediump)" OFF)
option(USE_XNNPACK "Use XNNPACK" ON)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
@ -513,6 +514,10 @@ if(USE_VULKAN_SHADERC_RUNTIME)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_SHADERC_RUNTIME")
endif()
# When the USE_VULKAN_RELAXED_PRECISION option is ON, append
# -DUSE_VULKAN_RELAXED_PRECISION to CMAKE_CXX_FLAGS so the macro is defined
# for C++ sources compiled with those flags.
if(USE_VULKAN_RELAXED_PRECISION)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_RELAXED_PRECISION")
endif()
# ---[ Allowlist file if allowlist is specified
include(cmake/Allowlist.cmake)

View File

@ -3,9 +3,11 @@
import argparse
import sys
import os
from tools.codegen.code_template import CodeTemplate
H_NAME = "glsl.h"
CPP_NAME = "glsl.cpp"
DEFAULT_ENV = {"precision": "highp"}
def findAllGlsls(path):
cmd = "find " + path + " -name \"*.glsl\""
@ -20,7 +22,7 @@ def findAllGlsls(path):
def getName(filePath):
return os.path.basename(filePath).replace("/", "_").replace(".", "_")
def genCppH(hFilePath, cppFilePath, glsls):
def genCppH(hFilePath, cppFilePath, templateGlslPaths, tmpDirPath, env):
print("hFilePath:{}".format(hFilePath))
print("cppFilePath:{}".format(cppFilePath))
h = "#pragma once\n"
@ -32,16 +34,21 @@ def genCppH(hFilePath, cppFilePath, glsls):
cpp = "#include <ATen/native/vulkan/{}>".format(H_NAME)
cpp += nsbegin
for s in glsls:
name = getName(s)
for templateGlslPath in templateGlslPaths:
name = getName(templateGlslPath)
h += "extern const char* " + name + ";\n"
cpp += "const char* " + name + " = \n"
with open(s) as f:
lines = f.read().split("\n")
for l in lines:
if (len(l) < 1):
continue
cpp += "\"" + l + "\\n\"\n"
codeTemplate = CodeTemplate.from_file(templateGlslPath)
srcPath = tmpDirPath + "/" + name + ".glsl"
content = codeTemplate.substitute(env)
lines = content.split("\n")
for l in lines:
if (len(l) < 1):
continue
cpp += "\"" + l + "\\n\"\n"
cpp += ";\n"
cpp += nsend
@ -52,6 +59,18 @@ def genCppH(hFilePath, cppFilePath, glsls):
with open(cppFilePath, "w") as f:
f.write(cpp)
def parse_arg_env(items):
    """Convert a list of "KEY=VALUE" strings into a dict.

    Args:
        items: iterable of "KEY=VALUE" strings, or None / empty when no
            pairs were supplied on the command line.

    Returns:
        dict mapping each stripped key to its stripped value. When a key
        appears more than once, the last occurrence wins.

    Raises:
        ValueError: if an item contains no "=" separator.
    """
    d = {}
    if items:
        for item in items:
            # Split on the first "=" only, so a value may itself contain "=".
            key, sep, value = item.partition("=")
            if not sep:
                raise ValueError(
                    "Expected KEY=VALUE, got: {!r}".format(item))
            d[key.strip()] = value.strip()
    return d
def main(argv):
parser = argparse.ArgumentParser(description='Generate glsl.cpp and glsl.h containing glsl sources')
parser.add_argument(
@ -65,13 +84,32 @@ def main(argv):
'--output-path',
help='path to directory to generate glsl.h glsl.cpp (cpp namespace at::native::vulkan)',
required=True)
parser.add_argument(
'-t',
'--tmp-dir-path',
required=True,
help='/tmp')
parser.add_argument(
"--env",
metavar="KEY=VALUE",
nargs='*',
help="Set a number of key-value pairs")
options = parser.parse_args()
if not os.path.exists(options.tmp_dir_path):
os.makedirs(options.tmp_dir_path)
# Start from a copy of the defaults: assigning DEFAULT_ENV directly would alias
# the module-level dict, and the overrides below would then mutate it.
env = dict(DEFAULT_ENV)
# Command-line KEY=VALUE pairs override the defaults.
env.update(parse_arg_env(options.env))
if not os.path.exists(options.output_path):
os.makedirs(options.output_path)
glsls = findAllGlsls(options.glsl_path)
genCppH(options.output_path + "/" + H_NAME, options.output_path + "/" + CPP_NAME, glsls)
genCppH(
options.output_path + "/" + H_NAME, options.output_path + "/" + CPP_NAME,
glsls,
tmpDirPath=options.tmp_dir_path,
env=env)
if __name__ == '__main__':
sys.exit(main(sys.argv))

View File

@ -5,36 +5,50 @@ import array
import os
import sys
import subprocess
from tools.codegen.code_template import CodeTemplate
H_NAME = "spv.h"
CPP_NAME = "spv.cpp"
DEFAULT_ENV = {"precision": "highp"}
def getName(filePath):
return os.path.basename(filePath).replace("/", "_").replace(".", "_")
def genCppH(hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath):
def genCppH(hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath, env):
print("hFilePath:{} cppFilePath:{} srcDirPath:{} glslcPath:{} tmpDirPath:{}".format(
hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath))
cmd = "find " + srcDirPath + " -name \"*.glsl\""
vexs = os.popen(cmd).read().split('\n')
srcPaths = []
templateSrcPaths = []
for f in vexs:
if len(f) > 1:
srcPaths.append(f)
srcPaths.sort()
print("srcPaths:{}".format(srcPaths))
templateSrcPaths.append(f)
templateSrcPaths.sort()
print("templateSrcPaths:{}".format(templateSrcPaths))
spvPaths = []
for srcPath in srcPaths:
print("srcPath {}".format(srcPath))
name = getName(srcPath).replace("_glsl", "")
for templateSrcPath in templateSrcPaths:
print("templateSrcPath {}".format(templateSrcPath))
name = getName(templateSrcPath).replace("_glsl", "")
print("name {}".format(name))
codeTemplate = CodeTemplate.from_file(templateSrcPath)
srcPath = tmpDirPath + "/" + name + ".glsl"
content = codeTemplate.substitute(env)
with open(srcPath, 'w') as f:
f.write(content)
spvPath = tmpDirPath + "/" + name + ".spv"
print("spvPath {}".format(spvPath))
cmd = [glslcPath, "-fshader-stage=compute", srcPath, "-o", spvPath, "--target-env=vulkan1.0"]
cmd = [
glslcPath, "-fshader-stage=compute",
srcPath, "-o", spvPath,
"--target-env=vulkan1.0",
"-Werror"
]
print("\nglslc cmd:", cmd)
subprocess.check_call(cmd)
@ -74,6 +88,18 @@ def genCppH(hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath):
with open(cppFilePath, "w") as f:
f.write(cpp)
def parse_arg_env(items):
    """Convert a list of "KEY=VALUE" strings into a dict.

    Args:
        items: iterable of "KEY=VALUE" strings, or None / empty when no
            pairs were supplied on the command line.

    Returns:
        dict mapping each stripped key to its stripped value. When a key
        appears more than once, the last occurrence wins.

    Raises:
        ValueError: if an item contains no "=" separator.
    """
    d = {}
    if items:
        for item in items:
            # Split on the first "=" only, so a value may itself contain "=".
            key, sep, value = item.partition("=")
            if not sep:
                raise ValueError(
                    "Expected KEY=VALUE, got: {!r}".format(item))
            d[key.strip()] = value.strip()
    return d
def main(argv):
parser = argparse.ArgumentParser(description='')
parser.add_argument(
@ -88,7 +114,7 @@ def main(argv):
help='')
parser.add_argument(
'-t',
'--tmp-spv-path',
'--tmp-dir-path',
required=True,
help='/tmp')
parser.add_argument(
@ -96,20 +122,29 @@ def main(argv):
'--output-path',
required=True,
help='')
parser.add_argument(
"--env",
metavar="KEY=VALUE",
nargs='*',
help="Set a number of key-value pairs")
options = parser.parse_args()
# Start from a copy of the defaults: assigning DEFAULT_ENV directly would alias
# the module-level dict, and the overrides below would then mutate it.
env = dict(DEFAULT_ENV)
# Command-line KEY=VALUE pairs override the defaults.
env.update(parse_arg_env(options.env))
if not os.path.exists(options.output_path):
os.makedirs(options.output_path)
if not os.path.exists(options.tmp_spv_path):
os.makedirs(options.tmp_spv_path)
if not os.path.exists(options.tmp_dir_path):
os.makedirs(options.tmp_dir_path)
genCppH(
hFilePath=options.output_path + "/spv.h",
cppFilePath=options.output_path + "/spv.cpp",
srcDirPath=options.glsl_path,
glslcPath=options.glslc_path,
tmpDirPath=options.tmp_spv_path)
tmpDirPath=options.tmp_dir_path,
env=env)
if __name__ == '__main__':
sys.exit(main(sys.argv))

View File

@ -833,7 +833,9 @@ void ComputeUnit::createComputePipelineCompile(
const WorkGroupSize workGroupSize) {
shaderc::Compiler compiler{};
shaderc::CompileOptions options{};
#ifdef DEBUG
options.SetGenerateDebugInfo();
#endif
options.SetTargetEnvironment(
shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_0);
options.SetForcedVersionProfile(450, shaderc_profile_core);

View File

@ -455,9 +455,9 @@ class ComputeUnit final {
#ifdef USE_VULKAN_SHADERC_RUNTIME
void createComputePipelineCompile(
const std::string& glslSrc,
VkPipelineCache pipelineCache,
VkDescriptorSetLayout descrSetLayout,
WorkGroupSize workGroupSize);
const VkPipelineCache pipelineCache,
const VkDescriptorSetLayout& descrSetLayout,
const WorkGroupSize workGroupSize);
#endif
void createCommandBuffer(VkDescriptorSet& descriptorSet);

View File

@ -570,7 +570,11 @@ void add(VulkanTensor& output, const VulkanTensor& input, const float s) {
int32_t inputSize[4];
float s;
};
ConstBlock cb{{W, H, C_4, 0}, s};
ConstBlock cb{{safe_downcast<int32_t>(W),
safe_downcast<int32_t>(H),
safe_downcast<int32_t>(C_4),
0},
s};
VBuffer constBuffer = makeUniformConstBuffer((void*)&cb, sizeof(cb));
VkDescriptorSetLayout descriptorSetLayout{};
@ -619,7 +623,11 @@ void mul(VulkanTensor& output, const VulkanTensor& input, const float s) {
int32_t inputSize[4];
float s;
};
ConstBlock cb{{W, H, C_4, 0}, s};
ConstBlock cb{{safe_downcast<int32_t>(W),
safe_downcast<int32_t>(H),
safe_downcast<int32_t>(C_4),
0},
s};
VBuffer constBuffer = makeUniformConstBuffer((void*)&cb, sizeof(cb));
VkDescriptorSetLayout descriptorSetLayout{};

View File

@ -1,7 +1,8 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uOutput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) readonly buffer kernel {
vec4 data[];
}

View File

@ -1,8 +1,9 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uOutput;
layout(set = 0, binding = 1) uniform highp sampler3D uInput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform constBlock {
int IW;
int IH;

View File

@ -1,10 +1,11 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput0;
layout(set = 0, binding = 2) uniform mediump sampler3D uInput1;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput0;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uInput1;
layout(set = 0, binding = 3) uniform constBlock {
int W;
int H;

View File

@ -1,9 +1,10 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uM1;
layout(set = 0, binding = 2) uniform mediump sampler3D uM2;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uM1;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uM2;
layout(set = 0, binding = 3) uniform constBlock {
ivec4 outputSize;
float beta;
@ -11,7 +12,7 @@ layout(set = 0, binding = 3) uniform constBlock {
int K;
}
uConstBlock;
layout(set = 0, binding = 4) uniform mediump sampler3D uT;
layout(set = 0, binding = 4) uniform PRECISION sampler3D uT;
layout(local_size_x_id = 1, local_size_y_id = 2, local_size_z_id = 3) in;

View File

@ -1,8 +1,9 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform constBlock {
ivec4 size;
float minValue;

View File

@ -1,9 +1,10 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput;
layout(set = 0, binding = 2) uniform mediump sampler3D uKernel;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uKernel;
layout(set = 0, binding = 3) readonly buffer bias {
vec4 data[];
}

View File

@ -1,9 +1,10 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uOutput;
layout(set = 0, binding = 1) uniform highp sampler3D uInput;
layout(set = 0, binding = 2) uniform highp sampler3D uKernel;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uKernel;
layout(set = 0, binding = 3) readonly buffer bias {
vec4 data[];
}

View File

@ -1,7 +1,8 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, binding = 0) uniform highp sampler3D uInput;
layout(set = 0, binding = 0) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 1) writeonly buffer destBuffer {
float data[];
}

View File

@ -1,8 +1,9 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform constBlock {
ivec4 inputSize;
ivec4 outputSize;

View File

@ -1,8 +1,9 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform constBlock {
int W;
int H;

View File

@ -1,9 +1,10 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uM1;
layout(set = 0, binding = 2) uniform mediump sampler3D uM2;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uM1;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uM2;
layout(set = 0, binding = 3) uniform constBlock {
ivec4 outputSize;
float beta;

View File

@ -1,7 +1,8 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uImage;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uImage;
layout(set = 0, binding = 1) readonly buffer destBuffer {
float data[];
}

View File

@ -1,8 +1,9 @@
#version 450 core
#define PRECISION $precision
layout(std430) buffer;
layout(std430) uniform;
layout(set = 0, rgba16f, binding = 0) writeonly mediump uniform image3D uOutput;
layout(set = 0, binding = 1) uniform mediump sampler3D uInput;
layout(set = 0, rgba16f, binding = 0) writeonly PRECISION uniform image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform constBlock {
int IW;
int IH;

View File

@ -4,15 +4,25 @@ if(NOT USE_VULKAN)
endif()
set(VULKAN_GEN_OUTPUT_PATH "${CMAKE_BINARY_DIR}/vulkan/ATen/native/vulkan")
# KEY=VALUE pairs passed to the shader generator scripts after --env.
# Built as a CMake list so that each pair expands to its own command-line
# argument when the variable is used unquoted; string(APPEND) would fuse a
# second pair onto the first.
set(VULKAN_GEN_ARG_ENV "")
if(USE_VULKAN_RELAXED_PRECISION)
  list(APPEND VULKAN_GEN_ARG_ENV "precision=mediump")
endif()
if(USE_VULKAN_SHADERC_RUNTIME)
set(PYTHONPATH "$ENV{PYTHONPATH}")
set(ENV{PYTHONPATH} "$ENV{PYTHONPATH}:${CMAKE_CURRENT_LIST_DIR}/..")
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/gen_glsl.py
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/gen_vulkan_glsl.py
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
--output-path ${VULKAN_GEN_OUTPUT_PATH}
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/glsl
--env ${VULKAN_GEN_ARG_ENV}
RESULT_VARIABLE error_code)
# Restore the PYTHONPATH value saved before the generator ran. The reference
# must be written ${PYTHONPATH}: a bare "$PYTHONPATH" is not expanded by CMake
# and would set the environment variable to that literal text.
set(ENV{PYTHONPATH} "${PYTHONPATH}")
if(error_code)
message(FATAL_ERROR "Failed to gen glsl.h and glsl.cpp with shaders sources for Vulkan backend")
@ -42,15 +52,19 @@ if(NOT USE_VULKAN_SHADERC_RUNTIME)
endif(GLSLC_PATH)
endif()
set(PYTHONPATH "$ENV{PYTHONPATH}")
set(ENV{PYTHONPATH} "$ENV{PYTHONPATH}:${CMAKE_CURRENT_LIST_DIR}/..")
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/gen_spv.py
${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/gen_vulkan_spv.py
--glsl-path ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/vulkan/glsl
--output-path ${VULKAN_GEN_OUTPUT_PATH}
--glslc-path=${GLSLC_PATH}
--tmp-spv-path=${CMAKE_BINARY_DIR}/vulkan/spv
--tmp-dir-path=${CMAKE_BINARY_DIR}/vulkan/spv
--env ${VULKAN_GEN_ARG_ENV}
RESULT_VARIABLE error_code)
# Restore the PYTHONPATH value saved before the generator ran. The reference
# must be written ${PYTHONPATH}: a bare "$PYTHONPATH" is not expanded by CMake
# and would set the environment variable to that literal text.
set(ENV{PYTHONPATH} "${PYTHONPATH}")
if(error_code)
message(FATAL_ERROR "Failed to gen spv.h and spv.cpp with precompiled shaders for Vulkan backend")