mirror of
https://github.com/zebrajr/pytorch.git
synced 2025-12-07 12:21:27 +01:00
Summary:
This change makes two major improvements to PyTorch Vulkan's shader authoring workflow.
## Review Guide
There are a lot of changed files because every GLSL shader had to be touched. The majority of the changes consist of replacing
```
#define PRECISION $precision
#define FORMAT $format
```
to
```
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}
```
due to changes in how shader templates are processed.
For reviewers, the primary functional changes to review are:
* `gen_vulkan_spv.py`
* Majority of functional changes are in this file, which controls how shader templates are processed.
* `shader_params.yaml`
* controls how shader variants are generated
## Python Codeblocks in Shader Templates
From now on, every compute shader (i.e. `.glsl`) is treated as a shader template. To this effect, the `templates/` folder has been removed and there is now a global `shader_params.yaml` file to describe the shader variants that should be generated for all shader templates.
**Taking inspiration from XNNPACK's [`xngen` tool](https://github.com/google/XNNPACK/blob/master/tools/xngen.py), shader templates can now use Python codeblocks**. One example is:
```
$if not INPLACE:
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
layout(set = 0, binding = 2) uniform PRECISION sampler3D uOther;
layout(set = 0, binding = 3) uniform PRECISION restrict Block {
ivec4 output_sizes;
ivec4 input_sizes;
ivec4 other_sizes;
float alpha;
}
uArgs;
$else:
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict image3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION sampler3D uOther;
layout(set = 0, binding = 2) uniform PRECISION restrict Block {
ivec4 output_sizes;
ivec4 other_sizes;
float alpha;
}
uArgs;
```
Another is:
```
// PYTHON CODEBLOCK
$if not IS_DIV:
const int c_index = (pos.z % ((uArgs.output_sizes.z + 3) / 4)) * 4;
if (uArgs.other_sizes.z != 1 && c_index + 3 >= uArgs.output_sizes.z) {
ivec4 c_ind = ivec4(c_index) + ivec4(0, 1, 2, 3);
vec4 mask = vec4(lessThan(c_ind, ivec4(uArgs.output_sizes.z)));
other_texel = other_texel * mask + vec4(1, 1, 1, 1) - mask;
}
// PYTHON CODEBLOCK
$if not INPLACE:
ivec3 input_pos =
map_output_pos_to_input_pos(pos, uArgs.output_sizes, uArgs.input_sizes);
const vec4 in_texel =
load_texel(input_pos, uArgs.output_sizes, uArgs.input_sizes, uInput);
imageStore(uOutput, pos, OP(in_texel, other_texel, uArgs.alpha));
$else:
const vec4 in_texel = imageLoad(uOutput, pos);
imageStore(uOutput, pos, OP(in_texel, other_texel, uArgs.alpha));
```
In addition to making it easier and clearer to write shader templates, this enables shaders that were previously unable to be consolidated into a single template to now be represented using a single template, such as non inplace and inplace variants of the same shader.
## `generate_variant_forall` in shader variant YAML configuration
YAML files that describe how shader variants should be generated can now use a `generate_variant_forall` field to iterate over various settings for a specific parameter for each variant defined. Example:
```
unary_op:
parameter_names_with_default_values:
OPERATOR: exp(X)
INPLACE: 0
generate_variant_forall:
INPLACE:
- VALUE: 0
SUFFIX: ""
- VALUE: 1
SUFFIX: "inplace"
shader_variants:
- NAME: exp
OPERATOR: exp(X)
- NAME: sqrt
OPERATOR: sqrt(X)
- NAME: log
OPERATOR: log(X)
```
Previously, the `inplace` variants would need to have separate `shader_variants` entries. If there are multiple variables that need to be iterated across, then all possible combinations will be generated. It would be good for reviewers to take a look at the YAML files in this change to see how the new configuration works.
Test Plan:
There is no functional change in this diff; we only need to make sure that the generated shaders are still correct. Therefore, we only need to run `vulkan_api_test`.
```
# On Mac Laptop
buck run --target-platforms ovr_config//platform/macos:arm64-fbsource //xplat/caffe2:pt_vulkan_api_test_binAppleMac\#macosx-arm64 -c pt.vulkan_full_precision=1 -- --gtest_filter="*"
```
Reviewed By: digantdesai
Differential Revision: D52087084
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115948
Approved by: https://github.com/manuelcandales
134 lines
3.7 KiB
Python
134 lines
3.7 KiB
Python
import tempfile
|
|
import unittest
|
|
|
|
from tools.gen_vulkan_spv import DEFAULT_ENV, SPVGenerator
|
|
|
|
####################
|
|
# Data for testing #
|
|
####################
|
|
|
|
test_shader = """
|
|
#version 450 core
|
|
|
|
#define FORMAT ${FORMAT}
|
|
#define PRECISION ${PRECISION}
|
|
#define OP(X) ${OPERATOR}
|
|
|
|
$def is_int(dtype):
|
|
$ return dtype in {"int", "int32", "int8"}
|
|
|
|
$def is_uint(dtype):
|
|
$ return dtype in {"uint", "uint32", "uint8"}
|
|
|
|
$if is_int(DTYPE):
|
|
#define VEC4_T ivec4
|
|
$elif is_uint(DTYPE):
|
|
#define VEC4_T uvec4
|
|
$else:
|
|
#define VEC4_T vec4
|
|
|
|
$if not INPLACE:
|
|
$if is_int(DTYPE):
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly iimage3D uOutput;
|
|
layout(set = 0, binding = 1) uniform PRECISION isampler3D uInput;
|
|
$elif is_uint(DTYPE):
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly uimage3D uOutput;
|
|
layout(set = 0, binding = 1) uniform PRECISION usampler3D uInput;
|
|
$else:
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict writeonly image3D uOutput;
|
|
layout(set = 0, binding = 1) uniform PRECISION sampler3D uInput;
|
|
$else:
|
|
$if is_int(DTYPE):
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict iimage3D uOutput;
|
|
$elif is_uint(DTYPE):
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict uimage3D uOutput;
|
|
$else:
|
|
layout(set = 0, binding = 0, FORMAT) uniform PRECISION restrict image3D uOutput;
|
|
|
|
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
|
|
|
|
void main() {
|
|
const ivec3 pos = ivec3(gl_GlobalInvocationID);
|
|
$if not INPLACE:
|
|
VEC4_T v = texelFetch(uInput, pos, 0);
|
|
$else:
|
|
VEC4_T v = imageLoad(uOutput, pos);
|
|
$for i in range(ITER[0]):
|
|
for (int i = 0; i < ${ITER[1]}; ++i) {
|
|
v = OP(v + i);
|
|
}
|
|
imageStore(uOutput, pos, OP(v));
|
|
}
|
|
|
|
"""
|
|
|
|
test_params_yaml = """
|
|
test_shader:
|
|
parameter_names_with_default_values:
|
|
DTYPE: float
|
|
INPLACE: false
|
|
OPERATOR: X + 3
|
|
ITER: !!python/tuple [3, 5]
|
|
generate_variant_forall:
|
|
INPLACE:
|
|
- VALUE: false
|
|
SUFFIX: ""
|
|
- VALUE: true
|
|
SUFFIX: inplace
|
|
DTYPE:
|
|
- VALUE: int8
|
|
- VALUE: float
|
|
shader_variants:
|
|
- NAME: test_shader_1
|
|
- NAME: test_shader_3
|
|
OPERATOR: X - 1
|
|
ITER: !!python/tuple [3, 2]
|
|
generate_variant_forall:
|
|
DTYPE:
|
|
- VALUE: float
|
|
- VALUE: int
|
|
|
|
"""
|
|
|
|
##############
|
|
# Unit Tests #
|
|
##############
|
|
|
|
|
|
class TestVulkanSPVCodegen(unittest.TestCase):
    """Checks the set of shader variant names SPVGenerator produces.

    Each test writes the module-level shader template and YAML variant
    configuration into a fresh temporary directory and points an
    SPVGenerator at it.
    """

    def setUp(self) -> None:
        """Create the temporary source/output directories and the generator."""
        self.tmpdir = tempfile.TemporaryDirectory()
        # Registered cleanups run after every test and also when a later step
        # of setUp raises, so the directory never outlives the test.
        self.addCleanup(self.tmpdir.cleanup)

        with open(f"{self.tmpdir.name}/test_shader.glsl", "w") as f:
            f.write(test_shader)

        with open(f"{self.tmpdir.name}/test_params.yaml", "w") as f:
            f.write(test_params_yaml)

        self.tmpoutdir = tempfile.TemporaryDirectory()
        self.addCleanup(self.tmpoutdir.cleanup)

        self.generator = SPVGenerator(
            src_dir_paths=self.tmpdir.name, env=DEFAULT_ENV, glslc_path=None
        )

    def cleanUp(self) -> None:
        """Remove both temporary directories.

        unittest does not call this method on its own; removal is handled by
        the cleanups registered in setUp. It is kept so explicit callers keep
        working, and calling it in addition to those cleanups is harmless.
        """
        self.tmpdir.cleanup()
        self.tmpoutdir.cleanup()

    def testOutputMap(self) -> None:
        """The generator's output map holds exactly the expected variant names."""
        # Each shader variant will produce variants generated based on all possible combinations
        # of the DTYPE and INPLACE parameters. test_shader_3 has fewer generated variants due to
        # a custom specified generate_variant_forall field.
        expected_output_shaders = {
            "test_shader_1_float",
            "test_shader_1_inplace_float",
            "test_shader_1_inplace_int8",
            "test_shader_1_int8",
            "test_shader_3_float",
            "test_shader_3_int",
        }

        actual_output_shaders = set(self.generator.output_shader_map.keys())

        self.assertEqual(expected_output_shaders, actual_output_shaders)
|