remote slang copy

This commit is contained in:
janis 2026-04-03 18:44:46 +02:00
parent c0d807b163
commit 465c6294e6
Signed by: janis
SSH key fingerprint: SHA256:bB1qbbqmDXZNT0KKD5c2Dfjg53JGhj7B3CFcLIzSqq8
152 changed files with 0 additions and 117822 deletions

View file

@ -1,115 +0,0 @@
const std = @import("std");
// Build script. Although it reads imperatively, its only job is to
// declaratively assemble a build graph that an external runner executes.
pub fn build(b: *std.Build) void {
    // Let the invoker choose the target triple (`-Dtarget=...`); default is native.
    const resolved_target = b.standardTargetOptions(.{});

    // Let the invoker choose Debug/ReleaseSafe/ReleaseFast/ReleaseSmall.
    const optimize_mode = b.standardOptimizeOption(.{});

    // Library module rooted at src/root.zig. A module is a set of source
    // files plus compilation options; compile steps are built on modules.
    const library_module = b.createModule(.{
        .root_source_file = b.path("src/root.zig"),
        .target = resolved_target,
        .optimize = optimize_mode,
    });

    // Executable module rooted at src/main.zig.
    const executable_module = b.createModule(.{
        .root_source_file = b.path("src/main.zig"),
        .target = resolved_target,
        .optimize = optimize_mode,
    });

    // Wire the modules together: `@import("shader_builder_lib")` inside the
    // executable's sources now resolves to the library module.
    executable_module.addImport("shader_builder_lib", library_module);

    // Static library compile step; installed by the default `install` step.
    const static_lib = b.addStaticLibrary(.{
        .name = "shader_builder",
        .root_module = library_module,
    });
    b.installArtifact(static_lib);

    // Executable compile step; likewise installed.
    const executable = b.addExecutable(.{
        .name = "shader_builder",
        .root_module = executable_module,
    });
    b.installArtifact(executable);

    // `zig build run` support. Depending on the install step makes the
    // program run from the install directory (not the cache), so any other
    // installed files it needs are present.
    const run_artifact = b.addRunArtifact(executable);
    run_artifact.step.dependOn(b.getInstallStep());

    // Forward user arguments: `zig build run -- arg1 arg2 etc`.
    if (b.args) |forwarded_args| {
        run_artifact.addArgs(forwarded_args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_artifact.step);

    // `zig build test` support: build and run the unit tests of both modules.
    const library_tests = b.addTest(.{
        .root_module = library_module,
    });
    const run_library_tests = b.addRunArtifact(library_tests);

    const executable_tests = b.addTest(.{
        .root_module = executable_module,
    });
    const run_executable_tests = b.addRunArtifact(executable_tests);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_library_tests.step);
    test_step.dependOn(&run_executable_tests.step);
}

View file

@ -1,73 +0,0 @@
// Package manifest consumed by the Zig package manager (`zig fetch`, `zig build`).
.{
    // This is the default name used by packages depending on this one. For
    // example, when a user runs `zig fetch --save <url>`, this field is used
    // as the key in the `dependencies` table. Although the user can choose a
    // different name, most users will stick with this provided value.
    //
    // It is redundant to include "zig" in this name because it is already
    // within the Zig package namespace.
    .name = "shader_builder",

    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",

    // This field is optional.
    // This is currently advisory only; Zig does not yet do anything
    // with this value.
    //.minimum_zig_version = "0.11.0",

    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
        //    // which will prevent zig from using it.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of files that is
        //    // obtained after fetching `url` and applying the inclusion rules given by
        //    // `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a `url`; they
        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
        //    // obtain a package matching this `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory relative to the
        //    // build root. In this case the package's hash is irrelevant and therefore not
        //    // computed. This field and `url` are mutually exclusive.
        //    .path = "foo",
        //
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },

    // Specifies the set of files and directories that are included in this package.
    // Only files and directories listed here are included in the `hash` that
    // is computed for this package. Only files listed here will remain on disk
    // when using the zig package manager. As a rule of thumb, one should list
    // files required for compilation plus any license(s).
    // Paths are relative to the build root. Use the empty string (`""`) to refer to
    // the build root itself.
    // A directory listed here means that all files within, recursively, are included.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
}

View file

@ -1,45 +0,0 @@
//! Executable entry point. By convention `main.zig` hosts `main` when
//! building an executable; a library would start from `root.zig` instead.
pub fn main() !void {
    // Diagnostic output goes to stderr; `std.debug.print` is the shortcut for it.
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // Real program output belongs on stdout only (e.g. a gzip tool would
    // write just the compressed bytes there). Buffer the writes so repeated
    // prints do not each issue a syscall.
    const out_handle = std.io.getStdOut().writer();
    var buffered = std.io.bufferedWriter(out_handle);
    const writer = buffered.writer();

    try writer.print("Run `zig build test` to run the tests.\n", .{});
    try buffered.flush(); // Buffered bytes are lost unless flushed before return.
}
// Exercises ArrayList with `std.testing.allocator`, which fails the test on leaks.
test "simple test" {
    var list = std.ArrayList(i32).init(std.testing.allocator);
    defer list.deinit(); // Try commenting this out and see if zig detects the memory leak!
    try list.append(42);
    try std.testing.expectEqual(@as(i32, 42), list.pop());
}

// Verifies the library module is reachable through its `shader_builder_lib` import.
test "use other module" {
    try std.testing.expectEqual(@as(i32, 150), lib.add(100, 50));
}

// Fuzz-test entry point; exercised normally unless fuzzing is requested.
// NOTE(review): newer Zig releases changed `std.testing.fuzz` to take a
// context argument first — confirm this call matches the pinned Zig version.
test "fuzz example" {
    const global = struct {
        fn testOne(input: []const u8) anyerror!void {
            // Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case!
            try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input));
        }
    };
    try std.testing.fuzz(global.testOne, .{});
}
const std = @import("std");
/// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details.
const lib = @import("shader_builder_lib");

View file

@ -1,13 +0,0 @@
//! Library root. By convention `root.zig` is the root source file when
//! building a library; an executable would start from `main.zig` instead.
const std = @import("std");
const testing = std.testing;

/// Returns the sum of `a` and `b`. Declared `export`, so it uses the C ABI
/// and is callable from C as `add`. Signed overflow traps in safe build modes.
pub export fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
// Sanity check for `add`; runs via `zig build test`.
test "basic add functionality" {
    try testing.expect(add(3, 7) == 10);
}

View file

@ -1,29 +0,0 @@
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
LLVM Exceptions to the Apache 2.0 License
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.

View file

@ -1,156 +0,0 @@
Slang
=====
![CI Status](https://github.com/shader-slang/slang/actions/workflows/ci.yml/badge.svg?branch=master)
![CTS Status](https://github.com/shader-slang/slang/actions/workflows/vk-gl-cts-nightly.yml/badge.svg)
Slang is a shading language that makes it easier to build and maintain large shader codebases in a modular and extensible fashion, while also maintaining the highest possible performance on modern GPUs and graphics APIs.
Slang is based on years of collaboration between researchers at NVIDIA, Carnegie Mellon University, Stanford, MIT, UCSD and the University of Washington.
Why Slang?
---------------
The Slang shading language is designed to enable real-time graphics developers to work with large-scale, high-performance shader code.
### Write Shaders Once, Run Anywhere
The Slang compiler can generate code for a wide variety of targets: D3D12, Vulkan, Metal, D3D11, OpenGL, CUDA, and even generate code to run on a CPU. For textual targets, such as Metal Shading Language (MSL) and CUDA, Slang produces readable code that preserves original identifier names, as well as the type and call structure, making it easier to debug.
### Access the Latest GPU Features
Slang code is highly portable, but can still leverage unique platform capabilities, including the latest features in Direct3D and Vulkan. For example, developers can make full use of [pointers](https://shader-slang.com/slang/user-guide/convenience-features.html#pointers-limited) when generating SPIR-V.
Slang's [capability system](https://shader-slang.com/slang/user-guide/capabilities.html) helps applications manage feature set differences across target platforms by ensuring code only uses available features during the type-checking step, before generating final code. Additionally, Slang provides [flexible interop](https://shader-slang.com/slang/user-guide/a1-04-interop.html) features to enable directly embedding target code or SPIR-V into generated shaders.
### Leverage Neural Graphics with Automatic Differentiation
Slang can [automatically generate both forward and backward derivative propagation code](https://shader-slang.com/slang/user-guide/autodiff.html) for complex functions that involve arbitrary control flow and dynamic dispatch. This allows existing rendering codebases to easily become differentiable, or for Slang to serve as the kernel language in a PyTorch-driven machine learning framework via [`slangtorch`](https://shader-slang.com/slang/user-guide/a1-02-slangpy.html).
### Scalable Software Development with Modules
Slang provides a [module system](https://shader-slang.com/slang/user-guide/modules.html) that enables logical organization of code for separate compilation. Slang modules can be independently compiled offline to a custom IR (with optional obfuscation) and then linked at runtime to generate code in formats such as DXIL or SPIR-V.
### Code Specialization that Works with Modules
Slang supports [generics and interfaces](https://shader-slang.com/slang/user-guide/interfaces-generics.html) (a.k.a. type traits/protocols), allowing for clear expression of shader specialization without the need for preprocessor techniques or string-pasting. Unlike C++ templates, Slang's generics are pre-checked and don't produce cascading error messages that are difficult to diagnose. The same generic shader can be specialized for a variety of different types to produce specialized code ahead of time, or on the fly, entirely under application control.
### Easy On-ramp for HLSL and GLSL Codebases
Slang's syntax is similar to HLSL, and most existing HLSL code can be compiled with the Slang compiler out-of-the-box, or with just minor modifications. This allows existing shader codebases to immediately benefit from Slang without requiring a complete rewrite or port.
Slang provides a compatibility module that enables the use of most GLSL intrinsic functions and GLSL's parameter binding syntax.
### Comprehensive Tooling Support
Slang comes with full support of IntelliSense editing features in Visual Studio Code and Visual Studio through the Language Server Protocol.
Full debugging capabilities are also available through RenderDoc and SPIR-V based tools.
Getting Started
---------------
The fastest way to get started using Slang in your own development is to use a pre-built binary package, available through GitHub [releases](https://github.com/shader-slang/slang/releases).
Slang binaries are also included in the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) since version 1.3.296.0.
There are packages built for x86_64 and aarch64 Windows, Linux and macOS.
Each binary release includes the command-line `slangc` compiler, a shared library for the compiler, and the `slang.h` header.
See the user-guide for info on using the `slangc` command-line tool: [Slang Command Line Usage](
https://shader-slang.com/slang/user-guide/compiling.html#command-line-compilation-with-slangc).
If you want to try out the Slang language without installing anything, a fast and simple way is to use the [Slang Playground](https://shader-slang.com/slang-playground). The playground allows you to compile Slang code to a variety of targets, and even run some simple shaders directly within the browser. The playground loads Slang compiler to your browser and runs all compilation locally. No data will be sent to any servers.
If you would like to build Slang from source, please consult the [build instructions](docs/building.md).
Documentation
-------------
The Slang project provides a variety of different [documentation](docs/), but most users would be well served starting with the [User's Guide](https://shader-slang.github.io/slang/user-guide/).
For developers writing Slang code, the [Slang Core Module Reference](https://shader-slang.com/stdlib-reference/) provides detailed documentation on Slang's built-in types and functions.
We also provide a few [examples](examples/) of how to integrate Slang into a rendering application.
These examples use a graphics layer that we include with Slang called "GFX" which is an abstraction library of various graphics APIs (D3D11, D3D12, OpenGL, Vulkan, CUDA, and the CPU) to support cross-platform applications using GPU graphics and compute capabilities.
If you'd like to learn more about GFX, see the [GFX User Guide](https://shader-slang.com/slang/gfx-user-guide/index.html).
Additionally, we recommend checking out [Vulkan Mini Examples](https://github.com/nvpro-samples/vk_mini_samples/) for more examples of using Slang's language features available on Vulkan, such as pointers and the ray tracing intrinsics.
Contributing
------------
If you'd like to contribute to the project, we are excited to have your input.
The following guidelines should be observed by contributors:
* Please follow the contributor [Code of Conduct](CODE_OF_CONDUCT.md).
* Bug reports and feature requests should go through the GitHub issue tracker
* Changes should ideally come in as small pull requests on top of `master`, coming from your own personal fork of the project
* Large features that will involve multiple contributors or a long development time should be discussed in issues, and broken down into smaller pieces that can be implemented and checked in in stages
The [Contribution guide](CONTRIBUTING.md) describes the workflow for contributors in more detail.
Limitations and Support
-----------------------
### Platform support
The Slang compiler and libraries can be built on the following platforms:
| Windows | Linux | MacOS | WebAssembly |
|:---------:|:---------:|:---------:|:------------:|
| supported | supported | supported | experimental |
Both `x86_64` and `aarch64` architectures are supported on Windows, Linux and MacOS platforms.
### Target support
Slang can compile shader code to the following targets:
| Target | Status | Output Formats |
|:-----------:|:-------------------------------------------------------------------------------------:|:----------------------------------------------------------------:|
| Direct3D 11 | [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-11) | HLSL |
| Direct3D 12 | [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-12) | HLSL |
| Vulkan | [supported](https://shader-slang.com/slang/user-guide/targets.html#vulkan) | SPIRV, GLSL |
| Metal | [experimental*](https://shader-slang.com/slang/user-guide/targets.html#metal) | Metal Shading Language |
| WebGPU | experimental** | WGSL |
| CUDA | [supported](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix) | C++ (compute only) |
| Optix | [experimental](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix) | C++ (WIP) |
| CPU | [experimental](https://shader-slang.com/slang/user-guide/targets.html#cpu-compute) | C++ (kernel), C++ (host), standalone executable, dynamic library |
> *Slang currently supports generating vertex, fragment, compute, task and mesh
> shaders for Metal.
> **WGSL support is still a work in progress.
For greater detail, see the [Supported Compilation
Targets](https://shader-slang.com/slang/user-guide/targets.html) section of the
[User Guide](https://shader-slang.github.io/slang/user-guide/)
The Slang project has been used for production applications and large shader
codebases, but it is still under active development. Support is currently
focused on the platforms (Windows, Linux) and target APIs (Direct3D 12, Vulkan)
where Slang is used most heavily. Users who are looking for support on other
platforms or APIs should coordinate with the development team via the issue
tracker to make sure that their use cases can be supported.
License
-------
The Slang code itself is under the Apache 2.0 with LLVM Exception license (see [LICENSE](LICENSE)).
Builds of the core Slang tools depend on the following projects, either automatically or optionally, which may have their own licenses:
* [`glslang`](https://github.com/KhronosGroup/glslang) (BSD)
* [`lz4`](https://github.com/lz4/lz4) (BSD)
* [`miniz`](https://github.com/richgel999/miniz) (MIT)
* [`spirv-headers`](https://github.com/KhronosGroup/SPIRV-Headers) (Modified MIT)
* [`spirv-tools`](https://github.com/KhronosGroup/SPIRV-Tools) (Apache 2.0)
* [`ankerl::unordered_dense::{map, set}`](https://github.com/martinus/unordered_dense) (MIT)
Slang releases may include [LLVM](https://github.com/llvm/llvm-project) under the license:
* [`llvm`](https://llvm.org/docs/DeveloperPolicy.html#new-llvm-project-license-framework) (Apache 2.0 License with LLVM exceptions)
Some of the tests and example programs that build with Slang use the following projects, which may have their own licenses:
* [`glm`](https://github.com/g-truc/glm) (MIT)
* `stb_image` and `stb_image_write` from the [`stb`](https://github.com/nothings/stb) collection of single-file libraries (Public Domain)
* [`tinyobjloader`](https://github.com/tinyobjloader/tinyobjloader) (MIT)

File diff suppressed because it is too large Load diff

View file

@ -1,444 +0,0 @@
public namespace slang
{
public typedef int32_t Result;
public typedef uint64_t Size;
public typedef int64_t Int;
public typedef uint64_t UInt;
/*!
@brief Severity of a diagnostic generated by the compiler.
Values come from the enum below, with higher values representing more severe
conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation
failure.
*/
public enum SlangSeverity
{
    SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */
    SLANG_SEVERITY_NOTE, /**< An informative message. */
    SLANG_SEVERITY_WARNING, /**< A warning, which indicates a possible problem. */
    SLANG_SEVERITY_ERROR, /**< An error, indicating that compilation failed. */
    SLANG_SEVERITY_FATAL, /**< An unrecoverable error, which forced compilation to abort. */
    SLANG_SEVERITY_INTERNAL, /**< An internal error, indicating a logic error in the compiler. */
};
public enum SlangDiagnosticFlags
{
SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01,
SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02
};
public enum SlangBindableResourceType
{
SLANG_NON_BINDABLE = 0,
SLANG_TEXTURE,
SLANG_SAMPLER,
SLANG_UNIFORM_BUFFER,
SLANG_STORAGE_BUFFER,
};
public enum SlangCompileTarget
{
SLANG_TARGET_UNKNOWN,
SLANG_TARGET_NONE,
SLANG_GLSL,
SLANG_GLSL_VULKAN, //< deprecated: just use `SLANG_GLSL`
SLANG_GLSL_VULKAN_ONE_DESC, //< deprecated
SLANG_HLSL,
SLANG_SPIRV,
SLANG_SPIRV_ASM,
SLANG_DXBC,
SLANG_DXBC_ASM,
SLANG_DXIL,
SLANG_DXIL_ASM,
SLANG_C_SOURCE, ///< The C language
SLANG_CPP_SOURCE, ///< C++ code for shader kernels.
SLANG_CPP_PYTORCH_BINDING,
SLANG_HOST_EXECUTABLE, ///< Standalone binary executable (for hosting CPU/OS)
SLANG_SHADER_SHARED_LIBRARY, ///< A shared library/Dll for shader kernels (for hosting CPU/OS)
SLANG_SHADER_HOST_CALLABLE, ///< A CPU target that makes the compiled shader code available to be run immediately
SLANG_CUDA_SOURCE, ///< Cuda source
SLANG_PTX, ///< PTX
SLANG_OBJECT_CODE, ///< Object code that can be used for later linking
SLANG_HOST_CPP_SOURCE, ///< C++ code for host library or executable.
SLANG_HOST_HOST_CALLABLE, ///<
SLANG_TARGET_COUNT_OF,
};
/* A "container format" describes the way that the outputs
for multiple files, entry points, targets, etc. should be
combined into a single artifact for output. */
public enum SlangContainerFormat
{
/* Don't generate a container. */
SLANG_CONTAINER_FORMAT_NONE,
/* Generate a container in the `.slang-module` format,
which includes reflection information, compiled kernels, etc. */
SLANG_CONTAINER_FORMAT_SLANG_MODULE,
};
public enum SlangPassThrough : int
{
SLANG_PASS_THROUGH_NONE,
SLANG_PASS_THROUGH_FXC,
SLANG_PASS_THROUGH_DXC,
SLANG_PASS_THROUGH_GLSLANG,
SLANG_PASS_THROUGH_SPIRV_DIS,
SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler
SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler
SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler
SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type
SLANG_PASS_THROUGH_NVRTC, ///< NVRTC Cuda compiler
SLANG_PASS_THROUGH_LLVM, ///< LLVM 'compiler' - includes LLVM and Clang
SLANG_PASS_THROUGH_SPIRV_OPT,
SLANG_PASS_THROUGH_COUNT_OF,
};
/* Defines an archive type used to holds a 'file system' type structure. */
public enum SlangArchiveType : int
{
SLANG_ARCHIVE_TYPE_UNDEFINED,
SLANG_ARCHIVE_TYPE_ZIP,
SLANG_ARCHIVE_TYPE_RIFF, ///< Riff container with no compression
SLANG_ARCHIVE_TYPE_RIFF_DEFLATE,
SLANG_ARCHIVE_TYPE_RIFF_LZ4,
SLANG_ARCHIVE_TYPE_COUNT_OF,
};
/*!
Flags to control compilation behavior.
*/
public enum SlangCompileFlags
{
/* Do as little mangling of names as possible, to try to preserve original names */
SLANG_COMPILE_FLAG_NO_MANGLING = 1 << 3,
/* Skip code generation step, just check the code and generate layout */
SLANG_COMPILE_FLAG_NO_CODEGEN = 1 << 4,
/* Obfuscate shader names on release products */
SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5,
/* Deprecated flags: kept around to allow existing applications to
compile. Note that the relevant features will still be left in
their default state. */
SLANG_COMPILE_FLAG_NO_CHECKING = 0,
SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES = 0,
};
/*!
@brief Flags to control code generation behavior of a compilation target */
public enum SlangTargetFlags
{
None = 0,
/* When compiling for a D3D Shader Model 5.1 or higher target, allocate
distinct register spaces for parameter blocks.
@deprecated This behavior is now enabled unconditionally.
*/
SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4,
/* When set, will generate target code that contains all entrypoints defined
in the input source or specified via the `spAddEntryPoint` function in a
single output module (library/source file).
*/
SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8,
/* When set, will dump out the IR between intermediate compilation steps.*/
SLANG_TARGET_FLAG_DUMP_IR = 1 << 9,
/* When set, will generate SPIRV directly instead of going through glslang. */
SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10,
};
/*!
@brief Options to control floating-point precision guarantees for a target.
*/
public enum SlangFloatingPointMode
{
SLANG_FLOATING_POINT_MODE_DEFAULT = 0,
SLANG_FLOATING_POINT_MODE_FAST,
SLANG_FLOATING_POINT_MODE_PRECISE,
};
/*!
@brief Options to control emission of `#line` directives
*/
public enum SlangLineDirectiveMode
{
    SLANG_LINE_DIRECTIVE_MODE_DEFAULT = 0, /**< Default behavior: pick behavior based on the target. */
    SLANG_LINE_DIRECTIVE_MODE_NONE, /**< Don't emit line directives at all. */
    SLANG_LINE_DIRECTIVE_MODE_STANDARD, /**< Emit standard C-style `#line` directives. */
    SLANG_LINE_DIRECTIVE_MODE_GLSL, /**< Emit GLSL-style directives with file *number* instead of name */
};
public enum SlangSourceLanguage : int
{
SLANG_SOURCE_LANGUAGE_UNKNOWN,
SLANG_SOURCE_LANGUAGE_SLANG,
SLANG_SOURCE_LANGUAGE_HLSL,
SLANG_SOURCE_LANGUAGE_GLSL,
SLANG_SOURCE_LANGUAGE_C,
SLANG_SOURCE_LANGUAGE_CPP,
SLANG_SOURCE_LANGUAGE_CUDA,
SLANG_SOURCE_LANGUAGE_COUNT_OF,
};
public enum SlangProfileID
{
SLANG_PROFILE_UNKNOWN,
};
public enum SlangCapabilityID
{
SLANG_CAPABILITY_UNKNOWN = 0,
};
public enum SlangMatrixLayoutMode
{
SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0,
SLANG_MATRIX_LAYOUT_ROW_MAJOR,
SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
};
public enum SlangStage
{
SLANG_STAGE_NONE,
SLANG_STAGE_VERTEX,
SLANG_STAGE_HULL,
SLANG_STAGE_DOMAIN,
SLANG_STAGE_GEOMETRY,
SLANG_STAGE_FRAGMENT,
SLANG_STAGE_COMPUTE,
SLANG_STAGE_RAY_GENERATION,
SLANG_STAGE_INTERSECTION,
SLANG_STAGE_ANY_HIT,
SLANG_STAGE_CLOSEST_HIT,
SLANG_STAGE_MISS,
SLANG_STAGE_CALLABLE,
SLANG_STAGE_MESH,
SLANG_STAGE_AMPLIFICATION,
};
public enum SlangDebugInfoLevel
{
    SLANG_DEBUG_INFO_LEVEL_NONE = 0, /**< Don't emit debug information at all. */
    SLANG_DEBUG_INFO_LEVEL_MINIMAL, /**< Emit as little debug information as possible, while still supporting stack traces. */
    SLANG_DEBUG_INFO_LEVEL_STANDARD, /**< Emit whatever is the standard level of debug information for each target. */
    SLANG_DEBUG_INFO_LEVEL_MAXIMAL, /**< Emit as much debug information as possible for each target. */
};
public enum SlangOptimizationLevel
{
SLANG_OPTIMIZATION_LEVEL_NONE = 0, /**< Don't optimize at all. */
SLANG_OPTIMIZATION_LEVEL_DEFAULT, /**< Default optimization level: balance code quality and compilation time. */
SLANG_OPTIMIZATION_LEVEL_HIGH, /**< Optimize aggressively. */
SLANG_OPTIMIZATION_LEVEL_MAXIMAL, /**< Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs */
};
public enum SlangTypeKind
{
NONE,
STRUCT,
ARRAY,
MATRIX,
VECTOR,
SCALAR,
CONSTANT_BUFFER,
RESOURCE,
SAMPLER_STATE,
TEXTURE_BUFFER,
SHADER_STORAGE_BUFFER,
PARAMETER_BLOCK,
GENERIC_TYPE_PARAMETER,
INTERFACE,
OUTPUT_STREAM,
SPECIALIZED,
FEEDBACK,
COUNT,
};
public enum SlangScalarType
{
NONE,
VOID,
BOOL,
INT32,
UINT32,
INT64,
UINT64,
FLOAT16,
FLOAT32,
FLOAT64,
INT8,
UINT8,
INT16,
UINT16,
};
public struct TypeReflection
{
};
public enum CompileStdLibFlags
{
WriteDocumentation = 0x1,
};
[COM("8BA5FB08-5195-40e2-AC58-0D-98-9C-3A-01-02")]
public interface ISlangBlob
{
public void *getBufferPointer();
public Size getBufferSize();
};
/** Description of a code generation target.
*/
public struct TargetDesc
{
    /** The size of this structure, in bytes.
    */
    // NOTE(review): hard-coded to mirror the native slang.h layout —
    // confirm 40 stays in sync with sizeof(TargetDesc) in the C header.
    public Size structureSize = 40;
    /** The target format to generate code for (e.g., SPIR-V, DXIL, etc.)
    */
    public SlangCompileTarget format = SlangCompileTarget.SLANG_TARGET_UNKNOWN;
    /** The compilation profile supported by the target (e.g., "Shader Model 5.1")
    */
    public SlangProfileID profile = SlangProfileID.SLANG_PROFILE_UNKNOWN;
    /** Flags for the code generation target. Currently unused. */
    public SlangTargetFlags flags = SlangTargetFlags.None;
    /** Default mode to use for floating-point operations on the target.
    */
    public SlangFloatingPointMode floatingPointMode = SlangFloatingPointMode.SLANG_FLOATING_POINT_MODE_DEFAULT;
    /** Optimization level to use for the target.
    */
    public SlangOptimizationLevel optimizationLevel = SlangOptimizationLevel.SLANG_OPTIMIZATION_LEVEL_DEFAULT;
    /** The line directive mode for output source code.
    */
    public SlangLineDirectiveMode lineDirectiveMode = SlangLineDirectiveMode.SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
    /** Whether to force `scalar` layout for glsl shader storage buffers.
    */
    public bool forceGLSLScalarBufferLayout = false;
};
public enum SessionFlags
{
kSessionFlags_None = 0
};
/** A single preprocessor macro definition to predefine in a session. */
public struct PreprocessorMacroDesc
{
// Macro name (as it would appear after `#define`).
public NativeString name;
// Replacement text for the macro.
public NativeString value;
};
/** Description used to create a compilation session. */
public struct SessionDesc
{
/** The size of this structure, in bytes.
*/
// NOTE(review): 72 is presumably sizeof the native SessionDesc on the bound
// platform/version — confirm against slang.h before upgrading Slang.
public Size structureSize = 72;
/** Code generation targets to include in the session.
*/
public TargetDesc *targets = nullptr;
// Number of entries in `targets`.
public Int targetCount = 0;
/** Flags to configure the session.
*/
public SessionFlags flags = SessionFlags.kSessionFlags_None;
/** Default layout to assume for variables with matrix types.
*/
public SlangMatrixLayoutMode defaultMatrixLayoutMode = SlangMatrixLayoutMode.SLANG_MATRIX_LAYOUT_ROW_MAJOR;
/** Paths to use when searching for `#include`d or `import`ed files.
*/
public NativeString *searchPaths = nullptr;
// Number of entries in `searchPaths`.
public Int searchPathCount = 0;
// Macros to predefine for all code compiled in the session, and their count.
public PreprocessorMacroDesc *preprocessorMacros = nullptr;
public Int preprocessorMacroCount = 0;
// Optional file-system override; presumably an ISlangFileSystem* — confirm
// against the native SessionDesc declaration.
public void *fileSystem = nullptr;
};
/** A global session for interaction with the Slang library.
An application may create and re-use a single global session across
multiple sessions, in order to amortize startups costs (in current
Slang this is mostly the cost of loading the Slang standard library).
The global session is currently *not* thread-safe and objects created from
a single global session should only be used from a single thread at
a time.
*/
[COM("c140b5fd-0c78-452e-ba7c-1a-1e-70-c7-f7-1c")]
public interface IGlobalSession
{
// NOTE: no methods are declared in this binding yet.
};
/** Kinds of container that a type can be wrapped in when specializing
    (e.g. when requesting a container-of-T layout). */
public enum class ContainerType
{
None, UnsizedArray, StructuredBuffer, ConstantBuffer, ParameterBlock
};
/** A session provides a scope for code that is loaded.
A session can be used to load modules of Slang source code,
and to request target-specific compiled binaries and layout
information.
In order to be able to load code, the session owns a set
of active "search paths" for resolving `#include` directives
and `import` declarations, as well as a set of global
preprocessor definitions that will be used for all code
that gets `import`ed in the session.
If multiple user shaders are loaded in the same session,
and import the same module (e.g., two source files do `import X`)
then there will only be one copy of `X` loaded within the session.
In order to be able to generate target code, the session
owns a list of available compilation targets, which specify
code generation options.
Code loaded and compiled within a session is owned by the session
and will remain resident in memory until the session is released.
Applications wishing to control the memory usage for compiled
and loaded code should use multiple sessions.
*/
[COM("67618701-d116-468f-ab3b-47-4b-ed-ce-0e-3d")]
public interface ISession
{
// NOTE: no methods are declared in this binding yet.
};
/** Base interface for components that can be composed and linked
    (modules, entry points, composites). Empty in this binding. */
[COM("5bc42be8-5c50-4929-9e5e-d15e7c24015f")]
public interface IComponentType
{
}
/** Reflection handle for the layout of a type; opaque placeholder in this binding. */
public struct TypeLayoutReflection { }
/** The kind of specialization argument. */
// Backed by int32_t to match the native ABI.
public enum class SpecializationArgKind : int32_t
{
Unknown, /**< An invalid specialization argument. */
Type, /**< Specialize to a type. */
};
/** A tagged specialization argument: `kind` selects which payload field is valid. */
public struct SpecializationArg
{
public SpecializationArgKind kind;
/** A type specialization argument, used for `Kind::Type`. */
public TypeReflection *type;
}
}

View file

@ -1,44 +0,0 @@
####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
####### Any changes to this file will be overwritten by the next CMake run ####
####### The input file was SlangConfig.cmake.in ########
# Resolve the package's install prefix relative to this config file.
get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../" ABSOLUTE)
# Sets ${_var} to ${_file}, and aborts configuration if the path does not exist.
macro(set_and_check _var _file)
set(${_var} "${_file}")
if(NOT EXISTS "${_file}")
message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
endif()
endmacro()
# Marks the package NOT FOUND if any REQUIRED find_package component is missing.
macro(check_required_components _NAME)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(NOT ${_NAME}_${comp}_FOUND)
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
endif()
endif()
endforeach()
endmacro()
####################################################################################
# The imported-targets file is only shipped for non-Emscripten builds.
if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
include("${CMAKE_CURRENT_LIST_DIR}/slangTargets.cmake")
check_required_components("slang")
endif()
# `if(ON)`: this condition was baked in at generation time (slangc was enabled).
if(ON)
find_program(SLANGC_EXECUTABLE "slangc" HINTS ENV PATH "${PACKAGE_PREFIX_DIR}/bin")
if (NOT SLANGC_EXECUTABLE)
message(STATUS "slangc executable not found; ensure it is available in your PATH.")
endif()
set(SLANG_EXECUTABLE ${SLANGC_EXECUTABLE} CACHE STRING "Path to the slangc executable")
endif()

View file

@ -1,65 +0,0 @@
# This is a basic version file for the Config-mode of find_package().
# It is used by write_basic_package_version_file() as input file for configure_file()
# to create a version-file which can be installed along a config.cmake file.
#
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
# the requested version string are exactly the same and it sets
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
# but only if the requested major version is the same as the current one.
# The variable CVF_VERSION must be set before calling configure_file().
# NOTE: generated file — regenerate rather than editing by hand.
set(PACKAGE_VERSION "2025.3.1")
# A request for a version newer than what is installed can never be satisfied.
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
# Extract the major version (with leading zeros stripped) for SameMajorVersion compatibility.
if("2025.3.1" MATCHES "^([0-9]+)\\.")
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")
endif()
else()
set(CVF_VERSION_MAJOR "2025.3.1")
endif()
if(PACKAGE_FIND_VERSION_RANGE)
# both endpoints of the range must have the expected major version
math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
set(PACKAGE_VERSION_COMPATIBLE FALSE)
elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
else()
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()
endif()
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")
return()
endif()
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
# ("8" was baked in at generation time: the installed package targets 64-bit pointers.)
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")
math(EXPR installedBits "8 * 8")
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()

View file

@ -1,70 +0,0 @@
#----------------------------------------------------------------
# Generated CMake target import file for configuration "Release".
#----------------------------------------------------------------
# NOTE: generated file — _IMPORT_PREFIX is computed by slangTargets.cmake,
# which includes this file. Do not edit by hand.
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Import target "slang::slang-llvm" for configuration "Release"
set_property(TARGET slang::slang-llvm APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang-llvm PROPERTIES
IMPORTED_COMMON_LANGUAGE_RUNTIME_RELEASE ""
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang-llvm.so"
IMPORTED_NO_SONAME_RELEASE "TRUE"
)
list(APPEND _cmake_import_check_targets slang::slang-llvm )
list(APPEND _cmake_import_check_files_for_slang::slang-llvm "${_IMPORT_PREFIX}/lib/libslang-llvm.so" )
# Import target "slang::slang-glslang" for configuration "Release"
set_property(TARGET slang::slang-glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang-glslang PROPERTIES
IMPORTED_COMMON_LANGUAGE_RUNTIME_RELEASE ""
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang-glslang.so"
IMPORTED_NO_SONAME_RELEASE "TRUE"
)
list(APPEND _cmake_import_check_targets slang::slang-glslang )
list(APPEND _cmake_import_check_files_for_slang::slang-glslang "${_IMPORT_PREFIX}/lib/libslang-glslang.so" )
# Import target "slang::slangd" for configuration "Release"
set_property(TARGET slang::slangd APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slangd PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/slangd"
)
list(APPEND _cmake_import_check_targets slang::slangd )
list(APPEND _cmake_import_check_files_for_slang::slangd "${_IMPORT_PREFIX}/bin/slangd" )
# Import target "slang::gfx" for configuration "Release"
set_property(TARGET slang::gfx APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::gfx PROPERTIES
IMPORTED_LINK_DEPENDENT_LIBRARIES_RELEASE "slang::slang"
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libgfx.so"
IMPORTED_SONAME_RELEASE "libgfx.so"
)
list(APPEND _cmake_import_check_targets slang::gfx )
list(APPEND _cmake_import_check_files_for_slang::gfx "${_IMPORT_PREFIX}/lib/libgfx.so" )
# Import target "slang::slang" for configuration "Release"
set_property(TARGET slang::slang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang.so"
IMPORTED_SONAME_RELEASE "libslang.so"
)
list(APPEND _cmake_import_check_targets slang::slang )
list(APPEND _cmake_import_check_files_for_slang::slang "${_IMPORT_PREFIX}/lib/libslang.so" )
# Import target "slang::slangc" for configuration "Release"
set_property(TARGET slang::slangc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slangc PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/slangc"
)
list(APPEND _cmake_import_check_targets slang::slangc )
list(APPEND _cmake_import_check_files_for_slang::slangc "${_IMPORT_PREFIX}/bin/slangc" )
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)

View file

@ -1,123 +0,0 @@
# Generated by CMake
# NOTE: generated export file — defines the imported slang::* targets and then
# pulls in the per-configuration slangTargets-*.cmake files. Do not edit by hand.
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
if(CMAKE_VERSION VERSION_LESS "2.8.3")
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.8.3...3.29)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_cmake_targets_defined "")
set(_cmake_targets_not_defined "")
set(_cmake_expected_targets "")
foreach(_cmake_expected_target IN ITEMS slang::slang-llvm slang::slang-glslang slang::slangd slang::gfx slang::slang slang::slangc)
list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
if(TARGET "${_cmake_expected_target}")
list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
else()
list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
endif()
endforeach()
unset(_cmake_expected_target)
# All targets already defined: this file was included before — nothing to do.
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
unset(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
# A partial definition means some other export already claimed part of the set — hard error.
if(NOT _cmake_targets_defined STREQUAL "")
string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
endif()
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
# Compute the installation prefix relative to this file.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
set(_IMPORT_PREFIX "")
endif()
# Create imported target slang::slang-llvm
add_library(slang::slang-llvm MODULE IMPORTED)
set_target_properties(slang::slang-llvm PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "SLANG_DYNAMIC"
)
# Create imported target slang::slang-glslang
add_library(slang::slang-glslang MODULE IMPORTED)
# Create imported target slang::slangd
add_executable(slang::slangd IMPORTED)
# Create imported target slang::gfx
add_library(slang::gfx SHARED IMPORTED)
set_target_properties(slang::gfx PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "SLANG_GFX_DYNAMIC"
)
# Create imported target slang::slang
add_library(slang::slang SHARED IMPORTED)
# Create imported target slang::slangc
add_executable(slang::slangc IMPORTED)
# Load information for each installed configuration.
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/slangTargets-*.cmake")
foreach(_cmake_config_file IN LISTS _cmake_config_files)
include("${_cmake_config_file}")
endforeach()
unset(_cmake_config_file)
unset(_cmake_config_files)
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
if(CMAKE_VERSION VERSION_LESS "3.28"
OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target}
OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}")
foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
if(NOT EXISTS "${_cmake_file}")
message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
\"${_cmake_file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
endif()
unset(_cmake_file)
unset("_cmake_import_check_files_for_${_cmake_target}")
endforeach()
unset(_cmake_target)
unset(_cmake_import_check_targets)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)

View file

@ -1,200 +0,0 @@
#ifndef SLANG_COM_HELPER_H
#define SLANG_COM_HELPER_H
/** \file slang-com-helper.h

    Helper macros for propagating SlangResult failures out of the current
    function, plus C++ helpers for Guid comparison and for implementing
    COM-style (IUnknown-like) interfaces.
*/
#include "slang.h"
#include <atomic>
/* !!!!!!!!!!!!!!!!!!!!! Macros to help checking SlangResult !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*! Set SLANG_HANDLE_RESULT_FAIL(x) to code to be executed whenever an error occurs, and is detected
* by one of the macros */
#ifndef SLANG_HANDLE_RESULT_FAIL
#define SLANG_HANDLE_RESULT_FAIL(x)
#endif
//! Helper macro, that makes it easy to add result checking to calls in functions/methods that
//! themselves return Result.
#define SLANG_RETURN_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return _res; \
} \
}
//! Helper macro that can be used to test the return value from a call, and will return in a void
//! method/function
#define SLANG_RETURN_VOID_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return; \
} \
}
//! Helper macro that will return false on failure.
#define SLANG_RETURN_FALSE_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return false; \
} \
}
//! Helper macro that will return nullptr on failure.
#define SLANG_RETURN_NULL_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return nullptr; \
} \
}
//! Helper macro that will assert if the return code from a call is failure, also returns the
//! failure.
#define SLANG_ASSERT_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
assert(false); \
return _res; \
} \
}
//! Helper macro that will assert if the result from a call is a failure, also returns.
#define SLANG_ASSERT_VOID_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
assert(false); \
return; \
} \
}
/* !!!!!!!!!!!!!!!!!!!!!!! C++ helpers !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
#if defined(__cplusplus)
namespace Slang
{
// Alias SlangResult to Slang::Result
typedef SlangResult Result;
// Alias SlangUUID to Slang::Guid
typedef SlangUUID Guid;
} // namespace Slang
// Operator == and != for Guid/SlangUUID
SLANG_FORCE_INLINE bool operator==(const Slang::Guid& aIn, const Slang::Guid& bIn)
{
using namespace Slang;
// Use the largest type that honors the alignment of Guid
typedef uint32_t CmpType;
union GuidCompare
{
Guid guid;
CmpType data[sizeof(Guid) / sizeof(CmpType)];
};
// Type pun - so compiler can 'see' the pun and not break aliasing rules
const CmpType* a = reinterpret_cast<const GuidCompare&>(aIn).data;
const CmpType* b = reinterpret_cast<const GuidCompare&>(bIn).data;
// Make the guid comparison a single branch, by not using short circuit
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
}
SLANG_FORCE_INLINE bool operator!=(const Slang::Guid& a, const Slang::Guid& b)
{
return !(a == b);
}
/* !!!!!!!! Macros to simplify implementing COM interfaces !!!!!!!!!!!!!!!!!!!!!!!!!!!! */
/* Assumes underlying implementation has a member m_refCount that is initialized to 0 and can
have ++ and -- operate on it. For SLANG_IUNKNOWN_QUERY_INTERFACE to work - must have a method
'getInterface' that returns valid pointers for the Guid, or nullptr if not found. */
#define SLANG_IUNKNOWN_QUERY_INTERFACE \
SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
SlangUUID const& uuid, \
void** outObject) SLANG_OVERRIDE \
{ \
ISlangUnknown* intf = getInterface(uuid); \
if (intf) \
{ \
addRef(); \
*outObject = intf; \
return SLANG_OK; \
} \
return SLANG_E_NO_INTERFACE; \
}
// NOTE(review): ++/-- below are only as thread-safe as the implementing
// type's m_refCount (e.g. std::atomic vs plain int) — confirm per use site.
#define SLANG_IUNKNOWN_ADD_REF \
SLANG_NO_THROW uint32_t SLANG_MCALL addRef() \
{ \
return ++m_refCount; \
}
#define SLANG_IUNKNOWN_RELEASE \
SLANG_NO_THROW uint32_t SLANG_MCALL release() \
{ \
--m_refCount; \
if (m_refCount == 0) \
{ \
delete this; \
return 0; \
} \
return m_refCount; \
}
#define SLANG_IUNKNOWN_ALL \
SLANG_IUNKNOWN_QUERY_INTERFACE \
SLANG_IUNKNOWN_ADD_REF \
SLANG_IUNKNOWN_RELEASE
// ------------------------ RefObject IUnknown -----------------------------
#define SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
SlangUUID const& uuid, \
void** outObject) SLANG_OVERRIDE \
{ \
void* intf = getInterface(uuid); \
if (intf) \
{ \
addReference(); \
*outObject = intf; \
return SLANG_OK; \
} \
return SLANG_E_NO_INTERFACE; \
}
#define SLANG_REF_OBJECT_IUNKNOWN_ADD_REF \
SLANG_NO_THROW uint32_t SLANG_MCALL addRef() SLANG_OVERRIDE \
{ \
return (uint32_t)addReference(); \
}
#define SLANG_REF_OBJECT_IUNKNOWN_RELEASE \
SLANG_NO_THROW uint32_t SLANG_MCALL release() SLANG_OVERRIDE \
{ \
return (uint32_t)releaseReference(); \
}
#define SLANG_REF_OBJECT_IUNKNOWN_ALL \
SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
SLANG_REF_OBJECT_IUNKNOWN_ADD_REF \
SLANG_REF_OBJECT_IUNKNOWN_RELEASE
#endif // defined(__cplusplus)
#endif

View file

@ -1,210 +0,0 @@
#ifndef SLANG_COM_PTR_H
#define SLANG_COM_PTR_H
#include "slang-com-helper.h"
#include <assert.h>
#include <cstddef>
namespace Slang
{
/*! \brief ComPtr is a simple smart pointer that manages types which implement COM based interfaces.
\details A class that implements a COM, must derive from the IUnknown interface or a type that
matches its layout exactly (such as ISlangUnknown). Trying to use this template with a class that
doesn't follow these rules, will lead to undefined behavior. This is a 'strong' pointer type, and
will AddRef when a non null pointer is set and Release when the pointer leaves scope. Using 'detach'
allows a pointer to be removed from the management of the ComPtr. To set the smart pointer to null,
there is the method setNull, or alternatively just assign SLANG_NULL/nullptr.
One edge case using the template is that sometimes you want access as a pointer to a pointer.
Sometimes this is to write into the smart pointer, other times to pass as an array. To handle these
different behaviors there are the methods readRef and writeRef, which are used instead of the &
(ref) operator. For example
\code
Void doSomething(ID3D12Resource** resources, IndexT numResources);
// ...
ComPtr<ID3D12Resource> resources[3];
doSomething(resources[0].readRef(), SLANG_COUNT_OF(resources));
\endcode
A more common scenario writing to the pointer
\code
IUnknown* unk = ...;
ComPtr<ID3D12Resource> resource;
Result res = unk->QueryInterface(resource.writeRef());
\endcode
*/
// Enum to force initializing as an attach (without adding a reference)
enum InitAttach
{
INIT_ATTACH
};
template<class T>
class ComPtr
{
public:
typedef T Type;
typedef ComPtr ThisType;
typedef ISlangUnknown* Ptr;
/// Constructors
/// Default Ctor. Sets to nullptr
SLANG_FORCE_INLINE ComPtr()
: m_ptr(nullptr)
{
}
SLANG_FORCE_INLINE ComPtr(std::nullptr_t)
: m_ptr(nullptr)
{
}
/// Sets, and ref counts.
SLANG_FORCE_INLINE explicit ComPtr(T* ptr)
: m_ptr(ptr)
{
if (ptr)
((Ptr)ptr)->addRef();
}
/// The copy ctor
SLANG_FORCE_INLINE ComPtr(const ThisType& rhs)
: m_ptr(rhs.m_ptr)
{
if (m_ptr)
((Ptr)m_ptr)->addRef();
}
/// Ctor without adding to ref count.
SLANG_FORCE_INLINE explicit ComPtr(InitAttach, T* ptr)
: m_ptr(ptr)
{
}
/// Ctor without adding to ref count
SLANG_FORCE_INLINE ComPtr(InitAttach, const ThisType& rhs)
: m_ptr(rhs.m_ptr)
{
}
#ifdef SLANG_HAS_MOVE_SEMANTICS
/// Move Ctor
SLANG_FORCE_INLINE ComPtr(ThisType&& rhs)
: m_ptr(rhs.m_ptr)
{
rhs.m_ptr = nullptr;
}
/// Move assign
SLANG_FORCE_INLINE ComPtr& operator=(ThisType&& rhs)
{
T* swap = m_ptr;
m_ptr = rhs.m_ptr;
rhs.m_ptr = swap;
return *this;
}
#endif
/// Destructor releases the pointer, assuming it is set
SLANG_FORCE_INLINE ~ComPtr()
{
if (m_ptr)
((Ptr)m_ptr)->release();
}
// !!! Operators !!!
/// Returns the dumb pointer
SLANG_FORCE_INLINE operator T*() const { return m_ptr; }
SLANG_FORCE_INLINE T& operator*() { return *m_ptr; }
/// For making method invocations through the smart pointer work through the dumb pointer
SLANG_FORCE_INLINE T* operator->() const { return m_ptr; }
/// Assign
SLANG_FORCE_INLINE const ThisType& operator=(const ThisType& rhs);
/// Assign from dumb ptr
SLANG_FORCE_INLINE T* operator=(T* in);
/// Get the pointer and don't ref
SLANG_FORCE_INLINE T* get() const { return m_ptr; }
/// Releases the contained pointer (if set) and resets it to null
SLANG_FORCE_INLINE void setNull();
/// Detach
SLANG_FORCE_INLINE T* detach()
{
T* ptr = m_ptr;
m_ptr = nullptr;
return ptr;
}
/// Set to a pointer without changing the ref count
SLANG_FORCE_INLINE void attach(T* in) { m_ptr = in; }
/// Get ready for writing (nulls contents)
SLANG_FORCE_INLINE T** writeRef()
{
setNull();
return &m_ptr;
}
/// Get for read access
SLANG_FORCE_INLINE T* const* readRef() const { return &m_ptr; }
/// Swap
void swap(ThisType& rhs);
protected:
/// Gets the address of the dumb pointer.
// Disabled: use writeRef and readRef to get a reference based on usage.
#ifndef SLANG_COM_PTR_ENABLE_REF_OPERATOR
SLANG_FORCE_INLINE T** operator&() = delete;
#endif
T* m_ptr;
};
//----------------------------------------------------------------------------
template<typename T>
void ComPtr<T>::setNull()
{
if (m_ptr)
{
((Ptr)m_ptr)->release();
m_ptr = nullptr;
}
}
//----------------------------------------------------------------------------
template<typename T>
const ComPtr<T>& ComPtr<T>::operator=(const ThisType& rhs)
{
// addRef the source before releasing our own pointer so self-assignment is safe.
if (rhs.m_ptr)
((Ptr)rhs.m_ptr)->addRef();
if (m_ptr)
((Ptr)m_ptr)->release();
m_ptr = rhs.m_ptr;
return *this;
}
//----------------------------------------------------------------------------
template<typename T>
T* ComPtr<T>::operator=(T* ptr)
{
// addRef the incoming pointer first: assigning the currently-held pointer must not drop it.
if (ptr)
((Ptr)ptr)->addRef();
if (m_ptr)
((Ptr)m_ptr)->release();
m_ptr = ptr;
return m_ptr;
}
//----------------------------------------------------------------------------
template<typename T>
void ComPtr<T>::swap(ThisType& rhs)
{
T* tmp = m_ptr;
m_ptr = rhs.m_ptr;
rhs.m_ptr = tmp;
}
} // namespace Slang
#endif // SLANG_COM_PTR_H

View file

@ -1,58 +0,0 @@
#ifndef SLANG_CPP_HOST_PRELUDE_H
#define SLANG_CPP_HOST_PRELUDE_H
// NOTE(review): by its name this is the prelude for Slang-generated *host* C++
// code (it pulls in the slang-rt runtime and ComPtr) — confirm against the
// Slang docs for preludes.
#include <cmath>
#include <cstdio>
#include <cstring>
#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1
#include "../source/slang-rt/slang-rt.h"
#include "slang-com-ptr.h"
#include "slang-cpp-types.h"
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
// Old GCC (< 6) needs <cmath> and std::-qualified math calls.
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#include "slang-cpp-scalar-intrinsics.h"
using namespace Slang;
// Calling-convention-correct function-pointer alias used by generated code.
template<typename TResult, typename... Args>
using Slang_FuncType = TResult(SLANG_MCALL*)(Args...);
#endif

View file

@ -1,322 +0,0 @@
#ifndef SLANG_CPP_PRELUDE_H
#define SLANG_CPP_PRELUDE_H
// Because the signature of isnan, isfinite, and isinf changed in C++, we use the macro
// to use the version in the std namespace.
// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT
#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT
#define SLANG_PRELUDE_EXPORT_END SLANG_PRELUDE_EXTERN_C_END
#ifndef INFINITY
// Must overflow for double
#define INFINITY float(1e+300 * 1e+300)
#endif
#ifndef SLANG_INFINITY
#define SLANG_INFINITY INFINITY
#endif
// Detect the compiler type
#ifndef SLANG_COMPILER
#define SLANG_COMPILER
/*
Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
NOTE that SLANG_VC holds the compiler version - not just 1 or 0
*/
#if defined(_MSC_VER)
#if _MSC_VER >= 1900
#define SLANG_VC 14
#elif _MSC_VER >= 1800
#define SLANG_VC 12
#elif _MSC_VER >= 1700
#define SLANG_VC 11
#elif _MSC_VER >= 1600
#define SLANG_VC 10
#elif _MSC_VER >= 1500
#define SLANG_VC 9
#else
#error "unknown version of Visual C++ compiler"
#endif
#elif defined(__clang__)
#define SLANG_CLANG 1
#elif defined(__SNC__)
#define SLANG_SNC 1
#elif defined(__ghs__)
#define SLANG_GHS 1
#elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
#define SLANG_GCC 1
#else
#error "unknown compiler"
#endif
/*
Any compilers not detected by the above logic are now explicitly zeroed out.
*/
#ifndef SLANG_VC
#define SLANG_VC 0
#endif
#ifndef SLANG_CLANG
#define SLANG_CLANG 0
#endif
#ifndef SLANG_SNC
#define SLANG_SNC 0
#endif
#ifndef SLANG_GHS
#define SLANG_GHS 0
#endif
#ifndef SLANG_GCC
#define SLANG_GCC 0
#endif
#endif /* SLANG_COMPILER */
/*
The following section attempts to detect the target platform being compiled for.
If an application defines `SLANG_PLATFORM` before including this header,
they take responsibility for setting any compiler-dependent macros
used later in the file.
Most applications should not need to touch this section.
*/
#ifndef SLANG_PLATFORM
#define SLANG_PLATFORM
/**
Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
*/
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
#define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
#elif defined(XBOXONE)
#define SLANG_XBOXONE 1
#elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
#define SLANG_WIN64 1
#elif defined(_M_PPC)
#define SLANG_X360 1
#elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
#define SLANG_WIN32 1
#elif defined(__ANDROID__)
#define SLANG_ANDROID 1
#elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
#define SLANG_LINUX 1
#elif defined(__APPLE__) && !defined(SLANG_LLVM)
#include "TargetConditionals.h"
#if TARGET_OS_MAC
#define SLANG_OSX 1
#else
#define SLANG_IOS 1
#endif
#elif defined(__APPLE__)
// On `slang-llvm` we can't include "TargetConditionals.h" in general, so for now assume it's
// OSX.
#define SLANG_OSX 1
#elif defined(__CELLOS_LV2__)
#define SLANG_PS3 1
#elif defined(__ORBIS__)
#define SLANG_PS4 1
#elif defined(__SNC__) && defined(__arm__)
#define SLANG_PSP2 1
#elif defined(__ghs__)
#define SLANG_WIIU 1
#else
#error "unknown target platform"
#endif
/*
Any platforms not detected by the above logic are now explicitly zeroed out.
*/
#ifndef SLANG_WINRT
#define SLANG_WINRT 0
#endif
#ifndef SLANG_XBOXONE
#define SLANG_XBOXONE 0
#endif
#ifndef SLANG_WIN64
#define SLANG_WIN64 0
#endif
#ifndef SLANG_X360
#define SLANG_X360 0
#endif
#ifndef SLANG_WIN32
#define SLANG_WIN32 0
#endif
#ifndef SLANG_ANDROID
#define SLANG_ANDROID 0
#endif
#ifndef SLANG_LINUX
#define SLANG_LINUX 0
#endif
#ifndef SLANG_IOS
#define SLANG_IOS 0
#endif
#ifndef SLANG_OSX
#define SLANG_OSX 0
#endif
#ifndef SLANG_PS3
#define SLANG_PS3 0
#endif
#ifndef SLANG_PS4
#define SLANG_PS4 0
#endif
#ifndef SLANG_PSP2
#define SLANG_PSP2 0
#endif
#ifndef SLANG_WIIU
#define SLANG_WIIU 0
#endif
#endif /* SLANG_PLATFORM */
/* Shorthands for "families" of compilers/platforms */
#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
#define SLANG_UNIX_FAMILY \
(SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
// GCC Specific
#if SLANG_GCC_FAMILY
#define SLANG_ALIGN_OF(T) __alignof__(T)
#define SLANG_BREAKPOINT(id) __builtin_trap()
// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
// non POD type, even though it produces the correct result
#define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
#endif // SLANG_GCC_FAMILY
// Microsoft VC specific
#if SLANG_VC
#define SLANG_ALIGN_OF(T) __alignof(T)
#define SLANG_BREAKPOINT(id) __debugbreak();
#endif // SLANG_VC
// Default impls
#ifndef SLANG_OFFSET_OF
#define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
#endif
#ifndef SLANG_BREAKPOINT
// Make it crash with a write to 0!
#define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
#endif
// If slang.h has been included we don't need any of these definitions
#ifndef SLANG_H
/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
#ifndef SLANG_NO_THROW
#if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
#define SLANG_NO_THROW __declspec(nothrow)
#endif
#endif
#ifndef SLANG_NO_THROW
#define SLANG_NO_THROW
#endif
/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
convention for interface methods.
*/
#ifndef SLANG_STDCALL
#if SLANG_MICROSOFT_FAMILY
#define SLANG_STDCALL __stdcall
#else
#define SLANG_STDCALL
#endif
#endif
#ifndef SLANG_MCALL
#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
// TODO(JS): Should these be in slang-cpp-types.h?
// They are more likely to clash with slang.h
struct SlangUUID
{
uint32_t data1;
uint16_t data2;
uint16_t data3;
uint8_t data4[8];
};
typedef int32_t SlangResult;
struct ISlangUnknown
{
virtual SLANG_NO_THROW SlangResult SLANG_MCALL
queryInterface(SlangUUID const& uuid, void** outObject) = 0;
virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
};
#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
public: \
SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \
{ \
static const SlangUUID guid = {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7}; \
return guid; \
}
#endif // SLANG_H
// Includes
#include "slang-cpp-scalar-intrinsics.h"
#include "slang-cpp-types.h"
// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables.
#if defined(_MSC_VER)
#pragma warning(disable : 4700)
#endif
#ifndef SLANG_UNROLL
#define SLANG_UNROLL
#endif
#endif

View file

@ -1,805 +0,0 @@
#ifndef SLANG_PRELUDE_SCALAR_INTRINSICS_H
#define SLANG_PRELUDE_SCALAR_INTRINSICS_H
#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC
// If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include
// x86 intrinsics
#include <intrin.h>
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
#ifdef SLANG_PRELUDE_NAMESPACE
namespace SLANG_PRELUDE_NAMESPACE
{
#endif
#ifndef SLANG_PRELUDE_PI
#define SLANG_PRELUDE_PI 3.14159265358979323846
#endif
// Unions used throughout this header to reinterpret the bits of one
// 32-/64-bit scalar type as another without changing the bit pattern.
union Union32
{
    uint32_t u;
    int32_t i;
    float f;
};
union Union64
{
    uint64_t u;
    int64_t i;
    double d;
};
// 32 bit cast conversions.
// Each helper routes a 32-bit value through Union32 so only the
// interpretation of the bits changes, never the bit pattern itself.
SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f)
{
    Union32 bits;
    bits.f = f;
    return bits.i;
}
SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i)
{
    Union32 bits;
    bits.i = i;
    return bits.f;
}
SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f)
{
    Union32 bits;
    bits.f = f;
    return bits.u;
}
SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui)
{
    Union32 bits;
    bits.u = ui;
    return bits.f;
}
// ----------------------------- F16 -----------------------------------------

// Converts a 32-bit float to IEEE-754 half-precision bits (returned in the
// low 16 bits), with round-to-nearest on the dropped mantissa bit and
// explicit handling of zero/underflow, NaN/INF, and overflow cases.
// This impl is based on FloatToHalf that is in Slang codebase
SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
{
    const uint32_t inBits = _bitCastFloatToUInt(value);
    // bits initially set to just the sign bit
    uint32_t bits = (inBits >> 16) & 0x8000;
    // Mantissa can't be used as is, as it holds last bit, for rounding.
    uint32_t m = (inBits >> 12) & 0x07ff;
    uint32_t e = (inBits >> 23) & 0xff;
    if (e < 103)
    {
        // It's zero
        return bits;
    }
    if (e == 0xff)
    {
        // Could be a NAN or INF. Is INF if *input* mantissa is 0.
        // Remove last bit for rounding to make output mantissa.
        m >>= 1;
        // We *assume* float16/float32 signaling bit and remaining bits
        // semantics are the same. (The signalling bit convention is target specific!).
        // Non signal bit's usage within mantissa for a NAN are also target specific.
        // If the m is 0, it could be because the result is INF, but it could also be because all
        // the bits that made NAN were dropped as we have less mantissa bits in f16.
        // To fix for this we make non zero if m is 0 and the input mantissa was not.
        // This will (typically) produce a signalling NAN.
        m += uint32_t(m == 0 && (inBits & 0x007fffffu));
        // Combine for output
        return (bits | 0x7c00u | m);
    }
    if (e > 142)
    {
        // INF.
        return bits | 0x7c00u;
    }
    if (e < 113)
    {
        // Denormal result: shift in the implicit leading 1 and round.
        m |= 0x0800u;
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }
    // Normalized result: rebias the exponent (127 -> 15) and round the
    // mantissa via the extra low bit carried in m.
    bits |= ((e - 112) << 10) | (m >> 1);
    bits += m & 1;
    return bits;
}
// Scale factor used to renormalize f16 denormals: 2^(127-15) encoded as a
// float bit pattern (exponent-only, zero mantissa).
static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23);
// Converts IEEE-754 half-precision bits (low 16 bits of `value`) to a 32-bit
// float, handling denormals via the magic multiply and NaN/INF by exponent
// substitution.
SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
{
    const uint32_t sign = (value & 0x8000) << 16;
    uint32_t exponent = (value & 0x7c00) >> 10;
    uint32_t mantissa = (value & 0x03ff);
    if (exponent == 0)
    {
        // If mantissa is 0 we are done, as output is 0.
        // If it's not zero we must have a denormal.
        if (mantissa)
        {
            // We have a denormal so use the magic to do exponent adjust
            return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
        }
    }
    else
    {
        // If the exponent is NAN or INF exponent is 0x1f on input.
        // If that's the case, we just need to set the exponent to 0xff on output
        // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just
        // copy the bits
        //
        // Else we need to correct the exponent in the normalized case.
        exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
    }
    return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
}
// ----------------------------- F32 -----------------------------------------
// Helpers
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
#ifdef SLANG_LLVM
// slang-llvm build: the math routines are declared here with C linkage and
// provided externally by the host; only min/max/fma are defined inline.
SLANG_PRELUDE_EXTERN_C_START
// Unary
float F32_ceil(float f);
float F32_floor(float f);
float F32_round(float f);
float F32_sin(float f);
float F32_cos(float f);
float F32_tan(float f);
float F32_asin(float f);
float F32_acos(float f);
float F32_atan(float f);
float F32_sinh(float f);
float F32_cosh(float f);
float F32_tanh(float f);
float F32_log2(float f);
float F32_log(float f);
float F32_log10(float f);
float F32_exp2(float f);
float F32_exp(float f);
float F32_abs(float f);
float F32_trunc(float f);
float F32_sqrt(float f);
bool F32_isnan(float f);
bool F32_isfinite(float f);
bool F32_isinf(float f);
// Binary
// NOTE(review): unlike ::fminf/::fmaxf these ternaries return b whenever a
// compares false (including when a is NaN) — confirm NaN semantics are
// acceptable for the LLVM path.
SLANG_FORCE_INLINE float F32_min(float a, float b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE float F32_max(float a, float b)
{
    return a > b ? a : b;
}
float F32_pow(float a, float b);
float F32_fmod(float a, float b);
float F32_remainder(float a, float b);
float F32_atan2(float a, float b);
float F32_frexp(float x, int* e);
float F32_modf(float x, float* ip);
// Ternary
SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
{
    return a * b + c;
}
SLANG_PRELUDE_EXTERN_C_END
#else
// Regular C++ build: thin wrappers over the C math library.
// Unary
SLANG_FORCE_INLINE float F32_ceil(float f)
{
    return ::ceilf(f);
}
SLANG_FORCE_INLINE float F32_floor(float f)
{
    return ::floorf(f);
}
SLANG_FORCE_INLINE float F32_round(float f)
{
    return ::roundf(f);
}
SLANG_FORCE_INLINE float F32_sin(float f)
{
    return ::sinf(f);
}
SLANG_FORCE_INLINE float F32_cos(float f)
{
    return ::cosf(f);
}
SLANG_FORCE_INLINE float F32_tan(float f)
{
    return ::tanf(f);
}
SLANG_FORCE_INLINE float F32_asin(float f)
{
    return ::asinf(f);
}
SLANG_FORCE_INLINE float F32_acos(float f)
{
    return ::acosf(f);
}
SLANG_FORCE_INLINE float F32_atan(float f)
{
    return ::atanf(f);
}
SLANG_FORCE_INLINE float F32_sinh(float f)
{
    return ::sinhf(f);
}
SLANG_FORCE_INLINE float F32_cosh(float f)
{
    return ::coshf(f);
}
SLANG_FORCE_INLINE float F32_tanh(float f)
{
    return ::tanhf(f);
}
SLANG_FORCE_INLINE float F32_log2(float f)
{
    return ::log2f(f);
}
SLANG_FORCE_INLINE float F32_log(float f)
{
    return ::logf(f);
}
SLANG_FORCE_INLINE float F32_log10(float f)
{
    return ::log10f(f);
}
SLANG_FORCE_INLINE float F32_exp2(float f)
{
    return ::exp2f(f);
}
SLANG_FORCE_INLINE float F32_exp(float f)
{
    return ::expf(f);
}
SLANG_FORCE_INLINE float F32_abs(float f)
{
    return ::fabsf(f);
}
SLANG_FORCE_INLINE float F32_trunc(float f)
{
    return ::truncf(f);
}
SLANG_FORCE_INLINE float F32_sqrt(float f)
{
    return ::sqrtf(f);
}
// SLANG_PRELUDE_STD is defined elsewhere in the prelude (selects std:: or
// global-namespace classification functions).
SLANG_FORCE_INLINE bool F32_isnan(float f)
{
    return SLANG_PRELUDE_STD isnan(f);
}
SLANG_FORCE_INLINE bool F32_isfinite(float f)
{
    return SLANG_PRELUDE_STD isfinite(f);
}
SLANG_FORCE_INLINE bool F32_isinf(float f)
{
    return SLANG_PRELUDE_STD isinf(f);
}
// Binary
SLANG_FORCE_INLINE float F32_min(float a, float b)
{
    return ::fminf(a, b);
}
SLANG_FORCE_INLINE float F32_max(float a, float b)
{
    return ::fmaxf(a, b);
}
SLANG_FORCE_INLINE float F32_pow(float a, float b)
{
    return ::powf(a, b);
}
SLANG_FORCE_INLINE float F32_fmod(float a, float b)
{
    return ::fmodf(a, b);
}
SLANG_FORCE_INLINE float F32_remainder(float a, float b)
{
    return ::remainderf(a, b);
}
// Computed in double via ::atan2, then narrowed to float.
SLANG_FORCE_INLINE float F32_atan2(float a, float b)
{
    return float(::atan2(a, b));
}
SLANG_FORCE_INLINE float F32_frexp(float x, int* e)
{
    return ::frexpf(x, e);
}
SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
{
    return ::modff(x, ip);
}
// Ternary
SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
{
    return ::fmaf(a, b, c);
}
#endif
// Reduces an angle to the [0, 2*pi) range so downstream trig calls stay in a
// well-conditioned domain.
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
{
    // Put 0 to 2pi cycles to cycle around 0 to 1
    float a = radians * (1.0f / float(SLANG_PRELUDE_PI * 2));
    // Get truncated fraction, as value in 0 - 1 range
    a = a - F32_floor(a);
    // Convert back to 0 - 2pi range
    return (a * float(SLANG_PRELUDE_PI * 2));
}
// Reciprocal square root (computed as a divide, not a fast approximation).
SLANG_FORCE_INLINE float F32_rsqrt(float f)
{
    return 1.0f / F32_sqrt(f);
}
// Sign function: returns -1, +1, or the input itself when it is +/-0.
SLANG_FORCE_INLINE float F32_sign(float f)
{
    return (f == 0.0f) ? f : ((f < 0.0f) ? -1.0f : 1.0f);
}
// Fractional part, matching HLSL frac: f - floor(f).
SLANG_FORCE_INLINE float F32_frac(float f)
{
    return f - F32_floor(f);
}
// Reinterpret float bits as uint32 (HLSL asuint).
SLANG_FORCE_INLINE uint32_t F32_asuint(float f)
{
    Union32 u;
    u.f = f;
    return u.u;
}
// Reinterpret float bits as int32 (HLSL asint).
SLANG_FORCE_INLINE int32_t F32_asint(float f)
{
    Union32 u;
    u.f = f;
    return u.i;
}
// ----------------------------- F64 -----------------------------------------
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians);
#ifdef SLANG_LLVM
// slang-llvm build: math routines declared with C linkage, provided
// externally by the host; only min/max/fma are defined inline.
SLANG_PRELUDE_EXTERN_C_START
// Unary
double F64_ceil(double f);
double F64_floor(double f);
double F64_round(double f);
double F64_sin(double f);
double F64_cos(double f);
double F64_tan(double f);
double F64_asin(double f);
double F64_acos(double f);
double F64_atan(double f);
double F64_sinh(double f);
double F64_cosh(double f);
double F64_tanh(double f);
double F64_log2(double f);
double F64_log(double f);
double F64_log10(double f);
double F64_exp2(double f);
double F64_exp(double f);
double F64_abs(double f);
double F64_trunc(double f);
double F64_sqrt(double f);
bool F64_isnan(double f);
bool F64_isfinite(double f);
bool F64_isinf(double f);
// Binary
// NOTE(review): unlike ::fmin/::fmax these ternaries return b whenever a
// compares false (including when a is NaN).
SLANG_FORCE_INLINE double F64_min(double a, double b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE double F64_max(double a, double b)
{
    return a > b ? a : b;
}
double F64_pow(double a, double b);
double F64_fmod(double a, double b);
double F64_remainder(double a, double b);
double F64_atan2(double a, double b);
double F64_frexp(double x, int* e);
double F64_modf(double x, double* ip);
// Ternary
SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
{
    return a * b + c;
}
SLANG_PRELUDE_EXTERN_C_END
#else // SLANG_LLVM
// Regular C++ build: thin wrappers over the C math library.
// Unary
SLANG_FORCE_INLINE double F64_ceil(double f)
{
    return ::ceil(f);
}
SLANG_FORCE_INLINE double F64_floor(double f)
{
    return ::floor(f);
}
SLANG_FORCE_INLINE double F64_round(double f)
{
    return ::round(f);
}
SLANG_FORCE_INLINE double F64_sin(double f)
{
    return ::sin(f);
}
SLANG_FORCE_INLINE double F64_cos(double f)
{
    return ::cos(f);
}
SLANG_FORCE_INLINE double F64_tan(double f)
{
    return ::tan(f);
}
SLANG_FORCE_INLINE double F64_asin(double f)
{
    return ::asin(f);
}
SLANG_FORCE_INLINE double F64_acos(double f)
{
    return ::acos(f);
}
SLANG_FORCE_INLINE double F64_atan(double f)
{
    return ::atan(f);
}
SLANG_FORCE_INLINE double F64_sinh(double f)
{
    return ::sinh(f);
}
SLANG_FORCE_INLINE double F64_cosh(double f)
{
    return ::cosh(f);
}
SLANG_FORCE_INLINE double F64_tanh(double f)
{
    return ::tanh(f);
}
SLANG_FORCE_INLINE double F64_log2(double f)
{
    return ::log2(f);
}
SLANG_FORCE_INLINE double F64_log(double f)
{
    return ::log(f);
}
// Base-10 logarithm.
// BUG FIX: the parameter was previously declared `float f`, which silently
// truncated double arguments to float precision before calling ::log10.
// Every other F64_* wrapper takes `double`; widening the parameter type is
// backward compatible for all callers.
SLANG_FORCE_INLINE double F64_log10(double f)
{
    return ::log10(f);
}
SLANG_FORCE_INLINE double F64_exp2(double f)
{
    return ::exp2(f);
}
SLANG_FORCE_INLINE double F64_exp(double f)
{
    return ::exp(f);
}
SLANG_FORCE_INLINE double F64_abs(double f)
{
    return ::fabs(f);
}
SLANG_FORCE_INLINE double F64_trunc(double f)
{
    return ::trunc(f);
}
SLANG_FORCE_INLINE double F64_sqrt(double f)
{
    return ::sqrt(f);
}
// SLANG_PRELUDE_STD is defined elsewhere in the prelude (selects std:: or
// global-namespace classification functions).
SLANG_FORCE_INLINE bool F64_isnan(double f)
{
    return SLANG_PRELUDE_STD isnan(f);
}
SLANG_FORCE_INLINE bool F64_isfinite(double f)
{
    return SLANG_PRELUDE_STD isfinite(f);
}
SLANG_FORCE_INLINE bool F64_isinf(double f)
{
    return SLANG_PRELUDE_STD isinf(f);
}
// Binary
SLANG_FORCE_INLINE double F64_min(double a, double b)
{
    return ::fmin(a, b);
}
SLANG_FORCE_INLINE double F64_max(double a, double b)
{
    return ::fmax(a, b);
}
SLANG_FORCE_INLINE double F64_pow(double a, double b)
{
    return ::pow(a, b);
}
SLANG_FORCE_INLINE double F64_fmod(double a, double b)
{
    return ::fmod(a, b);
}
SLANG_FORCE_INLINE double F64_remainder(double a, double b)
{
    return ::remainder(a, b);
}
SLANG_FORCE_INLINE double F64_atan2(double a, double b)
{
    return ::atan2(a, b);
}
SLANG_FORCE_INLINE double F64_frexp(double x, int* e)
{
    return ::frexp(x, e);
}
SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
{
    return ::modf(x, ip);
}
// Ternary
SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
{
    return ::fma(a, b, c);
}
#endif // SLANG_LLVM
// Reciprocal square root (computed as a divide, not a fast approximation).
SLANG_FORCE_INLINE double F64_rsqrt(double f)
{
    return 1.0 / F64_sqrt(f);
}
// Sign function: returns -1, +1, or the input itself when it is +/-0.
SLANG_FORCE_INLINE double F64_sign(double f)
{
    return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0);
}
// Fractional part, matching HLSL frac: f - floor(f).
SLANG_FORCE_INLINE double F64_frac(double f)
{
    return f - F64_floor(f);
}
// Splits the bit pattern of a double into two 32-bit halves (HLSL asuint).
SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi)
{
    Union64 u;
    u.d = d;
    *low = uint32_t(u.u);
    *hi = uint32_t(u.u >> 32);
}
// Splits the bit pattern of a double into two signed 32-bit halves.
SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi)
{
    Union64 u;
    u.d = d;
    *low = int32_t(u.u);
    *hi = int32_t(u.u >> 32);
}
// Reduces an angle to the [0, 2*pi) range. The `1.0f` literal is promoted to
// double by the division, so the computation stays in double precision.
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
{
    // Put 0 to 2pi cycles to cycle around 0 to 1
    double a = radians * (1.0f / (SLANG_PRELUDE_PI * 2));
    // Get truncated fraction, as value in 0 - 1 range
    a = a - F64_floor(a);
    // Convert back to 0 - 2pi range
    return (a * (SLANG_PRELUDE_PI * 2));
}
// ----------------------------- I32 -----------------------------------------
// Absolute value.
// NOTE(review): negating INT32_MIN overflows (UB), exactly as in the
// original ternary form — confirm callers never pass INT32_MIN.
SLANG_FORCE_INLINE int32_t I32_abs(int32_t f)
{
    if (f < 0)
        return -f;
    return f;
}
// Smaller of two signed 32-bit values.
SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b)
{
    if (b < a)
        return b;
    return a;
}
// Larger of two signed 32-bit values.
SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b)
{
    if (b > a)
        return b;
    return a;
}
// Reinterpret int32 bits as float (HLSL asfloat).
SLANG_FORCE_INLINE float I32_asfloat(int32_t x)
{
    Union32 u;
    u.i = x;
    return u.f;
}
// Reinterpret int32 bits as uint32 (HLSL asuint).
SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x)
{
    return uint32_t(x);
}
// Reassembles a double from two signed 32-bit halves (HLSL asdouble).
SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi)
{
    Union64 u;
    u.u = (uint64_t(hi) << 32) | uint32_t(low);
    return u.d;
}
// ----------------------------- U32 -----------------------------------------
// Unsigned abs is the identity.
SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f)
{
    return f;
}
SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b)
{
    return a > b ? a : b;
}
// Reinterpret uint32 bits as float (HLSL asfloat).
SLANG_FORCE_INLINE float U32_asfloat(uint32_t x)
{
    Union32 u;
    u.u = x;
    return u.f;
}
// NOTE(review): this signature looks inverted for an "asint" on U32 — it
// takes int32_t and returns uint32_t, whereas HLSL's asint(uint) yields int.
// Confirm the intended signature against the upstream prelude before changing.
SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x)
{
    return uint32_t(x);
}
// Reassembles a double from two unsigned 32-bit halves (HLSL asdouble).
SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi)
{
    Union64 u;
    u.u = (uint64_t(hi) << 32) | low;
    return u.d;
}
// Population count: number of set bits. Uses a hardware popcount where the
// build configuration guarantees one, otherwise Kernighan's clear-lowest-bit
// loop.
SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
{
#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
    return __builtin_popcount(v);
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
    return __popcnt(v);
#else
    uint32_t c = 0;
    while (v)
    {
        c++;
        v &= v - 1;
    }
    return c;
#endif
}
// ----------------------------- U64 -----------------------------------------
// Unsigned abs is the identity.
SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f)
{
    return f;
}
SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b)
{
    return a > b ? a : b;
}
// TODO(JS): We don't define countbits for 64bit in the core module currently.
// It's not clear from documentation if it should return 32 or 64 bits, if it exists.
// 32 bits can always hold the result, and will be implicitly promoted.
SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
{
#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
    return uint32_t(__builtin_popcountl(v));
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
    return uint32_t(__popcnt64(v));
#else
    // Kernighan's loop: each iteration clears the lowest set bit.
    uint32_t c = 0;
    while (v)
    {
        c++;
        v &= v - 1;
    }
    return c;
#endif
}
// ----------------------------- I64 -----------------------------------------
// NOTE(review): negating INT64_MIN overflows (UB) — confirm callers never
// pass INT64_MIN.
SLANG_FORCE_INLINE int64_t I64_abs(int64_t f)
{
    return (f < 0) ? -f : f;
}
SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b)
{
    return a > b ? a : b;
}
// ----------------------------- Interlocked ---------------------------------
// On the SLANG_LLVM path no interlocked ops are provided here.
#if SLANG_LLVM
#else // SLANG_LLVM
#ifdef _WIN32
#include <intrin.h>
#endif
// Atomic fetch-and-add on *dest; the pre-add value is written to *oldValue.
SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue)
{
#ifdef _WIN32
    *oldValue = _InterlockedExchangeAdd((long*)dest, (long)value);
#else
    *oldValue = __sync_fetch_and_add(dest, value);
#endif
}
#endif // SLANG_LLVM
// ----------------------- fmod --------------------------
// Overloads used by generated code so `%` on float/double matrices and
// vectors dispatches to the right width.
SLANG_FORCE_INLINE float _slang_fmod(float x, float y)
{
    return F32_fmod(x, y);
}
SLANG_FORCE_INLINE double _slang_fmod(double x, double y)
{
    return F64_fmod(x, y);
}
#ifdef SLANG_PRELUDE_NAMESPACE
}
#endif
#endif

View file

@ -1,671 +0,0 @@
#ifndef SLANG_PRELUDE_CPP_TYPES_CORE_H
#define SLANG_PRELUDE_CPP_TYPES_CORE_H
#ifndef SLANG_PRELUDE_ASSERT
#ifdef SLANG_PRELUDE_ENABLE_ASSERT
#define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
#else
#define SLANG_PRELUDE_ASSERT(VALUE)
#endif
#endif
// Since we are using unsigned arithmatic care is need in this comparison.
// It is *assumed* that sizeInBytes >= elemSize. Which means (sizeInBytes >= elemSize) >= 0
// Which means only a single test is needed
// Asserts for bounds checking.
// It is assumed index/count are unsigned types.
#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
    SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
// Macros to zero index if an access is out of range
#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
    index = (index <= (sizeInBytes - elemSize)) ? index : 0;
// The 'FIX' macro define how the index is fixed. The default is to do nothing. If
// SLANG_ENABLE_BOUND_ZERO_INDEX the fix macro will zero the index, if out of range
#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
    SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
#else
#define SLANG_BOUND_FIX(index, count)
#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
// The CHECK macros combine the assert (debug diagnostics) with the optional
// index-zeroing fix; both pieces may expand to nothing depending on config.
#ifndef SLANG_BOUND_CHECK
#define SLANG_BOUND_CHECK(index, count) \
    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
#endif
#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
    SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
    SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#endif
#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
    SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
// Runtime type descriptor; currently just the size in bytes.
struct TypeInfo
{
    size_t typeSize;
};
// Fixed-size array wrapper with (configurable) bounds checking on indexing.
template<typename T, size_t SIZE>
struct FixedArray
{
    const T& operator[](size_t index) const
    {
        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
        return m_data[index];
    }
    T& operator[](size_t index)
    {
        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
        return m_data[index];
    }
    T m_data[SIZE];
};
// An array that has no specified size, becomes a 'Array'. This stores the size so it can
// potentially do bounds checking.
// NOTE: non-owning view (pointer + count); the backing storage is managed by
// the caller.
template<typename T>
struct Array
{
    const T& operator[](size_t index) const
    {
        SLANG_BOUND_CHECK(index, count);
        return data[index];
    }
    T& operator[](size_t index)
    {
        SLANG_BOUND_CHECK(index, count);
        return data[index];
    }
    T* data;
    size_t count;
};
/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++
 * code.
 */
// Vector<T, COUNT>: specializations for 1..4 components mirror HLSL
// float1..float4 etc. Generated code accesses components both by name
// (x/y/z/w) and by index via operator[].
template<typename T, int COUNT>
struct Vector;
template<typename T>
struct Vector<T, 1>
{
    T x;
    const T& operator[](size_t /*index*/) const { return x; }
    T& operator[](size_t /*index*/) { return x; }
    // A 1-vector implicitly converts to its scalar.
    operator T() const { return x; }
    Vector() = default;
    Vector(T scalar) { x = scalar; }
    template<typename U>
    Vector(Vector<U, 1> other)
    {
        x = (T)other.x;
    }
    // Converting ctor from a vector of any size: copies the overlapping
    // prefix, leaves the rest default-initialized (i.e. unchanged).
    template<typename U, int otherSize>
    Vector(Vector<U, otherSize> other)
    {
        int minSize = 1;
        if (otherSize < minSize)
            minSize = otherSize;
        for (int i = 0; i < minSize; i++)
            (*this)[i] = (T)other[i];
    }
};
template<typename T>
struct Vector<T, 2>
{
    T x, y;
    // NOTE: any index other than 0 yields y — out-of-range indices are not
    // diagnosed here.
    const T& operator[](size_t index) const { return index == 0 ? x : y; }
    T& operator[](size_t index) { return index == 0 ? x : y; }
    Vector() = default;
    Vector(T scalar) { x = y = scalar; }
    Vector(T _x, T _y)
    {
        x = _x;
        y = _y;
    }
    template<typename U>
    Vector(Vector<U, 2> other)
    {
        x = (T)other.x;
        y = (T)other.y;
    }
    template<typename U, int otherSize>
    Vector(Vector<U, otherSize> other)
    {
        int minSize = 2;
        if (otherSize < minSize)
            minSize = otherSize;
        for (int i = 0; i < minSize; i++)
            (*this)[i] = (T)other[i];
    }
};
template<typename T>
struct Vector<T, 3>
{
    T x, y, z;
    // NOTE(review): indexing via pointer arithmetic assumes x/y/z are laid
    // out contiguously with no padding — true in practice for these scalar
    // fields, but formally unspecified; confirm if a stricter compiler mode
    // is ever targeted.
    const T& operator[](size_t index) const { return *((T*)(this) + index); }
    T& operator[](size_t index) { return *((T*)(this) + index); }
    Vector() = default;
    Vector(T scalar) { x = y = z = scalar; }
    Vector(T _x, T _y, T _z)
    {
        x = _x;
        y = _y;
        z = _z;
    }
    template<typename U>
    Vector(Vector<U, 3> other)
    {
        x = (T)other.x;
        y = (T)other.y;
        z = (T)other.z;
    }
    template<typename U, int otherSize>
    Vector(Vector<U, otherSize> other)
    {
        int minSize = 3;
        if (otherSize < minSize)
            minSize = otherSize;
        for (int i = 0; i < minSize; i++)
            (*this)[i] = (T)other[i];
    }
};
template<typename T>
struct Vector<T, 4>
{
    T x, y, z, w;
    // Same contiguous-layout assumption as Vector<T, 3>.
    const T& operator[](size_t index) const { return *((T*)(this) + index); }
    T& operator[](size_t index) { return *((T*)(this) + index); }
    Vector() = default;
    Vector(T scalar) { x = y = z = w = scalar; }
    Vector(T _x, T _y, T _z, T _w)
    {
        x = _x;
        y = _y;
        z = _z;
        w = _w;
    }
    template<typename U, int otherSize>
    Vector(Vector<U, otherSize> other)
    {
        int minSize = 4;
        if (otherSize < minSize)
            minSize = otherSize;
        for (int i = 0; i < minSize; i++)
            (*this)[i] = (T)other[i];
    }
};
// Component-wise select: result[i] = condition[i] ? v0[i] : v1[i]
// (mirrors HLSL select()).
template<typename T, int N>
SLANG_FORCE_INLINE Vector<T, N> _slang_select(
    Vector<bool, N> condition,
    Vector<T, N> v0,
    Vector<T, N> v1)
{
    Vector<T, N> result;
    for (int i = 0; i < N; i++)
    {
        result[i] = condition[i] ? v0[i] : v1[i];
    }
    return result;
}
// Scalar overload of select.
template<typename T>
SLANG_FORCE_INLINE T _slang_select(bool condition, T v0, T v1)
{
    return condition ? v0 : v1;
}
// Reads one component by dynamic index.
template<typename T, int N>
SLANG_FORCE_INLINE T _slang_vector_get_element(Vector<T, N> x, int index)
{
    return x[index];
}
// Address of one component by dynamic index (const variant casts away const
// internally to reuse the non-const operator[]).
template<typename T, int N>
SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector<T, N>* x, int index)
{
    return &((*const_cast<Vector<T, N>*>(x))[index]);
}
template<typename T, int N>
SLANG_FORCE_INLINE T* _slang_vector_get_element_ptr(Vector<T, N>* x, int index)
{
    return &((*x)[index]);
}
// Converts an m-vector of OtherT to an n-vector of T: overlapping components
// are cast, extra destination components are zero-filled.
template<typename T, int n, typename OtherT, int m>
SLANG_FORCE_INLINE Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
{
    // NOTE(review): declared OtherT but initialized from T(0) — value is
    // identical for the arithmetic types used here, but confirm intent.
    OtherT otherElement = T(0);
    Vector<T, n> result;
    for (int i = 0; i < n; i++)
    {
        if (i < m)
            otherElement = _slang_vector_get_element(other, i);
        *_slang_vector_get_element_ptr(&result, i) = (T)otherElement;
    }
    return result;
}
typedef uint32_t uint;
// Generates a component-wise binary operator returning Vector<T, n>.
#define SLANG_VECTOR_BINARY_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<T, n> operator op( \
        const Vector<T, n>& thisVal, \
        const Vector<T, n>& other) \
    { \
        Vector<T, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = thisVal[i] op other[i]; \
        return result; \
    }
// Generates a component-wise comparison operator returning Vector<bool, n>.
#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<bool, n> operator op( \
        const Vector<T, n>& thisVal, \
        const Vector<T, n>& other) \
    { \
        Vector<bool, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = thisVal[i] op other[i]; \
        return result; \
    }
// Generates a component-wise unary (prefix) operator.
#define SLANG_VECTOR_UNARY_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal) \
    { \
        Vector<T, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = op thisVal[i]; \
        return result; \
    }
// Full operator set for integer-like element types (note: && and || applied
// component-wise lose short-circuit semantics by design).
#define SLANG_INT_VECTOR_OPS(T) \
    SLANG_VECTOR_BINARY_OP(T, +) \
    SLANG_VECTOR_BINARY_OP(T, -) \
    SLANG_VECTOR_BINARY_OP(T, *) \
    SLANG_VECTOR_BINARY_OP(T, /) \
    SLANG_VECTOR_BINARY_OP(T, &) \
    SLANG_VECTOR_BINARY_OP(T, |) \
    SLANG_VECTOR_BINARY_OP(T, &&) \
    SLANG_VECTOR_BINARY_OP(T, ||) \
    SLANG_VECTOR_BINARY_OP(T, ^) \
    SLANG_VECTOR_BINARY_OP(T, %) \
    SLANG_VECTOR_BINARY_OP(T, >>) \
    SLANG_VECTOR_BINARY_OP(T, <<) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=) \
    SLANG_VECTOR_UNARY_OP(T, !) \
    SLANG_VECTOR_UNARY_OP(T, ~)
// Operator set for floating-point element types (no bitwise/shift/%; float %
// is provided separately below via _slang_fmod).
#define SLANG_FLOAT_VECTOR_OPS(T) \
    SLANG_VECTOR_BINARY_OP(T, +) \
    SLANG_VECTOR_BINARY_OP(T, -) \
    SLANG_VECTOR_BINARY_OP(T, *) \
    SLANG_VECTOR_BINARY_OP(T, /) \
    SLANG_VECTOR_UNARY_OP(T, -) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)
SLANG_INT_VECTOR_OPS(bool)
SLANG_INT_VECTOR_OPS(int)
SLANG_INT_VECTOR_OPS(int8_t)
SLANG_INT_VECTOR_OPS(int16_t)
SLANG_INT_VECTOR_OPS(int64_t)
SLANG_INT_VECTOR_OPS(uint)
SLANG_INT_VECTOR_OPS(uint8_t)
SLANG_INT_VECTOR_OPS(uint16_t)
SLANG_INT_VECTOR_OPS(uint64_t)
SLANG_FLOAT_VECTOR_OPS(float)
SLANG_FLOAT_VECTOR_OPS(double)
// Unary minus for integer vectors, written as 0 - x so it is well-formed for
// unsigned element types too.
#define SLANG_VECTOR_INT_NEG_OP(T) \
    template<int N> \
    Vector<T, N> operator-(const Vector<T, N>& thisVal) \
    { \
        Vector<T, N> result; \
        for (int i = 0; i < N; i++) \
            result[i] = 0 - thisVal[i]; \
        return result; \
    }
SLANG_VECTOR_INT_NEG_OP(int)
SLANG_VECTOR_INT_NEG_OP(int8_t)
SLANG_VECTOR_INT_NEG_OP(int16_t)
SLANG_VECTOR_INT_NEG_OP(int64_t)
SLANG_VECTOR_INT_NEG_OP(uint)
SLANG_VECTOR_INT_NEG_OP(uint8_t)
SLANG_VECTOR_INT_NEG_OP(uint16_t)
SLANG_VECTOR_INT_NEG_OP(uint64_t)
// Component-wise % for float vectors, implemented with _slang_fmod.
#define SLANG_FLOAT_VECTOR_MOD(T) \
    template<int N> \
    Vector<T, N> operator%(const Vector<T, N>& left, const Vector<T, N>& right) \
    { \
        Vector<T, N> result; \
        for (int i = 0; i < N; i++) \
            result[i] = _slang_fmod(left[i], right[i]); \
        return result; \
    }
SLANG_FLOAT_VECTOR_MOD(float)
SLANG_FLOAT_VECTOR_MOD(double)
// NOTE: SLANG_FLOAT_VECTOR_MOD is #undef'd twice (here and below) —
// redundant but harmless.
#undef SLANG_FLOAT_VECTOR_MOD
#undef SLANG_VECTOR_BINARY_OP
#undef SLANG_VECTOR_UNARY_OP
#undef SLANG_INT_VECTOR_OPS
#undef SLANG_FLOAT_VECTOR_OPS
#undef SLANG_VECTOR_INT_NEG_OP
#undef SLANG_FLOAT_VECTOR_MOD
// Row-major matrix of ROWS x COLS elements, stored as an array of row
// vectors. The scalar-list constructors exist for the concrete dimensions
// generated slang code uses; constructors whose argument count is ambiguous
// between two shapes (6, 8, 12 scalars) branch on COLS at compile time.
template<typename T, int ROWS, int COLS>
struct Matrix
{
    Vector<T, COLS> rows[ROWS];
    // Indexing yields the whole row; no bounds checking here.
    Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
    Matrix() = default;
    // Broadcast a scalar to every element.
    Matrix(T scalar)
    {
        for (int i = 0; i < ROWS; i++)
            rows[i] = Vector<T, COLS>(scalar);
    }
    // Row-wise constructors for 1..4 rows.
    Matrix(const Vector<T, COLS>& row0) { rows[0] = row0; }
    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
    {
        rows[0] = row0;
        rows[1] = row1;
    }
    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
    {
        rows[0] = row0;
        rows[1] = row1;
        rows[2] = row2;
    }
    Matrix(
        const Vector<T, COLS>& row0,
        const Vector<T, COLS>& row1,
        const Vector<T, COLS>& row2,
        const Vector<T, COLS>& row3)
    {
        rows[0] = row0;
        rows[1] = row1;
        rows[2] = row2;
        rows[3] = row3;
    }
    // Converting constructor: copies the overlapping top-left sub-matrix,
    // casting each element; remaining elements stay default-initialized.
    template<typename U, int otherRow, int otherCol>
    Matrix(const Matrix<U, otherRow, otherCol>& other)
    {
        int minRow = ROWS;
        int minCol = COLS;
        if (minRow > otherRow)
            minRow = otherRow;
        if (minCol > otherCol)
            minCol = otherCol;
        for (int i = 0; i < minRow; i++)
            for (int j = 0; j < minCol; j++)
                rows[i][j] = (T)other.rows[i][j];
    }
    // 4 scalars: 2x2 only.
    Matrix(T v0, T v1, T v2, T v3)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[1][0] = v2;
        rows[1][1] = v3;
    }
    // 6 scalars: 2x3 when COLS == 3, otherwise 3x2.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5)
    {
        if (COLS == 3)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[1][0] = v3;
            rows[1][1] = v4;
            rows[1][2] = v5;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[1][0] = v2;
            rows[1][1] = v3;
            rows[2][0] = v4;
            rows[2][1] = v5;
        }
    }
    // 8 scalars: 2x4 when COLS == 4, otherwise 4x2.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
    {
        if (COLS == 4)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[0][3] = v3;
            rows[1][0] = v4;
            rows[1][1] = v5;
            rows[1][2] = v6;
            rows[1][3] = v7;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[1][0] = v2;
            rows[1][1] = v3;
            rows[2][0] = v4;
            rows[2][1] = v5;
            rows[3][0] = v6;
            rows[3][1] = v7;
        }
    }
    // 9 scalars: 3x3 only.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[0][2] = v2;
        rows[1][0] = v3;
        rows[1][1] = v4;
        rows[1][2] = v5;
        rows[2][0] = v6;
        rows[2][1] = v7;
        rows[2][2] = v8;
    }
    // 12 scalars: 3x4 when COLS == 4, otherwise 4x3.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
    {
        if (COLS == 4)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[0][3] = v3;
            rows[1][0] = v4;
            rows[1][1] = v5;
            rows[1][2] = v6;
            rows[1][3] = v7;
            rows[2][0] = v8;
            rows[2][1] = v9;
            rows[2][2] = v10;
            rows[2][3] = v11;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[1][0] = v3;
            rows[1][1] = v4;
            rows[1][2] = v5;
            rows[2][0] = v6;
            rows[2][1] = v7;
            rows[2][2] = v8;
            rows[3][0] = v9;
            rows[3][1] = v10;
            rows[3][2] = v11;
        }
    }
    // 16 scalars: 4x4 only.
    Matrix(
        T v0,
        T v1,
        T v2,
        T v3,
        T v4,
        T v5,
        T v6,
        T v7,
        T v8,
        T v9,
        T v10,
        T v11,
        T v12,
        T v13,
        T v14,
        T v15)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[0][2] = v2;
        rows[0][3] = v3;
        rows[1][0] = v4;
        rows[1][1] = v5;
        rows[1][2] = v6;
        rows[1][3] = v7;
        rows[2][0] = v8;
        rows[2][1] = v9;
        rows[2][2] = v10;
        rows[2][3] = v11;
        rows[3][0] = v12;
        rows[3][1] = v13;
        rows[3][2] = v14;
        rows[3][3] = v15;
    }
};
// Generates an element-wise binary operator for Matrix.
#define SLANG_MATRIX_BINARY_OP(T, op) \
    template<int R, int C> \
    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \
        return result; \
    }
// Generates an element-wise unary (prefix) operator for Matrix.
// BUG FIX: the assignment target was `result[i].rows[i][j]` — `result[i]`
// already yields the i'th row Vector, which has no `rows` member, so any
// instantiation of the operators generated here failed to compile. Corrected
// to `result.rows[i][j]`, matching SLANG_MATRIX_BINARY_OP above.
#define SLANG_MATRIX_UNARY_OP(T, op) \
    template<int R, int C> \
    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = op thisVal.rows[i][j]; \
        return result; \
    }
#define SLANG_INT_MATRIX_OPS(T) \
SLANG_MATRIX_BINARY_OP(T, +) \
SLANG_MATRIX_BINARY_OP(T, -) \
SLANG_MATRIX_BINARY_OP(T, *) \
SLANG_MATRIX_BINARY_OP(T, /) \
SLANG_MATRIX_BINARY_OP(T, &) \
SLANG_MATRIX_BINARY_OP(T, |) \
SLANG_MATRIX_BINARY_OP(T, &&) \
SLANG_MATRIX_BINARY_OP(T, ||) \
SLANG_MATRIX_BINARY_OP(T, ^) \
SLANG_MATRIX_BINARY_OP(T, %) \
SLANG_MATRIX_UNARY_OP(T, !) \
SLANG_MATRIX_UNARY_OP(T, ~)
#define SLANG_FLOAT_MATRIX_OPS(T) \
SLANG_MATRIX_BINARY_OP(T, +) \
SLANG_MATRIX_BINARY_OP(T, -) \
SLANG_MATRIX_BINARY_OP(T, *) \
SLANG_MATRIX_BINARY_OP(T, /) \
SLANG_MATRIX_UNARY_OP(T, -)
SLANG_INT_MATRIX_OPS(int)
SLANG_INT_MATRIX_OPS(int8_t)
SLANG_INT_MATRIX_OPS(int16_t)
SLANG_INT_MATRIX_OPS(int64_t)
SLANG_INT_MATRIX_OPS(uint)
SLANG_INT_MATRIX_OPS(uint8_t)
SLANG_INT_MATRIX_OPS(uint16_t)
SLANG_INT_MATRIX_OPS(uint64_t)
SLANG_FLOAT_MATRIX_OPS(float)
SLANG_FLOAT_MATRIX_OPS(double)
// Element-wise negation for integer matrices. Written as `0 - x` rather
// than `-x`, presumably to avoid compiler warnings about applying unary
// minus to unsigned types (this macro is instantiated for uint types
// below) — confirm.
#define SLANG_MATRIX_INT_NEG_OP(T) \
template<int R, int C> \
SLANG_FORCE_INLINE Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
{ \
Matrix<T, R, C> result; \
for (int i = 0; i < R; i++) \
for (int j = 0; j < C; j++) \
result.rows[i][j] = 0 - thisVal.rows[i][j]; \
return result; \
}
// Provide operator- for all integer matrix element types, signed and
// unsigned (unsigned negation is well-defined modular arithmetic in C++).
SLANG_MATRIX_INT_NEG_OP(int)
SLANG_MATRIX_INT_NEG_OP(int8_t)
SLANG_MATRIX_INT_NEG_OP(int16_t)
SLANG_MATRIX_INT_NEG_OP(int64_t)
SLANG_MATRIX_INT_NEG_OP(uint)
SLANG_MATRIX_INT_NEG_OP(uint8_t)
SLANG_MATRIX_INT_NEG_OP(uint16_t)
SLANG_MATRIX_INT_NEG_OP(uint64_t)
// Floating-point `%` for matrices, computed element-wise via _slang_fmod
// (declared elsewhere in the prelude).
#define SLANG_FLOAT_MATRIX_MOD(T) \
template<int R, int C> \
SLANG_FORCE_INLINE Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
{ \
Matrix<T, R, C> result; \
for (int i = 0; i < R; i++) \
for (int j = 0; j < C; j++) \
result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \
return result; \
}
SLANG_FLOAT_MATRIX_MOD(float)
SLANG_FLOAT_MATRIX_MOD(double)
// Clean up the helper macros so they do not leak into user code.
// (The previous list #undef'd SLANG_FLOAT_MATRIX_MOD twice; the redundant
// duplicate has been removed.)
#undef SLANG_FLOAT_MATRIX_MOD
#undef SLANG_MATRIX_BINARY_OP
#undef SLANG_MATRIX_UNARY_OP
#undef SLANG_INT_MATRIX_OPS
#undef SLANG_FLOAT_MATRIX_OPS
#undef SLANG_MATRIX_INT_NEG_OP
// Reinterpret the bits of `val` as a value of type TResult.
// NOTE(review): this type-puns through a pointer cast, which formally
// violates C++ strict-aliasing rules. It is presumably written this way so
// the prelude can compile in freestanding/no-header configurations where
// memcpy and std::bit_cast are unavailable — confirm before changing.
// Caller must ensure sizeof(TResult) <= sizeof(TInput); reading more bytes
// than `val` holds is undefined behavior.
template<typename TResult, typename TInput>
TResult slang_bit_cast(TInput val)
{
return *(TResult*)(&val);
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,8 +0,0 @@
// Optional NVAPI support: pull in NVIDIA's HLSL extension intrinsics when
// the build opts in via SLANG_HLSL_ENABLE_NVAPI.
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
// __DXC_VERSION_MAJOR is predefined by dxc, so this branch targets fxc —
// TODO confirm.
#ifndef __DXC_VERSION_MAJOR
// warning X3557: loop doesn't seem to do anything, forcing loop to unroll
#pragma warning(disable : 3557)
#endif

View file

@ -1,49 +0,0 @@
// slang-image-format-defs.h
//
// X-macro table of the image formats Slang knows about. Define
// SLANG_FORMAT(name, (SCALAR_TYPE, channelCount, sizeInBytes)) before
// including this header to generate code per format; the macro is #undef'd
// at the end. SCALAR_TYPE is NONE for packed formats (e.g. r11f_g11f_b10f,
// rgb10_a2) that have no single per-channel element type.
#ifndef SLANG_FORMAT
#error Must define SLANG_FORMAT macro before including image-format-defs.h
#endif
SLANG_FORMAT(unknown, (NONE, 0, 0))
SLANG_FORMAT(rgba32f, (FLOAT32, 4, sizeof(float) * 4))
SLANG_FORMAT(rgba16f, (FLOAT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rg32f, (FLOAT32, 2, sizeof(float) * 2))
SLANG_FORMAT(rg16f, (FLOAT16, 2, sizeof(uint16_t) * 2))
SLANG_FORMAT(r11f_g11f_b10f, (NONE, 3, sizeof(uint32_t)))
SLANG_FORMAT(r32f, (FLOAT32, 1, sizeof(float)))
SLANG_FORMAT(r16f, (FLOAT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(rgba16, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgb10_a2, (NONE, 4, sizeof(uint32_t)))
SLANG_FORMAT(rgba8, (UINT8, 4, sizeof(uint32_t)))
SLANG_FORMAT(rg16, (UINT16, 2, sizeof(uint16_t) * 2))
// Consistency fix: was `sizeof(char) * 2`; every other UINT8 entry uses
// uint8_t (same value by definition).
SLANG_FORMAT(rg8, (UINT8, 2, sizeof(uint8_t) * 2))
SLANG_FORMAT(r16, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(rgba16_snorm, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgba8_snorm, (UINT8, 4, sizeof(uint8_t) * 4))
SLANG_FORMAT(rg16_snorm, (UINT16, 2, sizeof(uint16_t) * 2))
SLANG_FORMAT(rg8_snorm, (UINT8, 2, sizeof(uint8_t) * 2))
SLANG_FORMAT(r16_snorm, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8_snorm, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(rgba32i, (INT32, 4, sizeof(int32_t) * 4))
SLANG_FORMAT(rgba16i, (INT16, 4, sizeof(int16_t) * 4))
SLANG_FORMAT(rgba8i, (INT8, 4, sizeof(int8_t) * 4))
SLANG_FORMAT(rg32i, (INT32, 2, sizeof(int32_t) * 2))
SLANG_FORMAT(rg16i, (INT16, 2, sizeof(int16_t) * 2))
SLANG_FORMAT(rg8i, (INT8, 2, sizeof(int8_t) * 2))
SLANG_FORMAT(r32i, (INT32, 1, sizeof(int32_t)))
SLANG_FORMAT(r16i, (INT16, 1, sizeof(int16_t)))
SLANG_FORMAT(r8i, (INT8, 1, sizeof(int8_t)))
SLANG_FORMAT(rgba32ui, (UINT32, 4, sizeof(uint32_t) * 4))
SLANG_FORMAT(rgba16ui, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgb10_a2ui, (NONE, 4, sizeof(uint32_t)))
SLANG_FORMAT(rgba8ui, (UINT8, 4, sizeof(uint8_t) * 4))
SLANG_FORMAT(rg32ui, (UINT32, 2, sizeof(uint32_t) * 2))
SLANG_FORMAT(rg16ui, (UINT16, 2, sizeof(uint16_t) * 2))
SLANG_FORMAT(rg8ui, (UINT8, 2, sizeof(uint8_t) * 2))
SLANG_FORMAT(r32ui, (UINT32, 1, sizeof(uint32_t)))
SLANG_FORMAT(r16ui, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8ui, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(r64ui, (UINT64, 1, sizeof(uint64_t)))
SLANG_FORMAT(r64i, (INT64, 1, sizeof(int64_t)))
#undef SLANG_FORMAT

View file

@ -1,404 +0,0 @@
// slang-llvm.h: freestanding prelude used when compiling Slang-generated
// C++ with LLVM/Clang and no system headers; the stddef/stdint sections
// below rely only on compiler builtin macros.
#ifndef SLANG_LLVM_H
#define SLANG_LLVM_H
// TODO(JS):
// Disable exception declspecs, as not supported on LLVM without some extra options.
// We could enable with `-fms-extensions`
#define SLANG_DISABLE_EXCEPTIONS 1
// Assertion hooks: when SLANG_PRELUDE_ENABLE_ASSERT is set, failures call a
// host-provided `assertFailure`; otherwise the macros compile away entirely.
#ifndef SLANG_PRELUDE_ASSERT
#ifdef SLANG_PRELUDE_ENABLE_ASSERT
extern "C" void assertFailure(const char* msg);
// The if/else expansion lets a use of the macro followed by `;` parse as a
// single well-formed statement.
#define SLANG_PRELUDE_EXPECT(VALUE, MSG) \
if (VALUE) \
{ \
} \
else \
assertFailure("assertion failed: '" MSG "'")
#define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
#else // SLANG_PRELUDE_ENABLE_ASSERT
#define SLANG_PRELUDE_EXPECT(VALUE, MSG)
#define SLANG_PRELUDE_ASSERT(x)
#endif // SLANG_PRELUDE_ENABLE_ASSERT
#endif
/*
Taken from stddef.h
*/
// Minimal stddef replacement built on compiler builtin macros
// (__PTRDIFF_TYPE__ etc.) so no system headers are needed.
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef __SIZE_TYPE__ size_t;
typedef __SIZE_TYPE__ rsize_t;
// typedef __WCHAR_TYPE__ wchar_t;
// NULL is only defined when the includer requested it via __need_NULL,
// mirroring the standard header's protocol.
#if defined(__need_NULL)
#undef NULL
#ifdef __cplusplus
#if !defined(__MINGW32__) && !defined(_MSC_VER)
#define NULL __null
#else
#define NULL 0
#endif
#else
#define NULL ((void*)0)
#endif
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std
{
typedef decltype(nullptr) nullptr_t;
}
using ::std::nullptr_t;
#endif
#endif
#undef __need_NULL
#endif /* defined(__need_NULL) */
/*
The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed.
*/
// NOTE(review): vendored code — prefer re-syncing with upstream Clang's
// stdint.h over editing locally, so the two do not drift apart.
// LLVM/Clang types such that we can use LLVM/Clang without headers for C++ output from Slang
#ifdef __INT64_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
typedef __INT64_TYPE__ int64_t;
#endif /* __int8_t_defined */
typedef __UINT64_TYPE__ uint64_t;
#define __int_least64_t int64_t
#define __uint_least64_t uint64_t
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
typedef __int_least64_t int_least64_t;
typedef __uint_least64_t uint_least64_t;
typedef __int_least64_t int_fast64_t;
typedef __uint_least64_t uint_fast64_t;
#endif /* __int_least64_t */
#ifdef __INT32_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
typedef __INT32_TYPE__ int32_t;
#endif /* __int8_t_defined */
#ifndef __uint32_t_defined /* more glibc compatibility */
#define __uint32_t_defined
typedef __UINT32_TYPE__ uint32_t;
#endif /* __uint32_t_defined */
#define __int_least32_t int32_t
#define __uint_least32_t uint32_t
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
typedef __int_least32_t int_least32_t;
typedef __uint_least32_t uint_least32_t;
typedef __int_least32_t int_fast32_t;
typedef __uint_least32_t uint_fast32_t;
#endif /* __int_least32_t */
#ifdef __INT16_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/
typedef __INT16_TYPE__ int16_t;
#endif /* __int8_t_defined */
typedef __UINT16_TYPE__ uint16_t;
#define __int_least16_t int16_t
#define __uint_least16_t uint16_t
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
typedef __int_least16_t int_least16_t;
typedef __uint_least16_t uint_least16_t;
typedef __int_least16_t int_fast16_t;
typedef __uint_least16_t uint_fast16_t;
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
typedef __INT8_TYPE__ int8_t;
#endif /* __int8_t_defined */
typedef __UINT8_TYPE__ uint8_t;
#define __int_least8_t int8_t
#define __uint_least8_t uint8_t
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
typedef __int_least8_t int_least8_t;
typedef __uint_least8_t uint_least8_t;
typedef __int_least8_t int_fast8_t;
typedef __uint_least8_t uint_fast8_t;
#endif /* __int_least8_t */
/* prevent glibc sys/types.h from defining conflicting types */
#ifndef __int8_t_defined
#define __int8_t_defined
#endif /* __int8_t_defined */
/* C99 7.18.1.4 Integer types capable of holding object pointers.
 */
#define __stdint_join3(a, b, c) a##b##c
#ifndef _INTPTR_T
#ifndef __intptr_t_defined
typedef __INTPTR_TYPE__ intptr_t;
#define __intptr_t_defined
#define _INTPTR_T
#endif
#endif
#ifndef _UINTPTR_T
typedef __UINTPTR_TYPE__ uintptr_t;
#define _UINTPTR_T
#endif
/* C99 7.18.1.5 Greatest-width integer types.
 */
typedef __INTMAX_TYPE__ intmax_t;
typedef __UINTMAX_TYPE__ uintmax_t;
/* C99 7.18.4 Macros for minimum-width integer constants.
 *
 * The standard requires that integer constant macros be defined for all the
 * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width
 * types are required, the corresponding integer constant macros are defined
 * here. This implementation also defines minimum-width types for every other
 * integer width that the target implements, so corresponding macros are
 * defined below, too.
 *
 * These macros are defined using the same successive-shrinking approach as
 * the type definitions above. It is likewise important that macros are defined
 * in order of decending width.
 *
 * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the
 * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
 */
#define __int_c_join(a, b) a##b
#define __int_c(v, suffix) __int_c_join(v, suffix)
#define __uint_c(v, suffix) __int_c_join(v##U, suffix)
#ifdef __INT64_TYPE__
#ifdef __INT64_C_SUFFIX__
#define __int64_c_suffix __INT64_C_SUFFIX__
#else
#undef __int64_c_suffix
#endif /* __INT64_C_SUFFIX__ */
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
#ifdef __int64_c_suffix
#define INT64_C(v) __int_c(v, __int64_c_suffix)
#define UINT64_C(v) __uint_c(v, __int64_c_suffix)
#else
#define INT64_C(v) v
#define UINT64_C(v) v##U
#endif /* __int64_c_suffix */
#endif /* __int_least64_t */
#ifdef __INT32_TYPE__
#ifdef __INT32_C_SUFFIX__
#define __int32_c_suffix __INT32_C_SUFFIX__
#else
#undef __int32_c_suffix
#endif /* __INT32_C_SUFFIX__ */
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
#ifdef __int32_c_suffix
#define INT32_C(v) __int_c(v, __int32_c_suffix)
#define UINT32_C(v) __uint_c(v, __int32_c_suffix)
#else
#define INT32_C(v) v
#define UINT32_C(v) v##U
#endif /* __int32_c_suffix */
#endif /* __int_least32_t */
#ifdef __INT16_TYPE__
#ifdef __INT16_C_SUFFIX__
#define __int16_c_suffix __INT16_C_SUFFIX__
#else
#undef __int16_c_suffix
#endif /* __INT16_C_SUFFIX__ */
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
#ifdef __int16_c_suffix
#define INT16_C(v) __int_c(v, __int16_c_suffix)
#define UINT16_C(v) __uint_c(v, __int16_c_suffix)
#else
#define INT16_C(v) v
#define UINT16_C(v) v##U
#endif /* __int16_c_suffix */
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
#ifdef __INT8_C_SUFFIX__
#define __int8_c_suffix __INT8_C_SUFFIX__
#else
#undef __int8_c_suffix
#endif /* __INT8_C_SUFFIX__ */
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
#ifdef __int8_c_suffix
#define INT8_C(v) __int_c(v, __int8_c_suffix)
#define UINT8_C(v) __uint_c(v, __int8_c_suffix)
#else
#define INT8_C(v) v
#define UINT8_C(v) v##U
#endif /* __int8_c_suffix */
#endif /* __int_least8_t */
/* C99 7.18.2.1 Limits of exact-width integer types.
 * C99 7.18.2.2 Limits of minimum-width integer types.
 * C99 7.18.2.3 Limits of fastest minimum-width integer types.
 *
 * The presence of limit macros are completely optional in C99. This
 * implementation defines limits for all of the types (exact- and
 * minimum-width) that it defines above, using the limits of the minimum-width
 * type for any types that do not have exact-width representations.
 *
 * As in the type definitions, this section takes an approach of
 * successive-shrinking to determine which limits to use for the standard (8,
 * 16, 32, 64) bit widths when they don't have exact representations. It is
 * therefore important that the definitions be kept in order of decending
 * widths.
 *
 * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the
 * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
 */
#ifdef __INT64_TYPE__
#define INT64_MAX INT64_C(9223372036854775807)
#define INT64_MIN (-INT64_C(9223372036854775807) - 1)
#define UINT64_MAX UINT64_C(18446744073709551615)
#define __INT_LEAST64_MIN INT64_MIN
#define __INT_LEAST64_MAX INT64_MAX
#define __UINT_LEAST64_MAX UINT64_MAX
#endif /* __INT64_TYPE__ */
#ifdef __INT_LEAST64_MIN
#define INT_LEAST64_MIN __INT_LEAST64_MIN
#define INT_LEAST64_MAX __INT_LEAST64_MAX
#define UINT_LEAST64_MAX __UINT_LEAST64_MAX
#define INT_FAST64_MIN __INT_LEAST64_MIN
#define INT_FAST64_MAX __INT_LEAST64_MAX
#define UINT_FAST64_MAX __UINT_LEAST64_MAX
#endif /* __INT_LEAST64_MIN */
#ifdef __INT32_TYPE__
#define INT32_MAX INT32_C(2147483647)
#define INT32_MIN (-INT32_C(2147483647) - 1)
#define UINT32_MAX UINT32_C(4294967295)
#define __INT_LEAST32_MIN INT32_MIN
#define __INT_LEAST32_MAX INT32_MAX
#define __UINT_LEAST32_MAX UINT32_MAX
#endif /* __INT32_TYPE__ */
#ifdef __INT_LEAST32_MIN
#define INT_LEAST32_MIN __INT_LEAST32_MIN
#define INT_LEAST32_MAX __INT_LEAST32_MAX
#define UINT_LEAST32_MAX __UINT_LEAST32_MAX
#define INT_FAST32_MIN __INT_LEAST32_MIN
#define INT_FAST32_MAX __INT_LEAST32_MAX
#define UINT_FAST32_MAX __UINT_LEAST32_MAX
#endif /* __INT_LEAST32_MIN */
#ifdef __INT16_TYPE__
#define INT16_MAX INT16_C(32767)
#define INT16_MIN (-INT16_C(32767) - 1)
#define UINT16_MAX UINT16_C(65535)
#define __INT_LEAST16_MIN INT16_MIN
#define __INT_LEAST16_MAX INT16_MAX
#define __UINT_LEAST16_MAX UINT16_MAX
#endif /* __INT16_TYPE__ */
#ifdef __INT_LEAST16_MIN
#define INT_LEAST16_MIN __INT_LEAST16_MIN
#define INT_LEAST16_MAX __INT_LEAST16_MAX
#define UINT_LEAST16_MAX __UINT_LEAST16_MAX
#define INT_FAST16_MIN __INT_LEAST16_MIN
#define INT_FAST16_MAX __INT_LEAST16_MAX
#define UINT_FAST16_MAX __UINT_LEAST16_MAX
#endif /* __INT_LEAST16_MIN */
#ifdef __INT8_TYPE__
#define INT8_MAX INT8_C(127)
#define INT8_MIN (-INT8_C(127) - 1)
#define UINT8_MAX UINT8_C(255)
#define __INT_LEAST8_MIN INT8_MIN
#define __INT_LEAST8_MAX INT8_MAX
#define __UINT_LEAST8_MAX UINT8_MAX
#endif /* __INT8_TYPE__ */
#ifdef __INT_LEAST8_MIN
#define INT_LEAST8_MIN __INT_LEAST8_MIN
#define INT_LEAST8_MAX __INT_LEAST8_MAX
#define UINT_LEAST8_MAX __UINT_LEAST8_MAX
#define INT_FAST8_MIN __INT_LEAST8_MIN
#define INT_FAST8_MAX __INT_LEAST8_MAX
#define UINT_FAST8_MAX __UINT_LEAST8_MAX
#endif /* __INT_LEAST8_MIN */
/* Some utility macros */
#define __INTN_MIN(n) __stdint_join3(INT, n, _MIN)
#define __INTN_MAX(n) __stdint_join3(INT, n, _MAX)
#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
#define __INTN_C(n, v) __stdint_join3(INT, n, _C(v))
#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */
/* C99 7.18.3 Limits of other integer types. */
#define INTPTR_MIN (-__INTPTR_MAX__ - 1)
#define INTPTR_MAX __INTPTR_MAX__
#define UINTPTR_MAX __UINTPTR_MAX__
#define PTRDIFF_MIN (-__PTRDIFF_MAX__ - 1)
#define PTRDIFF_MAX __PTRDIFF_MAX__
#define SIZE_MAX __SIZE_MAX__
/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
 * is enabled. */
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
#define RSIZE_MAX (SIZE_MAX >> 1)
#endif
/* C99 7.18.2.5 Limits of greatest-width integer types. */
#define INTMAX_MIN (-__INTMAX_MAX__ - 1)
#define INTMAX_MAX __INTMAX_MAX__
#define UINTMAX_MAX __UINTMAX_MAX__
/* C99 7.18.3 Limits of other integer types. */
#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)
#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)
#ifdef __WINT_UNSIGNED__
#define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)
#define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)
#else
#define WINT_MIN __INTN_MIN(__WINT_WIDTH__)
#define WINT_MAX __INTN_MAX(__WINT_WIDTH__)
#endif
#ifndef WCHAR_MAX
#define WCHAR_MAX __WCHAR_MAX__
#endif
#ifndef WCHAR_MIN
#if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
#else
#define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
#endif
#endif
/* 7.18.4.2 Macros for greatest-width integer constants. */
#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
#endif // SLANG_LLVM_H

View file

@ -1 +0,0 @@
#define SLANG_TAG_VERSION "2025.3.1"

View file

@ -1,181 +0,0 @@
// Prelude for PyTorch cpp binding.
// clang-format off
#include <torch/extension.h>
// clang-format on
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAUtils.h>
#include <stdexcept>
#include <string>
#include <vector>
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#include "../source/core/slang-string.h"
// Symbol-export attribute for functions exposed from the compiled extension
// (MSVC vs GCC/Clang spelling).
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
// C-linkage helpers; empty when not compiled as C++.
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#define SLANG_PRELUDE_NAMESPACE
// Calling-convention / inlining shims: default to no-ops (or plain `inline`)
// unless the build defines them first.
#ifndef SLANG_NO_THROW
#define SLANG_NO_THROW
#endif
#ifndef SLANG_STDCALL
#define SLANG_STDCALL
#endif
#ifndef SLANG_MCALL
#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
#include "slang-cpp-scalar-intrinsics.h"
#include "slang-cpp-types-core.h"
// Maximum tensor rank that can be passed across the Slang/PyTorch boundary.
static const int kSlangTorchTensorMaxDim = 5;

// Plain-data view of a torch::Tensor as handed to generated kernels:
// raw byte pointer plus per-dimension strides (in bytes) and sizes (in
// elements). Populated and validated by make_tensor_view below.
struct TensorView
{
    uint8_t* data;                             // pointer to element storage (device memory — see CUDA check in make_tensor_view)
    uint32_t strides[kSlangTorchTensorMaxDim]; // byte stride per dimension
    uint32_t sizes[kSlangTorchTensorMaxDim];   // element count per dimension
    uint32_t dimensionCount;                   // number of valid entries in strides/sizes
};
// Validate a torch::Tensor and flatten it into a TensorView (raw data
// pointer, per-dimension byte strides, per-dimension element counts).
//
// `name`              - caller-supplied label used to prefix error messages.
// `targetScalarType`  - dtype the tensor must already have; no implicit cast
//                       or device transfer is attempted (see comment below).
// `requireContiguous` - when true, reject non-contiguous tensors.
//
// Throws std::runtime_error when the tensor: is not on a CUDA device, has
// the wrong dtype, is non-contiguous while required, exceeds
// kSlangTorchTensorMaxDim dimensions, has a zero (broadcasted) stride, or
// ends up with a null data pointer while non-empty.
TensorView make_tensor_view(
    torch::Tensor val,
    const char* name,
    torch::ScalarType targetScalarType,
    bool requireContiguous)
{
    // We're currently not trying to implicitly cast or transfer to device for two reasons:
    // 1. There appears to be a bug with .to() where successive calls after the first one fail.
    // 2. Silent casts like this can cause large memory allocations & unexpected overheads.
    //    It's better to be explicit.

    // Expect tensors to be on CUDA device
    if (!val.device().is_cuda())
        throw std::runtime_error(
            std::string(name).append(": tensor is not on CUDA device.").c_str());

    // Expect tensors to be the right type.
    if (val.dtype() != targetScalarType)
        throw std::runtime_error(
            std::string(name).append(": tensor is not of the expected type.").c_str());

    // Check that the tensor is contiguous
    if (requireContiguous && !val.is_contiguous())
        throw std::runtime_error(std::string(name).append(": tensor is not contiguous.").c_str());

    TensorView res = {};
    res.dimensionCount = val.dim();
    res.data = nullptr;
    size_t elementSize = 4;
    // Select the element size and typed data pointer for the dtype. Any
    // scalar type not listed leaves res.data null, which is rejected at the
    // end of this function (unless the tensor is empty).
    switch (val.scalar_type())
    {
    case torch::kInt8:
    case torch::kUInt8:
        elementSize = 1;
        res.data = (uint8_t*)val.data_ptr<uint8_t>();
        break;
    case torch::kBFloat16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<torch::BFloat16>();
        break;
    case torch::kFloat16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<at::Half>();
        break;
    case torch::kInt16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<int16_t>();
        break;
    case torch::kFloat32:
        elementSize = 4;
        res.data = (uint8_t*)val.data_ptr<float>();
        break;
    case torch::kInt32:
        elementSize = 4;
        res.data = (uint8_t*)val.data_ptr<int32_t>();
        break;
    case torch::kFloat64:
        elementSize = 8;
        res.data = (uint8_t*)val.data_ptr<double>();
        break;
    case torch::kInt64:
        elementSize = 8;
        res.data = (uint8_t*)val.data_ptr<int64_t>();
        break;
    case torch::kBool:
        elementSize = 1;
        res.data = (uint8_t*)val.data_ptr<bool>();
        break;
    }
    // TensorView's stride/size arrays are fixed-length; reject higher ranks.
    if (val.dim() > kSlangTorchTensorMaxDim)
        throw std::runtime_error(std::string(name)
                                     .append(": number of dimensions exceeds limit (")
                                     .append(std::to_string(kSlangTorchTensorMaxDim))
                                     .append(")")
                                     .c_str());
    bool isEmpty = true;
    for (int i = 0; i < val.dim(); ++i)
    {
        // Torch strides are in elements; TensorView stores them in bytes.
        res.strides[i] = val.stride(i) * elementSize;
        if (res.strides[i] == 0)
            throw std::runtime_error(
                std::string(name)
                    .append(": tensors with broadcasted dimensions are not supported (use "
                            "tensor.contiguous() to make tensor whole)")
                    .c_str());
        res.sizes[i] = val.size(i);
        if (res.sizes[i] > 0)
            isEmpty = false;
    }
    // A null data pointer is only acceptable for an empty tensor (covers
    // dtypes not handled by the switch above).
    if (!res.data && !isEmpty)
        throw std::runtime_error(std::string(name).append(": data pointer is invalid.").c_str());
    return res;
}
#define SLANG_PRELUDE_EXPORT

File diff suppressed because it is too large Load diff

View file

@ -1,156 +0,0 @@
Slang 64-bit Type Support
=========================
## Summary
* Not all targets support 64 bit types, or all 64 bit types
* 64 bit integers generally require later APIs/shader models
* When specifying 64 bit floating-point literals *always* use the type suffixes (ie `L`)
* An integer literal will be interpreted as 64 bits if it cannot fit in a 32 bit value.
* GPU target/s generally do not support all double intrinsics
* Typically missing are transcendentals (sin, cos etc), logarithm and exponential functions
* CUDA is the exception supporting nearly all double intrinsics
* D3D
* D3D targets *appear* to support double intrinsics (like sin, cos, log etc), but behind the scenes they are actually being converted to float
* When using D3D12, it is best to use DXIL if you use double because there are some serious issues around double and DXBC
* VK will produce an error in validation if a double intrinsic is used that it does not support (which is most of them)
* Vector and Matrix types have even spottier than scalar intrinsic support across targets
Overview
========
The Slang language supports 64 bit built in types. Such as
* `double`
* `uint64_t`
* `int64_t`
This also applies to vector and matrix versions of these types.
Unfortunately if a specific target supports the type or the typical HLSL intrinsic functions (such as sin/cos/max/min etc) depends very much on the target.
Special attention has to be made with respect to literal 64 bit types. By default float literals if they do not have an explicit suffix are assumed to be 32 bit. There is a variety of reasons for this design choice - the main one being around by default behavior of getting good performance. The suffixes required for 64 bit types are as follows
```
// double - 'l' or 'L'
double a = 1.34e-200L;
// WRONG!: This is the same as b = double(float(1.34e-200)) which will be 0. Will produce a warning.
double b = 1.34e-200;
// int64_t - 'll' or 'LL' (or combination of upper/lower)
int64_t c = -5436365345345234ll;
int64_t e = ~0LL; // Same as 0xffffffffffffffff
// uint64_t - 'ull' or 'ULL' (or combination of upper/lower)
uint64_t g = 0x8000000000000000ull;
uint64_t i = ~0ull; // Same as 0xffffffffffffffff
uint64_t j = ~0; // Equivalent to 'i' because uint64_t(int64_t(~int32_t(0)));
```
These issues are discussed more on issue [#1185](https://github.com/shader-slang/slang/issues/1185)
The type of a decimal non-suffixed integer literal is the first integer type from the list [`int`, `int64_t`]
which can represent the specified literal value. If the value cannot fit, the literal is represented as an `uint64_t`
and a warning is given.
The type of a hexadecimal non-suffixed integer literal is the first type from the list [`int`, `uint`, `int64_t`, `uint64_t`]
that can represent the specified literal value. A non-suffixed integer literal will be 64 bit if it cannot fit in 32 bits.
```
// Same as int64_t a = int(1), the value can fit into a 32 bit integer.
int64_t a = 1;
// Same as int64_t b = int64_t(2147483648), the value cannot fit into a 32 bit integer.
int64_t b = 2147483648;
// Same as int64_t c = uint64_t(18446744073709551615), the value is larger than the maximum value of a signed 64 bit
// integer, and is interpreted as an unsigned 64 bit integer. Warning is given.
uint64_t c = 18446744073709551615;
// Same as uint64_t = int(0x7FFFFFFF), the value can fit into a 32 bit integer.
uint64_t d = 0x7FFFFFFF;
// Same as uint64_t = int64_t(0x7FFFFFFFFFFFFFFF), the value cannot fit into an unsigned 32 bit integer but
// can fit into a signed 64 bit integer.
uint64_t e = 0x7FFFFFFFFFFFFFFF;
// Same as uint64_t = uint64_t(0xFFFFFFFFFFFFFFFF), the value cannot fit into a signed 64 bit integer, and
// is interpreted as an unsigned 64 bit integer.
uint64_t f = 0xFFFFFFFFFFFFFFFF;
```
Double support
==============
Target | Compiler/Binary | Double Type | Intrinsics | Notes
---------|------------------|----------------|-----------------------|-----------
CPU | | Yes | Yes | 1
CUDA | nvrtc/PTX | Yes | Yes | 1
D3D12 | DXC/DXIL | Yes | Small Subset | 4
Vulkan | GlSlang/Spir-V | Yes | Partial | 2
D3D11 | FXC/DXBC | Yes | Small Subset | 4
D3D12 | FXC/DXBC | Yes | Small Subset | 3, 4
1) CUDA and CPU support most intrinsics, with the notable exception currently of matrix invert
2) In terms of lack of general intrinsic support, the restriction is described in https://www.khronos.org/registry/spir-v/specs/1.0/GLSL.std.450.html
The following intrinsics are available for Vulkan
`fmod` (as %), `rcp`, `sign`, `saturate`, `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `abs`, `min`, `max`, `smoothstep`, `lerp`, `clamp`, `step` and `asuint`.
These are tested in the test `tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang`.
What is missing are transcendentals, expX, logX.
Note that GlSlang does produce Spir-V that contains double intrinsic calls for the missing intrinsics, the failure happens when validating the Spir-V
```
Validation: error 0: [ UNASSIGNED-CoreValidation-Shader-InconsistentSpirv ] Object: VK_NULL_HANDLE (Type = 0) | SPIR-V module not valid: GLSL.std.450 Sin: expected Result Type to be a 16 or 32-bit scalar or vector float type
%57 = OpExtInst %double %1 Sin %56
```
3) That if a RWStructuredBuffer<double> is used on D3D12 with DXBC, and a double is written, it can lead to incorrect behavior. Thus it is recommended not to use double with dxbc, but to use dxil to keep things simple. A test showing this problem is `tests/bugs/dxbc-double-problem.slang`. The test `tests/hlsl-intrinsic/scalar-double-simple.slang` shows not using a double resource, doubles do appear to work on D3D12 DXBC.
4) If you compile code using double and intrinsics through Slang at first blush it will seem to work. Assuming there are no errors in your code, your code will even typically appear to work correctly. Unfortunately what is really happening is the backend compiler (fxc or dxc) compiler is narrowing double to float and then using float intrinsics. It typically generates a warning when this happens, but unless there is an error in your code you will not see these warnings because dxc doesn't appear to have a mechanism to return warnings if there isn't an error. This is why everything appears to work - but actually any intrinsic call is losing precision silently.
Note on dxc by default Slang disables warnings - warnings need to be enabled to see the narrowing warnings.
There is another exception around the use of % - if you do this with double it will return an error saying only float is supported.
It appears that no intrinsics are available for double with fxc.
On dxc the following intrinsics are available with double::
`rcp`, `sign`, `saturate`, `abs`, `min`, `max`, `clamp`, `asuint`.
These are tested in the test `tests/hlsl-intrinsic/scalar-double-d3d-intrinsic.slang`.
There is no support for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported.
uint64_t and int64_t Support
============================
Target | Compiler/Binary | u/int64_t Type | Intrinsic support | Notes
---------|------------------|----------------|--------------------|--------
CPU | | Yes | Yes |
CUDA | nvrtc/PTX | Yes | Yes |
Vulkan | GlSlang/Spir-V | Yes | Yes |
D3D12 | DXC/DXIL | Yes | Yes | 1
D3D11 | FXC/DXBC | No | No | 2
D3D12 | FXC/DXBC | No | No | 2
1) The [sm6.0 docs](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12) describe only supporting uint64_t, but dxc says int64_t is supported in [HLSL 2016](https://github.com/Microsoft/DirectXShaderCompiler/wiki/Language-Versions). Tests show that this is indeed the case.
2) uint64_t support requires https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12, so DXBC is not a target.
The intrinsics available on `uint64_t` type are `abs`, `min`, `max`, `clamp` and `countbits`.
The intrinsics available on `int64_t` type are `abs`, `min`, `max` and `clamp`.
GLSL
====
GLSL/Spir-v based targets do not support 'generated' intrinsics on matrix types. For example 'sin(mat)' will not work on GLSL/Spir-v.

View file

@ -1,35 +0,0 @@
Slang Documentation
===================
This directory contains documentation for the Slang system.
Some of the documentation is intended for users of the language and compiler, while other documentation is intended for developers contributing to the project.
Getting Started
---------------
The Slang [User's Guide](https://shader-slang.github.io/slang/user-guide/) provides an introduction to the Slang language and its major features, as well as the compilation and reflection API.
There is also documentation specific to using the [slangc](https://shader-slang.github.io/slang/user-guide/compiling.html#command-line-compilation-with-slangc) command-line tool.
Advanced Users
--------------
For the benefit of advanced users we provide detailed documentation on how Slang compiles code for specific platforms.
The [target compatibility guide](target-compatibility.md) gives an overview of feature compatibility for targets.
The [CPU target guide](cpu-target.md) gives information on compiling Slang or C++ source into shared libraries/executables or functions that can be directly executed. It also covers how to generate C++ code from Slang source.
The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA compiler.
Contributors
------------
For contributors to the Slang project, the information under the [`design/`](design/) directory may help explain the rationale behind certain design decisions and help when ramping up in the codebase.
Research
--------
The Slang project is based on a long history of research work. While understanding this research is not necessary for working with Slang, it may be instructive for understanding the big-picture goals of the language, as well as why certain critical decisions were made.
A [paper](http://graphics.cs.cmu.edu/projects/slang/) on the Slang system was accepted into SIGGRAPH 2018, and it provides an overview of the language and the compiler implementation.
Yong He's [dissertation](http://graphics.cs.cmu.edu/projects/renderergenerator/yong_he_thesis.pdf) provided more detailed discussion of the design of the Slang system.

View file

@ -1 +0,0 @@
theme: jekyll-theme-tactile

View file

@ -1,137 +0,0 @@
{% capture headingsWorkspace %}
{% comment %}
Copyright (c) 2018 Vladimir "allejo" Jimenez
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
{% endcomment %}
{% comment %}
Version 1.0.9
https://github.com/allejo/jekyll-anchor-headings
"Be the pull request you wish to see in the world." ~Ben Balter
Usage:
{% include anchor_headings.html html=content anchorBody="#" %}
Parameters:
* html (string) - the HTML of compiled markdown generated by kramdown in Jekyll
Optional Parameters:
* beforeHeading (bool) : false - Set to true if the anchor should be placed _before_ the heading's content
* headerAttrs (string) : '' - Any custom HTML attributes that will be added to the heading tag; you may NOT use `id`;
the `%heading%` and `%html_id%` placeholders are available
* anchorAttrs (string) : '' - Any custom HTML attributes that will be added to the `<a>` tag; you may NOT use `href`,
`class` or `title`;
the `%heading%` and `%html_id%` placeholders are available
* anchorBody (string) : '' - The content that will be placed inside the anchor; the `%heading%` placeholder is
available
* anchorClass (string) : '' - The class(es) that will be used for each anchor. Separate multiple classes with a
space
* anchorTitle (string) : '' - The `title` attribute that will be used for anchors
* h_min (int) : 1 - The minimum header level to build an anchor for; any header lower than this value will be
ignored
* h_max (int) : 6 - The maximum header level to build an anchor for; any header greater than this value will be
ignored
* bodyPrefix (string) : '' - Anything that should be inserted inside of the heading tag _before_ its anchor and
content
* bodySuffix (string) : '' - Anything that should be inserted inside of the heading tag _after_ its anchor and
content
Output:
The original HTML with the addition of anchors inside of all of the h1-h6 headings.
{% endcomment %}
{% assign minHeader = include.h_min | default: 1 %}
{% assign maxHeader = include.h_max | default: 2 %}
{% assign beforeHeading = include.beforeHeading %}
{% assign nodes = include.html | split: '<h' %} {% capture edited_headings %}{% endcapture %} {% for _node in nodes
%} {% capture node %}{{ _node | strip }}{% endcapture %} {% if node=="" %} {% continue %} {% endif %} {% assign
nextChar=node | replace: '"' , '' | strip | slice: 0, 1 %} {% assign headerLevel=nextChar | times: 1 %} <!-- If
the level is cast to 0, it means it's not a h1-h6 tag, so let's see if we need to fix it -->
{% if headerLevel == 0 %}
<!-- Split up the node based on closing angle brackets and get the first one. -->
{% assign firstChunk = node | split: '>' | first %}
<!-- If the first chunk does NOT contain a '<', that means we've broken another HTML tag that starts with 'h' -->
{% unless firstChunk contains '<' %} {% capture node %}<h{{ node }}{% endcapture %} {% endunless %} {% capture
edited_headings %}{{ edited_headings }}{{ node }}{% endcapture %} {% continue %} {% endif %} {% capture
_closingTag %}</h{{ headerLevel }}>{% endcapture %}
{% assign _workspace = node | split: _closingTag %}
{% assign _idWorkspace = _workspace[0] | split: 'id="' %}
{% assign _idWorkspace = _idWorkspace[1] | split: '"' %}
{% assign html_id = _idWorkspace[0] %}
{% capture _hAttrToStrip %}{{ _workspace[0] | split: '>' | first }}>{% endcapture %}
{% assign header = _workspace[0] | replace: _hAttrToStrip, '' %}
<!-- Build the anchor to inject for our heading -->
{% capture anchor %}{% endcapture %}
{% if html_id and headerLevel >= minHeader and headerLevel <= maxHeader %} {% assign escaped_header=header |
strip_html %} {% if include.headerAttrs %} {% capture _hAttrToStrip %}{{ _hAttrToStrip | split: '>' |
first }} {{ include.headerAttrs | replace: '%heading%' , escaped_header | replace: '%html_id%' , html_id
}}>{% endcapture %}
{% endif %}
{% capture anchor %}href="#{{ html_id }}"{% endcapture %}
{% if include.anchorClass %}
{% capture anchor %}{{ anchor }} class="{{ include.anchorClass }}"{% endcapture %}
{% endif %}
{% if include.anchorTitle %}
{% capture anchor %}{{ anchor }} title="{{ include.anchorTitle | replace: '%heading%', escaped_header
}}"{% endcapture %}
{% endif %}
{% if include.anchorAttrs %}
{% capture anchor %}{{ anchor }} {{ include.anchorAttrs | replace: '%heading%', escaped_header |
replace: '%html_id%', html_id }}{% endcapture %}
{% endif %}
{% capture anchor %}<a {{ anchor }}>{{ include.anchorBody | replace: '%heading%', escaped_header |
default: '' }}</a>{% endcapture %}
<!-- In order to prevent adding extra space after a heading, we'll let the 'anchor' value contain it -->
{% if beforeHeading %}
{% capture anchor %}{{ anchor }} {% endcapture %}
{% else %}
{% capture anchor %} {{ anchor }}{% endcapture %}
{% endif %}
{% endif %}
{% capture new_heading %}
<h{{ _hAttrToStrip }} {{ include.bodyPrefix }} {% if beforeHeading %} {{ anchor }}{{ header }} {% else
%} {{ header }}{{ anchor }} {% endif %} {{ include.bodySuffix }} </h{{ headerLevel }}>
{% endcapture %}
<!--
If we have content after the `</hX>` tag, then we'll want to append that here so we don't lost any content.
-->
{% assign chunkCount = _workspace | size %}
{% if chunkCount > 1 %}
{% capture new_heading %}{{ new_heading }}{{ _workspace | last }}{% endcapture %}
{% endif %}
{% capture edited_headings %}{{ edited_headings }}{{ new_heading }}{% endcapture %}
{% endfor %}
{% endcapture %}{% assign headingsWorkspace = '' %}{{ edited_headings | strip }}

View file

@ -1,417 +0,0 @@
<!DOCTYPE html>
<html lang="{{ site.lang | default: " en-US" }}">
<head>
<meta charset='utf-8'>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
<link rel="stylesheet" type="text/css" href="{{ '/assets/css/print.css' | relative_url }}" media="print">
<script async src="https://www.googletagmanager.com/gtag/js?id=G-TMTZVLLMBP"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-TMTZVLLMBP');
</script>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<style>
#centeringDiv {
margin: auto;
max-width: 1200px;
}
#navDiv
{
display: block;
box-sizing: border-box;
padding-top: 5px;
padding-bottom: 5px;
border-bottom-width: 3px;
border-bottom-style: solid;
border-bottom-color: #F0F0F0;
}
#navDiv nav
{
float:left;
}
#navDiv::after {
content: "";
clear: both;
display: table;
}
#navDiv nav li::after
{
content: "/";
padding-left: 10px;
padding-right: 0px;
color: #808080;
}
#navDiv nav li
{
display:inline;
padding-left: 10px;
padding-right: 0px;
}
#tocColumn {
width: 350px;
position: fixed;
overflow-y: auto;
box-sizing: border-box;
display: block;
}
#tocInner {
padding: 20px;
}
#rightColumn {
padding-left: 390px;
padding-right: 40px;
padding-top: 20px;
}
.toc_root_list {
list-style-type: none;
list-style-position: outside;
background-color: initial;
padding-left: 0px;
}
.toc_list {
padding-left: 16px;
background-color: initial;
list-style-type: none;
margin-bottom: 0px;
}
.toc_item {
cursor: pointer;
user-select: none;
list-style-type: none;
padding-left: 0px;
padding-top: 5px;
}
.toc_item_expanded::before {
content: "\25be";
cursor: pointer;
}
.toc_item_collapsed::before {
content: "\25b8";
cursor: pointer;
}
.toc_item_leaf {
padding-left: 14px;
cursor: pointer;
list-style-type: none;
}
.toc_span:hover
{
color: #d5000d;
}
.tocIcon
{
vertical-align: -2.5px;
}
.editButton
{
float: right;
margin-right: 10px;
color:#808080;
}
.editIcon
{
fill: currentColor;
vertical-align: text-top;
}
#btnToggleTOC {
display: none;
width: fit-content;
margin-left: 10px;
margin-top: 10px;
padding: 10px;
border-style: solid;
border-color: #808080;
border-width: 1px;
background-color: #E8E8E8;
}
#btnToggleTOC:hover {
background-color: #F0F0E8;
}
#btnToggleTOC:active {
background-color: #D4D4D4;
}
@media screen and (max-width: 900px) {
#tocColumn {
width: 300px;
display: block;
box-sizing: border-box;
}
#rightColumn {
padding-left: 320px;
padding-right: 20px;
}
}
@media screen and (max-width: 700px) {
#tocColumn {
width: 100%;
position: static;
display: none;
border-right-style: none;
box-sizing: content-box;
}
#tocInner {
padding: 10px;
}
#rightColumn {
padding-left: 10px;
padding-right: 10px;
}
#centeringDiv {
padding-left: 0px;
}
#btnToggleTOC {
display: block;
}
}
</style>
{% seo %}
</head>
<body>
<div id="centeringDiv">
<div id="navDiv">
{% include_relative nav.html %}
<a class="editButton" title="Edit this page" href="https://github.com/{{ site.github.repository_nwo }}/edit/master/docs/{{ page.path }}">
<svg class="editIcon" height="16" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
<path fill-rule="evenodd"
d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z">
</path>
</svg>
</a>
</div>
<button id="btnToggleTOC" onclick="toggleTOC()">
<svg height="16" class="tocIcon" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
<path fill-rule="evenodd"
d="M2 4a1 1 0 100-2 1 1 0 000 2zm3.75-1.5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zM3 8a1 1 0 11-2 0 1 1 0 012 0zm-1 6a1 1 0 100-2 1 1 0 000 2z">
</path>
</svg>
Table of Contents</button>
<div id="tocColumn">
<div id="tocInner">
{% include_relative toc.html %}
</div>
</div>
<div id="rightColumn">
<section id="main_content">
{% include anchor_headings.html html=content anchorBody="" %}
</section>
<a href="javascript:;" id="_content_end_"></a>
<footer>
{% if site.github.is_project_page %}
{{ site.title | default: site.github.repository_name }} is maintained by <a
href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a><br>
{% endif %}
This page was generated by <a href="https://pages.github.com">GitHub Pages</a>.
</footer>
</div>
</div>
<script>
// Compatibility shim: old Internet Explorer releases lack
// String.prototype.startsWith, which the TOC code below relies on.
// Define it only when the native implementation is missing.
if (!String.prototype.startsWith)
{
    String.prototype.startsWith = function (searchString, position) {
        var from = position || 0;
        return this.indexOf(searchString, from) === from;
    };
}
var tocColumn = document.getElementById("tocColumn");
var rightColumn = document.getElementById("rightColumn");
function updateScroll()
{
if (window.innerWidth < 700)
{
tocColumn.style.height = "";
return;
}
var top = Math.max(0, rightColumn.getBoundingClientRect().top);
tocColumn.style.top = top + "px";
tocColumn.style.height = (window.innerHeight-top) + "px";
}
function updatePosition()
{
if (window.innerWidth > 700)
tocColumn.style.display = "";
tocColumn.style.left = rightColumn.getBoundingClientRect().left + "px";
updateScroll();
}
window.addEventListener("resize", updatePosition);
updatePosition();
var tocItemsArray = [];
var subSectionItems = [];
var selectedItem = null;
function toggleTOC() {
var tocColumn = document.getElementById("tocColumn");
if (tocColumn.style.display == "block")
tocColumn.style.display = "none";
else
tocColumn.style.display = "block";
event.stopPropagation();
}
function expandItem(e) {
if (e == selectedItem)
e.style["font-weight"] = "bold";
var childList = e.getElementsByClassName("toc_list");
if (childList.length == 0)
return;
childList[0].style.display = "block";
childList[0].style["font-weight"] = "normal";
e.setAttribute("class", "toc_item toc_item_expanded");
}
function collapseItem(e) {
var childList = e.getElementsByClassName("toc_list");
if (childList.length == 0)
return;
childList[0].style.display = "none";
e.setAttribute("class", "toc_item toc_item_collapsed");
}
// Navigate to the page/anchor stored in the clicked TOC entry's parent
// "data-link" attribute. Links are stored without a ".html" extension, so
// it is inserted here (before the "#" fragment when one is present).
// NOTE(review): relies on the legacy global `event` and `srcElement`
// (IE-era DOM API); the parameter `e` is unused — presumably kept for the
// addEventListener signature. Confirm before modernizing.
function tocSpanOnClick(e)
{
if (event.srcElement != null && event.srcElement.parentElement != null)
{
var link = event.srcElement.parentElement.getAttribute("data-link");
if (link != null)
{
var poundIndex = link.indexOf("#");
if (poundIndex == -1)
window.location.href = link + ".html";
else
// Splice ".html" in between the page name and its "#fragment".
window.location.href = link.substr(0, poundIndex) + ".html#" + link.substr(poundIndex+1, link.length - poundIndex - 1);
}
}
// Keep the click from also triggering expand/collapse on the parent item.
event.stopPropagation();
}
function tocItemOnClick(e)
{
if (event.srcElement == null) return;
// Toggle expanded/collapsed state.
if (event.srcElement.getAttribute("class").endsWith("toc_item_collapsed"))
expandItem(event.srcElement);
else if (event.srcElement.getAttribute("class").endsWith("toc_item_expanded"))
collapseItem(event.srcElement);
event.stopPropagation();
}
var path = window.location.pathname;
var pageName = path.split("/").pop();
var currentPageID = pageName.substr(0, pageName.lastIndexOf("."));
if (currentPageID.length == 0)
currentPageID = "index";
var tocLists = document.getElementsByClassName("toc_root_list");
for (var i = 0; i < tocLists.length; i++) {
var tocList = tocLists[i];
var items = tocList.getElementsByTagName("li")
for (var j = 0; j < items.length; j++)
tocItemsArray.push(items[j]);
}
for (var i = 0; i < tocItemsArray.length; i++) {
var item = tocItemsArray[i];
if (item.getAttribute("data-link") == currentPageID)
selectedItem = item;
if (item.getElementsByTagName("li").length != 0) {
collapseItem(item);
}
else {
item.setAttribute("class", "toc_item toc_item_leaf");
}
item.addEventListener("click", tocItemOnClick);
var innerSpan = item.getElementsByTagName("span");
if (innerSpan.length != 0)
{
innerSpan[0].addEventListener("click", tocSpanOnClick);
innerSpan[0].setAttribute("class", "toc_span");
}
}
var curItem = selectedItem;
while (curItem != null) {
expandItem(curItem);
curItem = curItem.parentElement;
if (curItem != null && curItem.getAttribute("class") != null &&
curItem.getAttribute("class").startsWith("toc_list"))
curItem = curItem.parentElement;
if (curItem != null && curItem.getAttribute("class") != null &&
curItem.getAttribute("class").startsWith("toc_root_list"))
break;
}
var subItems = selectedItem.getElementsByTagName("li");
var subSectionTitles = [];
var subSectionTitleStrs = [];
for (var i = 0; i < subItems.length; i++)
{
subSectionItems.push(subItems[i]);
var title = subItems[i].getAttribute("data-link");
var pos = title.lastIndexOf("#");
title = title.substr(pos + 1);
var element = document.getElementById(title);
subSectionTitles.push(element);
subSectionTitleStrs.push(title);
}
subSectionTitles.push(document.getElementById("_content_end_"));
function isSectionFullyVisible(id)
{
var titleElement = subSectionTitles[id];
var nextTitleElement = subSectionTitles[id+1];
return (titleElement.getBoundingClientRect().top >= 0 && nextTitleElement.getBoundingClientRect().top <= window.innerHeight);
}
// Return the index of the subsection the reader is currently in, defined as
// the LAST subsection whose heading has scrolled above 12% of the viewport
// height. Returns -1 when no subsection heading is above that line (e.g. at
// the very top of the page).
function findCurrentSubsection()
{
var currentSubsectionID = -1;
for (var i = 0; i < subSectionItems.length; i++) {
var titleElement = subSectionTitles[i];
// Headings that were not found in the document are skipped.
if (titleElement == null)
continue;
if (titleElement.getBoundingClientRect().top < window.innerHeight * 0.12)
currentSubsectionID = i;
}
return currentSubsectionID;
}
function updateCurrentSubsection(currentSubsectionID)
{
for (var i = 0; i < subSectionItems.length; i++)
{
if (i == currentSubsectionID || isSectionFullyVisible(i))
subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 600;
else
subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 400;
}
}
function windowScroll(e)
{
updateCurrentSubsection(findCurrentSubsection());
updateScroll();
}
window.addEventListener("scroll", windowScroll);
updateCurrentSubsection(findCurrentSubsection());
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ['$$','$$'], ["\\(","\\)"] ],
displayMath: [ ['$$','$$'], ["\\(","\\)"] ],
},
TeX: {
Macros: {
bra: ["\\langle{#1}|", 1],
ket: ["|{#1}\\rangle", 1],
braket: ["\\langle{#1}\\rangle", 1],
bk: ["\\langle{#1}|{#2}|{#3}\\rangle", 3]
}
}
});
</script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>

View file

@ -1,203 +0,0 @@
---
---
@import "{{ site.theme }}";
a:hover {
text-decoration: underline;
}
h3 {
color: #363636;
}
h4 {
color: #363636;
}
blockquote {
background-color: #f2f2f2;
padding-top: 10px;
padding-bottom: 5px;
}
blockquote p {
font-size: 16px;
font-weight: 400;
margin-bottom: 5px;
color: #202020;
}
body {
color: initial;
text-shadow: none;
background: none;
}
#container
{
background:none;
}
.highlight .cm {
color: #148b04;
}
.highlight .cp {
color: #148b04;
}
.highlight .c1 {
color: #148b04;
}
.highlight .cs {
color: #148b04;
}
.highlight .c, .highlight .ch, .highlight .cd, .highlight .cpf {
color: #148b04;
}
.highlight .err {
color: #a61717;
background-color: #e3d2d2;
}
.highlight .gd {
color: #000000;
background-color: #ffdddd;
}
.highlight .ge {
color: #000000;
font-style: italic;
}
.highlight .gr {
color: #aa0000;
}
.highlight .gh {
color: #999999;
}
.highlight .gi {
color: #000000;
background-color: #ddffdd;
}
.highlight .go {
color: #888888;
}
.highlight .gp {
color: #555555;
}
.highlight .gu {
color: #aaaaaa;
}
.highlight .gt {
color: #aa0000;
}
.highlight .kc {
color: #1243d4;
}
.highlight .kd {
color: #1243d4;
}
.highlight .kn {
color: #1243d4;
}
.highlight .kp {
color: #1243d4;
}
.highlight .kr {
color: #1243d4;
}
.highlight .kt {
color: #1243d4;
}
.highlight .k, .highlight .kv {
color: #1243d4;
}
.highlight .m, .highlight .mb, .highlight .mx, .highlight .mi, .highlight .mf {
color: #7211c2;
}
.highlight .sa {
color: #000000;
}
.highlight .sb {
color: #d14;
}
.highlight .sc {
color: #d14;
}
.highlight .sd {
color: #d14;
}
.highlight .s2 {
color: #d14;
}
.highlight .se {
color: #d14;
}
.highlight .sh {
color: #d14;
}
.highlight .si {
color: #d14;
}
.highlight .sx {
color: #d14;
}
.highlight .sr {
color: #009926;
}
.highlight .s1 {
color: #d14;
}
.highlight .ss {
color: #990073;
}
.highlight .s, .highlight .dl {
color: #d14;
}
.highlight .na {
color: #008080;
}
.highlight .bp {
color: #999999;
}
.highlight .n{
color: black;
}
.highlight .nc {
color: #11abb9;
}
.highlight .nt {
color: #11abb9;
}
.highlight .vc {
color: #008080;
}
.highlight .vg {
color: #008080;
}
.highlight .vi {
color: #008080;
}
.highlight .nv, .highlight .vm {
color: #008080;
}
.highlight .ow {
color: #000000;
}
.highlight .o {
color: #000000;
}
.highlight .w {
color: #000000;
}
.highlight .p {color:#000000;}
code
{
background-color: initial;
border:none;
}
pre{
color: #000000;
background: #F8F8F8;
}
pre code {
color: #000000;
background-color: #F8F8F8;
}
.highlight
{
background: #F8F8F8;
}

View file

@ -1,62 +0,0 @@
# This script uses `slangc` to generate the core module reference documentation and push the updated
# documents to shader-slang/stdlib-reference repository.
# The stdlib-reference repository has github-pages setup so that the markdown files we generate
# in this step will be rendered as html pages by Jekyll upon a commit to the repository.
# So what we need to do here is to pull the stdlib-reference repository, regenerate the markdown files
# and push the changes back to the repository.
# The generated markdown files will be located in three folders:
# - ./global-decls
# - ./interfaces
# - ./types
# In addition, slangc will generate a table of content file `toc.html` which will be copied to
# ./_includes/stdlib-reference-toc.html for Jekyll to consume it correctly.
# If stdlib-reference folder does not exist, clone from github repo
if (-not (Test-Path ".\stdlib-reference")) {
git clone https://github.com/shader-slang/stdlib-reference/
}
else {
# If it already exists, just pull the latest changes.
cd stdlib-reference
git pull
cd ../
}
# Remove the old generated files so that decls deleted from the core module
# do not leave stale pages behind.
Remove-Item -Path ".\stdlib-reference\global-decls" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\interfaces" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\types" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\attributes" -Recurse -Force
# Use git describe to produce a version string and write it to _includes/version.inc.
# This file will be included by the stdlib-reference Jekyll template.
git describe --tags | Out-File -FilePath ".\stdlib-reference\_includes\version.inc" -Encoding ASCII
cd stdlib-reference
# Candidate locations of the slangc executable, in order of preference.
$slangPaths = @(
"../../build/RelWithDebInfo/bin/slangc.exe",
"../../build/Release/bin/slangc.exe",
"../../build/Debug/bin/slangc.exe"
)
# Pick the first candidate that actually exists on disk.
$slangExe = $slangPaths | Where-Object { Test-Path $_ } | Select-Object -First 1
if ($slangExe) {
# Regenerate the markdown reference and the table of contents, then
# commit and push the result as the bot user.
& $slangExe -compile-core-module -doc
Move-Item -Path ".\toc.html" -Destination ".\_includes\stdlib-reference-toc.html" -Force
git config user.email "bot@shader-slang.com"
git config user.name "Stdlib Reference Bot"
git add .
git commit -m "Update the core module reference"
git push
} else {
Write-Error "Could not find slangc executable in RelWithDebInfo or Release directories"
}
cd ../
# For local debugging only.
# Remove-Item -Path "D:\git_repo\stdlib-reference\global-decls" -Recurse -Force
# Remove-Item -Path "D:\git_repo\stdlib-reference\interfaces" -Recurse -Force
# Remove-Item -Path "D:\git_repo\stdlib-reference\types" -Recurse -Force
# Copy-Item -Path .\stdlib-reference\global-decls -Destination D:\git_repo\stdlib-reference\global-decls -Recurse -Force
# Copy-Item -Path .\stdlib-reference\interfaces -Destination D:\git_repo\stdlib-reference\interfaces -Recurse -Force
# Copy-Item -Path .\stdlib-reference\types -Destination D:\git_repo\stdlib-reference\types -Recurse -Force
# Copy-Item -Path .\stdlib-reference\_includes\stdlib-reference-toc.html -Destination D:\git_repo\stdlib-reference\_includes\stdlib-reference-toc.html -Force

View file

@ -1,12 +0,0 @@
# Compile the TOC builder defined in scripts/Program.cs and run it over the
# user-guide and gfx-user-guide directories. The work happens inside a
# background job so the assembly loaded by Add-Type does not stay locked in
# the current PowerShell session.
$tocJob = Start-Job -ArgumentList $PSScriptRoot -ScriptBlock {
    $root = $args[0]
    Set-Location $root
    $builderSource = (Get-Content -Raw -Path "scripts/Program.cs").ToString()
    $referencedAssemblies = ("System.Core", "System.IO", "System.Collections")
    Add-Type -ReferencedAssemblies $referencedAssemblies -TypeDefinition $builderSource -Language CSharp
    # Generate the TOC for each documentation directory, in order.
    foreach ($guide in @("user-guide", "gfx-user-guide")) {
        [toc.Builder]::Run((Join-Path -Path $root -ChildPath $guide));
    }
}
Wait-Job $tocJob
Receive-Job -Job $tocJob

View file

@ -1,127 +0,0 @@
#!/usr/bin/env bash
# Build (or, with --check-only, verify) the table-of-contents files for the
# documentation directories by compiling and running the C# TOC builder
# under Mono.
set -e
# Directory containing this script, and the project root one level above it.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
project_root="$(dirname "$script_dir")"
check_only=0
# Print usage information for this script.
show_help() {
me=$(basename "$0")
cat <<EOF
$me: Build table of contents for documentation directories
Usage: $me [--help] [--source <path>] [--check-only]
Options:
--help Show this help message
--source Path to project root directory (defaults to parent of the script directory)
--check-only Check if TOC needs updating, exit 1 if changes needed
EOF
}
# Parse command-line arguments.
while [[ "$#" -gt 0 ]]; do
case $1 in
-h | --help)
show_help
exit 0
;;
--source)
project_root="$2"
shift
;;
--check-only)
check_only=1
;;
*)
echo "unrecognized argument: $1" >&2
show_help >&2
exit 1
;;
esac
shift
done
# Check for required tools; report every missing one before exiting so the
# user can fix them all at once.
missing_bin=0
require_bin() {
local name="$1"
if ! command -v "$name" &>/dev/null; then
echo "This script needs $name, but it isn't in \$PATH" >&2
missing_bin=1
return
fi
}
require_bin "mcs"
require_bin "mono"
if [ "$missing_bin" -eq 1 ]; then
exit 1
fi
# Compile into a throwaway temp directory that is removed on any exit.
temp_dir=$(mktemp -d)
trap 'rm -rf "$temp_dir"' EXIT
docs_dir="$project_root/docs"
# Wrap the shared Builder class (scripts/Program.cs) with a Main entry point
# so it can be compiled into a standalone executable.
cat >"$temp_dir/temp_program.cs" <<EOL
$(cat "$script_dir/scripts/Program.cs")
namespace toc
{
class Program
{
static int Main(string[] args)
{
if (args.Length < 1)
{
Console.WriteLine("Please provide a directory path");
return 1;
}
try
{
Builder.Run(args[0]);
return 0;
}
catch (Exception ex)
{
Console.WriteLine(\$"Error: {ex.Message}");
return 1;
}
}
}
}
EOL
if ! mcs -r:System.Core "$temp_dir/temp_program.cs" -out:"$temp_dir/toc-builder.exe"; then
echo "Compilation of $script_dir/scripts/Program.cs failed" >&2
exit 1
fi
# Regenerate (or verify) the TOC for each documentation directory. In
# --check-only mode the TOC is rebuilt in place and compared with git; a
# resulting diff means the committed TOC is stale, so print it, restore the
# file, and fail.
for dir in "user-guide" "gfx-user-guide"; do
if [ -d "$docs_dir/$dir" ]; then
if [ "$check_only" -eq 1 ]; then
# Ensure working directory is clean
if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
echo "Working directory not clean, cannot check TOC" >&2
exit 1
fi
fi
if ! mono "$temp_dir/toc-builder.exe" "$docs_dir/$dir"; then
echo "TOC generation failed for $dir" >&2
exit 1
fi
if [ "$check_only" -eq 1 ]; then
if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
git -C "$project_root" diff --color "docs/$dir/toc.html"
git -C "$project_root" checkout -- "docs/$dir/toc.html" 2>/dev/null
exit 1
fi
fi
else
echo "Directory $dir not found" >&2
fi
done

View file

@ -1,328 +0,0 @@
# Building Slang From Source
### TLDR
`cmake --workflow --preset release` to configure, build, and package a release
version of Slang.
## Prerequisites:
Please install:
- CMake (3.25 preferred, but 3.22 works[^1])
- A C++ compiler with support for C++17. GCC, Clang and MSVC are supported
- A CMake compatible backend, for example Visual Studio or Ninja
- Python3 (a dependency for building spirv-tools)
Optional dependencies for tests include
- CUDA
- OptiX
- NVAPI
- Aftermath
- X11
Other dependencies are sourced from submodules in the [./external](./external)
directory.
## Get the Source Code
Clone [this](https://github.com/shader-slang/slang) repository. Make sure to
fetch the submodules also.
```bash
git clone https://github.com/shader-slang/slang --recursive
```
## Configure and build
> This section assumes cmake 3.25 or greater, if you're on a lower version
> please see [building with an older cmake](#building-with-an-older-cmake)
For a Ninja based build system (all platforms) run:
```bash
cmake --preset default
cmake --build --preset releaseWithDebugInfo # or --preset debug, or --preset release
```
For Visual Studio run:
```bash
cmake --preset vs2022 # or 'vs2019' or `vs2022-dev`
start devenv ./build/slang.sln # to optionally open the project in Visual Studio
cmake --build --preset releaseWithDebugInfo # to build from the CLI, could also use --preset release or --preset debug
```
There also exists a `vs2022-dev` preset which turns on features to aid
debugging.
### WebAssembly build
In order to build WebAssembly build of Slang, Slang needs to be compiled with
[Emscripten SDK](https://github.com/emscripten-core/emsdk). You can find more
information about [Emscripten](https://emscripten.org/).
Clone the EMSDK repo, then install and activate the latest version of the SDK.
```bash
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
```
For non-Windows platforms
```bash
./emsdk install latest
./emsdk activate latest
```
For Windows
```cmd
emsdk.bat install latest
emsdk.bat activate latest
```
After EMSDK is activated, Slang needs to be built in a cross compiling setup:
- build the `generators` target for the build platform
- configure the build with `emcmake` for the host platform
- build for the host platform.
> Note: For more details on cross compiling please refer to the
> [cross-compiling](docs/building.md#cross-compiling) section.
```bash
# Build generators.
cmake --workflow --preset generators --fresh
mkdir generators
cmake --install build --prefix generators --component generators
# Configure the build with emcmake.
# emcmake is available only when emsdk_env setup the environment correctly.
pushd ../emsdk
source ./emsdk_env # For Windows, emsdk_env.bat
popd
emcmake cmake -DSLANG_GENERATORS_PATH=generators/bin --preset emscripten -G "Ninja"
# Build slang-wasm.js and slang-wasm.wasm in build.em/Release/bin
cmake --build --preset emscripten --target slang-wasm
```
> Note: If the last build step fails, try running the command that `emcmake`
> outputs, directly.
## Installing
Build targets may be installed using cmake:
```bash
cmake --build . --target install
```
This should install `SlangConfig.cmake` that should allow `find_package` to work.
SlangConfig.cmake defines `SLANG_EXECUTABLE` variable that will point to `slangc`
executable and also define `slang::slang` target to be linked to.
For now, `slang::slang` is the only exported target defined in the config which can
be linked to.
Example usage
```cmake
find_package(slang REQUIRED PATHS ${your_cmake_install_prefix_path} NO_DEFAULT_PATH)
# slang_FOUND should be automatically set
target_link_libraries(yourLib PUBLIC
slang::slang
)
```
## Testing
```bash
build/Debug/bin/slang-test
```
See the [documentation on testing](../tools/slang-test/README.md) for more information.
## More niche topics
### CMake options
| Option | Default | Description |
|-----------------------------------|----------------------------|----------------------------------------------------------------------------------------------|
| `SLANG_VERSION` | Latest `v*` tag | The project version, detected using git if available |
| `SLANG_EMBED_CORE_MODULE` | `TRUE` | Build slang with an embedded version of the core module |
| `SLANG_EMBED_CORE_MODULE_SOURCE` | `TRUE` | Embed the core module source in the binary |
| `SLANG_ENABLE_DXIL` | `TRUE` | Enable generating DXIL using DXC |
| `SLANG_ENABLE_ASAN` | `FALSE` | Enable ASAN (address sanitizer) |
| `SLANG_ENABLE_FULL_IR_VALIDATION` | `FALSE` | Enable full IR validation (SLOW!) |
| `SLANG_ENABLE_IR_BREAK_ALLOC` | `FALSE` | Enable IR BreakAlloc functionality for debugging. |
| `SLANG_ENABLE_GFX` | `TRUE` | Enable gfx targets |
| `SLANG_ENABLE_SLANGD` | `TRUE` | Enable language server target |
| `SLANG_ENABLE_SLANGC` | `TRUE` | Enable standalone compiler target |
| `SLANG_ENABLE_SLANGRT` | `TRUE` | Enable runtime target |
| `SLANG_ENABLE_SLANG_GLSLANG` | `TRUE` | Enable glslang dependency and slang-glslang wrapper target |
| `SLANG_ENABLE_TESTS` | `TRUE` | Enable test targets, requires SLANG_ENABLE_GFX, SLANG_ENABLE_SLANGD and SLANG_ENABLE_SLANGRT |
| `SLANG_ENABLE_EXAMPLES` | `TRUE` | Enable example targets, requires SLANG_ENABLE_GFX |
| `SLANG_LIB_TYPE` | `SHARED` | How to build the slang library |
| `SLANG_ENABLE_RELEASE_DEBUG_INFO` | `TRUE` | Enable generating debug info for Release configs |
| `SLANG_ENABLE_SPLIT_DEBUG_INFO` | `TRUE` | Enable generating split debug info for Debug and RelWithDebInfo configs |
| `SLANG_SLANG_LLVM_FLAVOR` | `FETCH_BINARY_IF_POSSIBLE` | How to set up llvm support |
| `SLANG_SLANG_LLVM_BINARY_URL` | System dependent | URL specifying the location of the slang-llvm prebuilt library |
| `SLANG_GENERATORS_PATH` | `` | Path to an installed `all-generators` target for cross compilation |
The following options relate to optional dependencies for additional backends
and running additional tests. Left unchanged they are auto detected, however
they can be set to `OFF` to prevent their usage, or set to `ON` to make it an
error if they can't be found.
| Option | CMake hints | Notes |
|--------------------------|--------------------------------|----------------------------------------------------------------------------------------------|
| `SLANG_ENABLE_CUDA` | `CUDAToolkit_ROOT` `CUDA_PATH` | Enable running tests with the CUDA backend, doesn't affect the targets Slang itself supports |
| `SLANG_ENABLE_OPTIX` | `Optix_ROOT_DIR` | Requires CUDA |
| `SLANG_ENABLE_NVAPI` | `NVAPI_ROOT_DIR` | Only available for builds targeting Windows |
| `SLANG_ENABLE_AFTERMATH` | `Aftermath_ROOT_DIR` | Enable Aftermath in GFX, and add aftermath crash example to project |
| `SLANG_ENABLE_XLIB` | | |
### Advanced options
| Option | Default | Description |
|------------------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------|
| `SLANG_ENABLE_DX_ON_VK`            | `FALSE` | Enable running the DX11 and DX12 tests on non-Windows platforms via vkd3d-proton, requires system-provided d3d headers |
| `SLANG_ENABLE_SLANG_RHI` | `TRUE` | Enable building and using [slang-rhi](https://github.com/shader-slang/slang-rhi) for tests |
| `SLANG_USE_SYSTEM_MINIZ` | `FALSE` | Build using system Miniz library instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_LZ4` | `FALSE` | Build using system LZ4 library instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_VULKAN_HEADERS` | `FALSE` | Build using system Vulkan headers instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_SPIRV_HEADERS` | `FALSE` | Build using system SPIR-V headers instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_UNORDERED_DENSE` | `FALSE` | Build using system unordered dense instead of the bundled version in [./external](./external) |
| `SLANG_SPIRV_HEADERS_INCLUDE_DIR` | `` | Use this specific path to SPIR-V headers instead of the bundled version in [./external](./external) |
### LLVM Support
There are several options for getting llvm-support:
- Use a prebuilt binary slang-llvm library:
`-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY` or `-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY_IF_POSSIBLE` (this is the default)
- You can set `SLANG_SLANG_LLVM_BINARY_URL` to point to a local
`libslang-llvm.so/slang-llvm.dll` or set it to a URL of a zip/archive
containing such a file
- If this isn't set then the build system tries to download it from the
release on github matching the current tag. If such a tag doesn't exist
or doesn't have the correct os/arch combination then the latest release
will be tried.
- If `SLANG_SLANG_LLVM_FLAVOR` is `FETCH_BINARY_IF_POSSIBLE` then in
the case that a prebuilt binary can't be found then the build will proceed
as though `DISABLE` was chosen
- Use a system supplied LLVM: `-DSLANG_SLANG_LLVM_FLAVOR=USE_SYSTEM_LLVM`, you
must have llvm-13.0 and a matching libclang installed. It's important that
either:
- You don't end up linking to a dynamic libllvm.so, this will almost
certainly cause multiple versions of LLVM to be loaded at runtime,
leading to errors like `opt: CommandLine Error: Option
'asm-macro-max-nesting-depth' registered more than once!`. Avoid this by
compiling LLVM without the dynamic library.
- Anything else which may be linked in (for example Mesa, also dynamically
loads the same llvm object)
- Do not enable LLVM support: `-DSLANG_SLANG_LLVM_FLAVOR=DISABLE`
To build only a standalone slang-llvm, you can run:
```bash
cmake --workflow --preset slang-llvm
```
This will generate `build/dist-release/slang-slang-llvm.zip` containing the
library. This, of course, uses the system LLVM to build slang-llvm, otherwise
it would just be a convoluted way to download a prebuilt binary.
### Cross compiling
Slang generates some code at build time, using generators built from this
codebase. Due to this, for cross compilation one must already have built these
generators for the build platform. Build them with the `generators` preset, and
pass the install path to the cross building CMake invocation using
`SLANG_GENERATORS_PATH`
Non-Windows platforms:
```bash
# build the generators
cmake --workflow --preset generators --fresh
mkdir build-platform-generators
cmake --install build --config Release --prefix build-platform-generators --component generators
# reconfigure, pointing to these generators
# Here is also where you should set up any cross compiling environment
cmake \
--preset default \
--fresh \
-DSLANG_GENERATORS_PATH=build-platform-generators/bin \
-Dwhatever-other-necessary-options-for-your-cross-build \
# for example \
-DCMAKE_C_COMPILER=my-arch-gcc \
-DCMAKE_CXX_COMPILER=my-arch-g++
# perform the final build
cmake --workflow --preset release
```
Windows
```bash
# build the generators
cmake --workflow --preset generators --fresh
mkdir build-platform-generators
cmake --install build --config Release --prefix build-platform-generators --component generators
# reconfigure, pointing to these generators
# Here is also where you should set up any cross compiling environment
# For example
./vcvarsamd64_arm64.bat
cmake \
--preset default \
--fresh \
-DSLANG_GENERATORS_PATH=build-platform-generators/bin \
-Dwhatever-other-necessary-options-for-your-cross-build
# perform the final build
cmake --workflow --preset release
```
### Example cross compiling with MSVC to windows-aarch64
One option is to build using the ninja generator, which requires providing the
native and cross environments via `vcvarsall.bat`
```bash
vcvarsall.bat
cmake --workflow --preset generators --fresh
mkdir generators
cmake --install build --prefix generators --component generators
vcvarsall.bat x64_arm64
cmake --preset default --fresh -DSLANG_GENERATORS_PATH=generators/bin
cmake --workflow --preset release
```
Another option is to build using the Visual Studio generator which can find
this automatically
```
cmake --preset vs2022 # or --preset vs2019
cmake --build --preset generators # to build from the CLI
cmake --install build --prefix generators --component generators
rm -rf build # The Visual Studio generator will complain if this is left over from a previous build
cmake --preset vs2022 --fresh -A arm64 -DSLANG_GENERATORS_PATH=generators/bin
cmake --build --preset release
```
## Building with an older CMake
Because older CMake versions don't support all the features we want to use in
CMakePresets, you'll have to do without the presets. Something like the following
```bash
cmake -B build -G Ninja
cmake --build build -j
```
## Notes
[^1] below 3.25, CMake lacks the ability to mark directories as being
system directories (https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html#prop_tgt:SYSTEM),
this leads to an inability to suppress warnings originating in the
dependencies in `./external`, so be prepared for some additional warnings.

View file

@ -1,36 +0,0 @@
# Our CI
There are github actions for building and testing slang.
## Tests
Most configurations run a restricted set of tests, however on some self hosted
runners we run the full test suite, as well as running Falcor's test suite with
the new slang build.
## Building LLVM
We require a static build of LLVM for building slang-llvm, we build and cache
this in all workflow runs. Since this changes infrequently, the cache is almost
always hit. A cold build takes about an hour on the slowest platform. The
cached output is a few hundred MB, so conceivably if we add many more platforms
we might be caching more than the 10GB github allowance, which would
necessitate being a bit more complicated in building and tracking outputs here.
For slang-llvm, this is handled the same as any other dependency, except on
Windows Debug builds, where we are required by the differences in Debug/Release
standard libraries to always make a release build, this is noted in the ci
action yaml file.
Note that we don't use sccache while building LLVM, as it changes very
infrequently. The caching of LLVM is done by caching the final build product
only.
## sccache
> Due to reliability issues, we are not currently using sccache, this is
> historical/aspirational.
The CI actions use sccache, keyed on compiler and platform, this runs on all
configurations and significantly speeds up small source change builds. This
cache can be safely missed without a large impact on build times.

View file

@ -1,648 +0,0 @@
Slang CPU Target Support
========================
Slang has preliminary support for producing CPU source and binaries.
# Features
* Can compile C/C++/Slang source to binaries (executables, shared libraries or [directly executable](#host-callable))
* Does *not* require a C/C++ be installed if [slang-llvm](#slang-llvm) is available (as distributed with slang binary distributions)
* Can compile Slang source into C++ source code
* Supports compute style shaders
# Limitations
These limitations apply to Slang transpiling to C++.
* Barriers are not supported (making these work would require an ABI change)
* Atomics are not currently supported
* Limited support for [out of bounds](#out-of-bounds) accesses handling
* Entry point/s cannot be named `main` (this is because downstream C++ compiler/s expect a regular `main`)
* `float16_t` type is not currently supported
For current C++ source output, the compiler needs to support partial specialization.
# How it works
The initial version works by using a 'downstream' C/C++ compiler. A C++ compiler does *not* in general need to be installed on a system to compile and execute code as long as [slang-llvm](#slang-llvm) is available. A [regular C/C++](#regular-cpp) compiler can also be used, allowing access to tooling, such as profiling and debuggers, as well as being able to use regular host development features such as linking, libraries, shared libraries/dlls and executables.
The C/C++ backend can be directly accessed much like 'dxc', 'fxc' or 'glslang' can, using the pass-through mechanism with the following new backends...
```
SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler
SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler
SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler
SLANG_PASS_THROUGH_LLVM, ///< slang-llvm 'compiler' - includes LLVM and Clang
SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type
```
Sometimes it is not important which C/C++ compiler is used, and this can be specified via the 'Generic C/C++' option. This will aim to use the compiler that is most likely binary compatible with the compiler that was used to build the Slang binary being used.
To make it possible for Slang to produce CPU code, in this first iteration we convert Slang code into C/C++ which can subsequently be compiled into CPU code. If source is desired instead of a binary this can be specified via the SlangCompileTarget. These can be specified on the `slangc` command line as `-target cpp`.
When using the 'pass through' mode for a CPU based target it is currently necessary to set an entry point, even though it's basically ignored.
In the API the `SlangCompileTarget`s are
```
SLANG_C_SOURCE ///< The C language
SLANG_CPP_SOURCE ///< The C++ language
SLANG_HOST_CPP_SOURCE, ///< C++ code for `host` style
```
Using the `-target` command line option
* `C_SOURCE`: c
* `CPP_SOURCE`: cpp,c++,cxx
* `HOST_CPP_SOURCE`: host-cpp,host-c++,host-cxx
Note! Output of C source is not currently supported.
If a CPU binary is required this can be specified as a `SlangCompileTarget` of
```
SLANG_EXECUTABLE ///< Executable (for hosting CPU/OS)
SLANG_SHADER_SHARED_LIBRARY ///< A shared library/Dll (for hosting CPU/OS)
SLANG_SHADER_HOST_CALLABLE ///< A CPU target that makes `compute kernel` compiled code available to be run immediately
SLANG_HOST_HOST_CALLABLE ///< A CPU target that makes `scalar` compiled code available to be run immediately
SLANG_OBJECT_CODE, ///< Object code that can be used for later linking
```
Using the `-target` command line option
* `EXECUTABLE`: exe, executable
* `SHADER_SHARED_LIBRARY`: sharedlib, sharedlibrary, dll
* `SHADER_HOST_CALLABLE`: callable, host-callable
* `OBJECT_CODE`: object-code
* `HOST_HOST_CALLABLE`: host-host-callable
Using `host-callable` types from the command line serves little purpose other than to test that such code compiles and can be loaded for host execution.
For launching a [shader like](#compile-style) Slang code on the CPU, there typically needs to be binding of values passed to the entry point function. How this works is described in the [ABI section](#abi). Functions *can* be executed directly but care must be taken to [export](#visibility) them and to ensure there isn't an issue with [context threading](#context-threading).
If a binary target is requested, the binary contents can be returned in a ISlangBlob just like for other targets. When using a [regular C/C++ compiler](#regular-cpp) the CPU binary typically must be saved as a file and then potentially marked for execution by the OS. It may be possible to load shared libraries or dlls from memory - but doing so is a non standard feature, that requires unusual work arounds. If possible it is typically fastest and easiest to use [slang-llvm](#slang-llvm) to directly execute slang or C/C++ code.
## <a id="compile-style"/>Compilation Styles
There are currently two styles of *compilation style* supported - `host` and `shader`.
The `shader` style implies
* The code *can* be executed in a GPU-kernel like execution model, launched across multiple threads (as described in the [ABI](#abi))
* Currently no reference counting
* Only functionality from the Slang core module, built in HLSL or anything supplied by a [COM interfaces](#com-interface) is available
* Currently [slang-llvm](#slang-llvm) only supports the `shader` style
The `host` style implies
* Execution style is akin to more regular CPU scalar code
* Typically requires linking with `slang-rt` and use of `slang-rt` types such as `Slang::String`
* Allows use of `new`
* Allows the use of `class` for reference counted types
* COM interfaces are reference counted
The styles as used with [host-callable](#host-callable) are indicated via the API by
```
SLANG_SHADER_HOST_CALLABLE ///< A CPU target that makes `compute kernel` compiled code available to be run immediately
SLANG_HOST_HOST_CALLABLE ///< A CPU target that makes `scalar` compiled code available to be run immediately
```
Or via the `-target` command line options
* For 'shader' `callable` `host-callable`
* For 'host' `host-host-callable`
For an example of the `host` style please look at "examples/cpu-hello-world".
## <a id="host-callable"/>Host callable
Slang supports `host-callable` compilation targets which allow for the direct execution of the compiled code on the CPU. Currently this style of execution is supported if [slang-llvm](#slang-llvm) or a [regular C/C++ compiler](#regular-cpp) are available.
There are currently two [compilation styles](#compile-style) supported.
In order to call into `host-callable` code after compilation it's necessary to access the result via the `ISlangSharedLibrary` interface.
Please look at the [ABI](#abi) section for more specifics around ABI usage especially for `shader` [compile styles](#compile-style).
```C++
slang::ICompileRequest* request = ...;
const SlangResult compileRes = request->compile();
// Even if there were no errors that forced compilation to fail, the
// compiler may have produced "diagnostic" output such as warnings.
// We will go ahead and print that output here.
//
if(auto diagnostics = request->getDiagnosticOutput())
{
printf("%s", diagnostics);
}
// Get the 'shared library' (note that this doesn't necessarily have to be implemented as a shared library
// it's just an interface to executable code).
ComPtr<ISlangSharedLibrary> sharedLibrary;
SLANG_RETURN_ON_FAIL(request->getTargetHostCallable(0, sharedLibrary.writeRef()));
// We can now find exported functions/variables via findSymbolAddressByName
// For a __global public __extern_cpp int myGlobal;
{
auto myGlobalPtr = (int*)sharedLibrary->findSymbolAddressByName("myGlobal");
if (myGlobalPtr)
{
*myGlobalPtr = 10;
}
}
// To get a function
//
// public __extern_cpp int add(int a, int b);
// Test a free function
{
typedef int (*AddFunc)(int a, int b);
auto func = (AddFunc)sharedLibrary->findFuncByName("add");
if (func)
{
// Let's add!
int c = func(10, 20);
}
}
```
## <a id="slang-llvm"/>slang-llvm
`slang-llvm` is a special Slang version of [LLVM](https://llvm.org/). It's current main purpose is to allow compiling C/C++ such that it is [directly available](#host-callable) for execution using the LLVM JIT feature. If `slang-llvm` is available it is the default downstream compiler for [host-callable](#host-callable). This is because it allows for faster compilation, avoids the file system, and can execute the compiled code directly. [Regular C/C++ compilers](#regular-cpp) can be used for [host-callable](#host-callable) but requires writing source files to the file system and creating/loading shared-libraries/dlls to make the feature work. Additionally using `slang-llvm` avoids the need for a C/C++ compiler installed on a target system.
`slang-llvm` contains the Clang C++ compiler, so it is possible to also compile and execute C/C++ code in the [host-callable](#host-callable) style.
Limitations of using `slang-llvm`
* Can only currently be used for [shader style](#compile-style)
* Cannot produce object files, libraries, OS executables or binaries
* Is *limited* because it is not possible to directly access libraries such as the C or C++ standard libraries (see [COM interface](#com-interface) for a work-around)
* It's not possible to source debug into `slang-llvm` compiled code running on the JIT (see [debugging](#debugging) for a work-around)
* Not currently possible to return as a ISlangBlob representation
You can detect if `slang-llvm` is available via
```C++
slang::IGlobalSession* slangSession = ...;
const bool hasSlangLlvm = SLANG_SUCCEEDED(slangSession->checkPassThroughSupport(SLANG_PASS_THROUGH_LLVM));
```
## <a id="regular-cpp"/>Regular C/C++ compilers
Slang can work with regular C/C++ 'downstream' compilers. It has been tested to work with Visual Studio, Clang and G++/Gcc on Windows and Linux.
Under the covers when Slang is used to generate a binary via a C/C++ compiler, it must do so through the file system. Currently this means the source (say generated by Slang) and the binary (produced by the C/C++ compiler) must all be files. To make this work Slang uses temporary files. The reasoning for hiding this mechanism, other than simplicity, is that it allows using with [slang-llvm](#slang-llvm) without any changes.
## <a id="visibility"/>Visibility
In a typical Slang [shader like](#compile-style) scenario, functionality is exposed via entry points. It can be convenient and desirable to be able to call Slang functions directly from application code, and not just via entry points. By default non entry point functions are *removed* if they are not reachable by the specified entry point. Additionally for non entry point functions Slang typically generates function names that differ from the original name.
To work around these two issues the `public` and `__extern_cpp` modifiers can be used.
`public` makes the variable or function visible outside of the module even if it isn't used within the module. For the function to work it will also keep around any function or variable it accesses.
Note! Some care is needed here around [context threading](#context-threading) - if a function or any function a function accesses requires state held in the context, the signature of the function will be altered to include the context as the first parameter.
Making a function or variable `public` does not mean that the name remains the same. To indicate that the name should not be altered use the `__extern_cpp` modifier. For example
```
// myGlobal will be visible to the application (note the __global modifier additionally means it has C++ global behavior)
__global public __extern_cpp int myGlobal;
// myFunc is available to the application
public __extern_cpp myFunc(int a)
{
return a * a;
}
```
## <a id="com-interface"/>COM interface support
Slang has preliminary support for [Component Object Model (COM)](https://en.wikipedia.org/wiki/Component_Object_Model) interfaces in CPU code.
```
[COM]
interface IDoThings
{
int doThing(int a, int b);
int calcHash(NativeString in);
void printMessage(NativeString nativeString);
}
```
This support provides a way for an application to provide access to functionality in the application runtime - essentially it allows Slang code to call into application code. To do this a COM interface can be created that exposes the desired functionality. The interface/s can be made available through any of the normal mechanisms - such as through a constant buffer variable. Additionally [`__global`](#actual-global) provides a way to make functions available to Slang code without the need for [context threading](#context-threading).
The example "examples/cpu-com-example" shows this at work.
## <a id="actual-global"/>Global support
The Slang language is based on the HLSL language. This heritage means that globals have slightly different meaning to typical C/C++ usage.
```
int myGlobal; ///< A constant value stored in a constant buffer
static int staticMyGlobal; ///< A global that cannot be seen by the application
static const int staticConstMyGlobal; ///< A fixed value
```
The variable `myGlobal` will be a member of a constant buffer, meaning its value can only change via bindings and not during execution. For some uses having `myGlobal` in the constant buffer might be appropriate, for example
* Its use is reached from a [shader style](#compile-style) entry point
* Its value is constant across the launch
In Slang a variable can be declared as global in the C/C++ sense via the `__global` modifier. For example
```
__global int myGlobal;
```
Doing so means
* `myGlobal` will not be defined in the constant buffer
* It can be used in functions that do not have access to the [constant buffer](#context-threading)
* It can be modified in the kernel
* Can only be used on CPU targets (currently `__global` is not supported on the GPU targets)
One disadvantage of using `__global` is in multi-threaded environments, with multiple launches on multiple CPU threads, there is only one global and will likely cause problems unless the global value is the same across all threads.
It may be useful to set a global directly via host code, without having to write a function to enable the access. This is possible by using [`public`](#visibility) and [`__extern_cpp`](#visibility) modifiers. For example
```
__global public __extern_cpp int myGlobal;
```
The global can now be set from host code via
```C++
slang::ICompileRequest* request = ...;
// Get the 'shared library' (note that this doesn't necessarily have to be implemented as a shared library
// it's just an interface to executable code).
ComPtr<ISlangSharedLibrary> sharedLibrary;
SLANG_RETURN_ON_FAIL(request->getTargetHostCallable(0, sharedLibrary.writeRef()));
// Set myGlobal to 20
{
auto myGlobalPtr = (int*)sharedLibrary->findSymbolAddressByName("myGlobal");
*myGlobalPtr = 20;
}
```
In terms of reflection `__global` variables are not visible.
## NativeString
Slang supports a rich 'String' type when using the [host style](#compile-style), which for C++ targets is implemented as the `Slang::String` C++ type. The type is only available on CPU targets that support `slang-rt`.
Some limited String-like support is available via `NativeString` type which for C/C++ CPU targets is equivalent to `const char*`. For GPU targets this will use the same hash mechanism as normally available.
`NativeString` is supported by all [shader compilation styles](#compile-style) including [slang-llvm](#slang-llvm).
TODO(JS): What happens with String with shader compile style on CPU? Shouldn't it be the same as GPU (and reflected as such in reflection)?
## Debugging
It is currently not possible to step into LLVM-JIT code when using [slang-llvm](#slang-llvm). Fortunately it is possible to step into code compiled via a [regular C/C++ compiler](#regular-cpp).
Below is a code snippet showing how to switch to a [regular C/C++ compiler](#regular-cpp) at runtime.
```C++
SlangPassThrough findRegularCppCompiler(slang::IGlobalSession* slangSession)
{
// Current list of 'regular' C/C++ compilers
const SlangPassThrough cppCompilers[] =
{
SLANG_PASS_THROUGH_VISUAL_STUDIO,
SLANG_PASS_THROUGH_GCC,
SLANG_PASS_THROUGH_CLANG,
};
// Do we have a C++ compiler
for (const auto compiler : cppCompilers)
{
if (SLANG_SUCCEEDED(slangSession->checkPassThroughSupport(compiler)))
{
return compiler;
}
}
return SLANG_PASS_THROUGH_NONE;
}
SlangResult useRegularCppCompiler(slang::IGlobalSession* session)
{
const auto regularCppCompiler = findRegularCppCompiler(session);
if (regularCppCompiler != SLANG_PASS_THROUGH_NONE)
{
session->setDownstreamCompilerForTransition(SLANG_CPP_SOURCE, SLANG_SHADER_HOST_CALLABLE, regularCppCompiler);
session->setDownstreamCompilerForTransition(SLANG_CPP_SOURCE, SLANG_HOST_HOST_CALLABLE, regularCppCompiler);
return SLANG_OK;
}
return SLANG_FAIL;
}
```
It is generally recommended to use [slang-llvm](#slang-llvm) if that is appropriate, but to switch to using a [regular C/C++ compiler](#regular-cpp) when debugging is needed. This should be largely transparent to most code.
Executing CPU Code
==================
In typical Slang operation when code is compiled it produces either source or a binary that can then be loaded by another API such as a rendering API. With CPU code the binary produced could be saved to a file and then executed as an exe or a shared library/dll. In practice though it is common to want to be able to execute compiled code immediately. Having to save off to a file and then load again can be awkward. It is also not necessarily the case that code needs to be saved to a file to be executed.
To handle being able call code directly, code can be compiled using the [host-callable](#host-callable).
For pass through compilation of C/C++ this mechanism allows any functions marked for export to be directly queried. Marking for export is a C/C++ compiler specific feature. Look at the definition of `SLANG_PRELUDE_EXPORT` in the [C++ prelude](#prelude).
For a complete example on how to execute CPU code using `spGetEntryPointHostCallable`/`getEntryPointHostCallable` look at code in `example/cpu-hello-world`.
<a id="abi"/>Application Binary Interface (ABI)
===
Say we have some Slang source like the following:
```
struct Thing { int a; int b; }
Texture2D<float> tex;
SamplerState sampler;
RWStructuredBuffer<int> outputBuffer;
ConstantBuffer<Thing> thing3;
[numthreads(4, 1, 1)]
void computeMain(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform Thing thing,
uniform Thing thing2)
{
// ...
}
```
When compiled into a [shader compile style](#compile-style) shared library/dll/host-callable - how is it invoked? An entry point in the Slang source code produces several exported functions. The 'default' exported function has the same name as the entry point in the original source. It has the signature
```
void computeMain(ComputeVaryingInput* varyingInput, UniformEntryPointParams* uniformParams, UniformState* uniformState);
```
NOTE! Using `main` as an entry point name should be avoided if CPU is a target because it typically causes compilation errors due it's normal C/C++ usage.
ComputeVaryingInput is defined in the prelude as
```
struct ComputeVaryingInput
{
uint3 startGroupID;
uint3 endGroupID;
};
```
`ComputeVaryingInput` allows specifying a range of groupIDs to execute - all the ids in a grid from startGroup to endGroup, but not including the endGroupIDs. Most compute APIs allow specifying an x,y,z extent on 'dispatch'. This would be equivalent to having startGroupID = { 0, 0, 0} and endGroupID = { x, y, z }. The exported function allows setting a range of groupIDs such that client code could dispatch different parts of the work to different cores. This group range mechanism was chosen as the 'default' mechanism as it is most likely to achieve the best performance.
There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. For the entry point 'computeMain' these functions would be accessible from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call.
It may be desirable to have even finer control of how execution takes place down to the level of individual 'thread's and this can be achieved with the `_Thread` style. The signature looks as follows
```
struct ComputeThreadVaryingInput
{
uint3 groupID;
uint3 groupThreadID;
};
void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, UniformEntryPointParams* uniformParams, UniformState* uniformState);
```
When invoking the kernel at the `thread` level it is a question of updating the groupID/groupThreadID, to specify which thread of the computation to execute. For the example above we have `[numthreads(4, 1, 1)]`. This means groupThreadID.x can vary from 0-3 and .y and .z must be 0. That groupID.x indicates which 'group of 4' to execute. So groupID.x = 1, with groupThreadID.x=0,1,2,3 runs the 4th, 5th, 6th and 7th 'thread'. Being able to invoke each thread in this way is flexible - in that any specific thread can be specified and executed. It is not necessarily very efficient because there is the call overhead and a small amount of extra work that is performed inside the kernel.
Note that the `_Thread` style signature is likely to change to support 'groupshared' variables in the near future.
In terms of performance the 'default' function is probably the most efficient for most common usages. The `_Group` style allows for slightly less loop overhead, but with many invocations this will likely be drowned out by the extra call/setup overhead. The `_Thread` style in most situations will be the slowest, with even more call overhead, and less options for the C/C++ compiler to use faster paths.
The UniformState and UniformEntryPointParams struct typically vary by shader. UniformState holds 'normal' bindings, whereas UniformEntryPointParams hold the uniform entry point parameters. Where specific bindings or parameters are located can be determined by reflection. The structures for the example above would be something like the following...
```
struct UniformEntryPointParams
{
Thing thing;
Thing thing2;
};
struct UniformState
{
Texture2D<float > tex;
SamplerState sampler;
RWStructuredBuffer<int32_t> outputBuffer;
Thing* thing3;
};
```
Notice that of the entry point parameters `dispatchThreadID` is not part of UniformEntryPointParams and this is because it is not uniform.
`ConstantBuffer` and `ParameterBlock` will become pointers to the type they hold (as `thing3` is in the above structure).
`StructuredBuffer<T>`,`RWStructuredBuffer<T>` become
```
T* data;
size_t count;
```
`ByteAddressBuffer`, `RWByteAddressBuffer` become
```
uint32_t* data;
size_t sizeInBytes;
```
Resource types become pointers to interfaces that implement their features. For example `Texture2D` become a pointer to a `ITexture2D` interface that has to be implemented in client side code. Similarly SamplerState and SamplerComparisonState become `ISamplerState` and `ISamplerComparisonState`.
The actual definitions for the interfaces for resource types, and types are specified in 'slang-cpp-types.h' in the `prelude` directory.
## Unsized arrays
Unsized arrays can be used, which are indicated by an array with no size as in `[]`. For example
```
RWStructuredBuffer<int> arrayOfArrays[];
```
With normal 'sized' arrays, the elements are just stored contiguously within wherever they are defined. With an unsized array they map to `Array<T>` which is...
```
T* data;
size_t count;
```
Note that there is no method in the shader source to get the `count`, even though on the CPU target it is stored and easily available. This is because of the behavior on GPU targets
* That the count has to be stored elsewhere (unlike with CPU)
* On some GPU targets there is no bounds checking - accessing outside the bound values can cause *undefined behavior*
* The elements may be laid out *contiguously* on GPU
In practice this means if you want to access the `count` in shader code it will need to be passed by another mechanism - such as within a constant buffer. It is possible in the future support may be added to allow direct access of `count` work across targets transparently.
It is perhaps worth noting that the CPU allows us to have an indirection (a pointer to the unsized arrays contents) which has the potential for more flexibility than is possible on GPU targets. GPU target typically require the elements to be placed 'contiguously' from their location in their `container` - be that registers or in memory. This means on GPU targets there may be other restrictions on where unsized arrays can be placed in a structure for example, such as only at the end. If code needs to work across targets this means these restrictions will need to be followed across targets.
## <a id="context-threading"/>Context Threading
The [shader compile style](#compile-style) brings some extra issues to bear. In the HLSL compute kernel launch model application visible variables and resource are bound. As described in the [ABI](#abi) section these bindings and additional information identifying a compute thread are passed into the launch as a context. Take for example the code snippet below
```
int myGlobal;
int myFunc(int v)
{
return myGlobal + v;
}
int anotherFunc(int a, int b)
{
return a + b;
}
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
outputBuffer[dispatchThreadID.x] = myFunc(dispatchThreadID.x) + anotherFunc(1, dispatchThreadID.y);
}
```
The function `myFunc` accesses a variable `myGlobal` that is held within a constant buffer. The function cannot be meaningfully executed without access to the context, and the context is available as a parameter passed through `computeMain` entry point at launch. This means the *actual* signature of this function in output code will be something like
```
int32_t myFunc_0(KernelContext_0 * kernelContext_0)
{
return *(&(*(&kernelContext_0->globalParams_0))->myGlobal_0) + int(1);
}
```
The context parameter has been *threaded* into this function. This *threading* will happen to any function that accesses any state that is held in the context. This behavior also happens transitively - if a function *could* call *any* other function that requires the context, the context will be threaded through to it also.
If application code assumed `myFunc` could be called with no parameters a crash would likely ensue. Note that `anotherFunc` does not have the issue because it doesn't perform an access that needs the context, and so no context threading is added.
If a global is desired in a function that wants to be called from the application, the [`__global`](#actual-global) modifier can be used.
## <a id="prelude"/>Prelude
For C++ targets, there is code to support the Slang generated source defined within the 'prelude'. The prelude is inserted text placed before the Slang generated C++ source. For the Slang command line tools as well as the test infrastructure, the prelude functionality is achieved through a `#include` in the prelude text of the `prelude/slang-cpp-prelude.h` specified with an absolute path. Doing so means other files the `slang-cpp-prelude.h` might need can be specified relatively, and include paths for the backend C/C++ compiler do not need to be modified.
The prelude needs to define
* 'Built in' types (vector, matrix, 'object'-like Texture, SamplerState etc)
* Scalar intrinsic function implementations
* Compiler based definitions/tweaks
For the Slang prelude this is split into the following files...
* 'prelude/slang-cpp-prelude.h' - Header that includes all the other requirements & some compiler tweaks
* 'prelude/slang-cpp-scalar-intrinsics.h' - Scalar intrinsic implementations
* 'prelude/slang-cpp-types.h' - The 'built in types'
* 'slang.h' - Slang header is used for majority of compiler based definitions
For a client application - as long as the requirements of the generated code are met, the prelude can be implemented by whatever mechanism is appropriate for the client. For example the implementation could be replaced with another implementation, or the prelude could contain all of the required text for compilation. Setting the prelude text can be achieved with the method on the global session...
```
/** Set the 'prelude' for generated code for a 'downstream compiler'.
@param passThrough The downstream compiler for generated code that will have the prelude applied to it.
@param preludeText The text added pre-pended verbatim before the generated source
That for pass-through usage, prelude is not pre-pended, preludes are for code generation only.
*/
virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude(
SlangPassThrough passThrough,
const char* preludeText) = 0;
```
It may be useful to be able to include `slang-cpp-types.h` in C++ code to access the types that are used in the generated code. This introduces a problem in that the types used in the generated code might clash with types in client code. To work around this problem, you can wrap all of the types defined in the prelude with a namespace of your choosing. For example
```
#define SLANG_PRELUDE_NAMESPACE CPPPrelude
#include "../../prelude/slang-cpp-types.h"
```
Would wrap all the Slang prelude types in the namespace `CPPPrelude`, such that say a `StructuredBuffer<int32_t>` could be specified in C++ source code as `CPPPrelude::StructuredBuffer<int32_t>`.
The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines the absolute path to `slang-cpp-prelude.h` and then just makes the prelude `#include "the absolute path"`.
The *default* prelude is set to the contents of the files for C++ held in the prelude directory and is held within the Slang shared library. It is therefore typically not necessary to distribute Slang with prelude files.
Language aspects
================
# Arrays passed by Value
Slang follows the HLSL convention that arrays are passed by value. This is in contrast to C/C++ where arrays are passed by reference. To make generated C/C++ follow this convention an array is turned into a 'FixedArray' struct type. Since classes by default in C/C++ are passed by value the wrapped array is also.
To get something similar to C/C++ operation the array can be marked `inout` to make it passed by reference.
Limitations
===========
# <a id="out-of-bounds"/>Out of bounds access
In HLSL code if an access is made out of bounds of a StructuredBuffer, execution proceeds. If an out of bounds read is performed, a zeroed value is returned. If an out of bounds write is performed it's effectively a noop, as the value is discarded. On the CPU target this behavior is *not* supported by default.
For a debug CPU build an out of bounds access will assert, for a release build the behaviour is by default undefined. A limited [zero index](#zero-index) out of bounds mechanism is supported, but must be enabled.
The reason for this is that such an access is difficult and/or slow to implement the identical GPU behavior on the CPU. The underlying problem is `operator[]` typically returns a reference to the contained value. If this is out of bounds - it's not clear what to return, in particular because the value may be read or written and moreover elements of the type might be written. In practice this means a global zeroed value cannot be returned.
This could be somewhat supported if code gen worked as follows for, say,
```
RWStructuredBuffer<float4> values;
values[3].x = 10;
```
Produces
```
template <typename T>
struct RWStructuredBuffer
{
T& at(size_t index, T& defValue) { return index < size ? values[index] : defValue; }
T* values;
size_t size;
};
RWStructuredBuffer<float4> values;
// ...
Vector<float, 3> defValue = {}; // Zero initialize such that read access returns default values
values.at(3, defValue).x = 10;
```
Note that `[]` would be turned into the `at` function, which takes the default value as a parameter provided by the caller. If this is then written to then only the defValue is corrupted. Even this mechanism may not be quite right, because if we write and then read again from the out of bounds reference in HLSL we may expect that 0 is returned, whereas here we get the value that was last written.
## <a id="zero-index"/>Zero index bound checking
If bounds checking is wanted in order to avoid undefined behavior and limit how memory is accessed `zero indexed` bounds checking might be appropriate. When enabled if an access is out of bounds the value at the zero index is returned. This is quite different behavior than the typical GPU behavior, but is fairly efficient and simple to implement. Importantly it means behavior is well defined and always 'in range' assuming there is an element.
To enable zero indexing bounds checking pass in the define `SLANG_ENABLE_BOUND_ZERO_INDEX` to a Slang compilation. This define is passed down to C++ and CUDA compilations, and the code in the CUDA and C++ preludes implement the feature. Note that zero indexed bounds checking will slow down accesses that are checked.
The C++ implementation of the feature can be seen by looking at the file "prelude/slang-cpp-types.h". For CUDA "prelude/slang-cuda-prelude.h".
The bounds checking macros are guarded such that it is possible to replace the implementations, without directly altering the prelude.
TODO
====
# Main
* groupshared is not yet supported
* Output of header files
* Output multiple entry points
# Internal Slang compiler features
These issues are more internal Slang features/improvements
* Currently only generates C++ code, it would be fairly straight forward to support C (especially if we have 'intrinsic definitions')
* Have 'intrinsic definitions' in standard library - such that they can be generated where appropriate
+ This will simplify the C/C++ code generation as it means the Slang language will generate most of the appropriate code
* Currently 'construct' IR inst is supported as is, we may want to split out to separate instructions for specific scenarios
* Refactoring around swizzle. Currently in emit it has to check for a variety of scenarios - could be simplified with an IR pass and perhaps more specific instructions.

View file

@ -1,333 +0,0 @@
Slang CUDA Target Support
=========================
Slang has preliminary support for producing CUDA source, and PTX binaries using [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html).
NOTE! NVRTC is only available for 64-bit operating systems. On Windows Visual Studio make sure you are compiling for 'x64' and/or use 64 bit Slang binaries.
# Features
* Can compile Slang source into CUDA source code
* Supports compute style shaders
* Supports a 'bindless' CPU like model
* Can compile CUDA source to PTX through 'pass through' mechanism
# Limitations
These limitations apply to Slang transpiling to CUDA.
* Only supports the 'texture object' style binding (The texture object API is only supported on devices of compute capability 3.0 or higher. )
* Samplers are not separate objects in CUDA - they are combined into a single 'TextureObject'. So samplers are effectively ignored on CUDA targets.
* When using a TextureArray.Sample (layered texture in CUDA) - the index will be treated as an int, as this is all CUDA allows
* Care must be used in using `WaveGetLaneIndex` wave intrinsic - it will only give the right results for appropriate launches
* CUDA 'surfaces' are used for textures which are read/write (aka RWTexture).
The following are a work in progress or not implemented but are planned to be so in the future
* Some resource types remain unsupported, and not all methods on all types are supported
# How it works
For producing PTX binaries Slang uses [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html). The NVRTC dll/shared library has to be available to Slang (for example in the appropriate PATH) for it to be able to produce PTX.
The NVRTC compiler can be accessed directly via the pass through mechanism and is identified by the enum value `SLANG_PASS_THROUGH_NVRTC`.
Much like other targets that use downstream compilers Slang can be used to compile CUDA source directly to PTX via the pass through mechanism. The Slang command line options will broadly be mapped down to the appropriate options for the NVRTC compilation. In the API the `SlangCompileTarget` for CUDA is `SLANG_CUDA_SOURCE` and for PTX is `SLANG_PTX`. These can also be specified on the Slang command line as `-target cuda` and `-target ptx`.
## Locating NVRTC
Finding NVRTC can require some nuance if a specific version is required. On the command line the `-nvrtc-path` option can be used to set the `path` to NVRTC. Also `spProcessCommandLineArguments`/`processCommandLineArguments` with `-nvrtc-path` or `setDownstreamCompilerPath` with `SLANG_PASS_THROUGH_NVRTC` can be used to set the location and/or name of NVRTC via the API.
Important points of note are
* The name of the shared library should *not* include any extension (such as `.dll`/`.so`/`.dynlib`) or prefix (such as `lib`).
* The path also *doesn't* have to be a path, it can just be the shared library name. Doing so will mean it will be searched for by whatever the default mechanism is on the target.
* If a path and/or name is specified for NVRTC - this will be the *only* version searched for.
If a path/name is *not* specified for NVRTC, Slang will attempt to load a shared library called `nvrtc`. For non Windows targets this should be enough to find and load the latest version.
On Windows NVRTC dlls have a name that contains the version number, for example `nvrtc64_102_0.dll`. This will cause the load of just `nvrtc` to fail. One approach to fix this is to place the NVRTC dll and associated files in the same directory as slang.dll, and rename the main dll to `nvrtc.dll`. Another approach is to specify directly on the command line the name including the version, as previously discussed. For example
`-nvrtc-path nvrtc64_102_0`
will load NVRTC 10.2 assuming that version of the dll can be found via the normal lookup mechanism.
On Windows if NVRTC is not loadable directly as 'nvrtc' Slang will attempt to search for the newest version of NVRTC on your system. The places searched are...
* The instance directory (where the slang.dll and/or program exe is)
* The CUDA_PATH environment variable (if set)
* Directories in PATH that look like a CUDA installation.
If a candidate is found via an earlier mechanism, subsequent searches are not performed. If multiple candidates are found, Slang tries the newest version first.
Binding
=======
Say we have some Slang source like the following:
```
struct Thing { int a; int b; }
Texture2D<float> tex;
SamplerState sampler;
RWStructuredBuffer<int> outputBuffer;
ConstantBuffer<Thing> thing3;
[numthreads(4, 1, 1)]
void computeMain(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform Thing thing,
uniform Thing thing2)
{
// ...
}
```
This will be turned into a CUDA entry point with
```
struct UniformEntryPointParams
{
Thing thing;
Thing thing2;
};
struct UniformState
{
CUtexObject tex; // This is the combination of a texture and a sampler(!)
SamplerState sampler; // This variable exists within the layout, but it's value is not used.
RWStructuredBuffer<int32_t> outputBuffer; // This is implemented as a template in the CUDA prelude. It's just a pointer, and a size
Thing* thing3; // Constant buffers map to pointers
};
// [numthreads(4, 1, 1)]
extern "C" __global__ void computeMain(UniformEntryPointParams* params, UniformState* uniformState)
```
With CUDA - the caller specifies how threading is broken up, so `[numthreads]` is available through reflection, and in a comment in output source code but does not produce varying code.
The UniformState and UniformEntryPointParams struct typically vary by shader. UniformState holds 'normal' bindings, whereas UniformEntryPointParams hold the uniform entry point parameters. Where specific bindings or parameters are located can be determined by reflection. The structures for the example above would be something like the following...
`StructuredBuffer<T>`,`RWStructuredBuffer<T>` become
```
T* data;
size_t count;
```
`ByteAddressBuffer`, `RWByteAddressBuffer` become
```
uint32_t* data;
size_t sizeInBytes;
```
## Texture
Read only textures will be bound as the opaque CUDA type CUtexObject. This type is the combination of both a texture AND a sampler. This is somewhat different from HLSL, where there can be separate `SamplerState` variables. This allows access of a single texture binding with different types of sampling.
If code relies on this behavior it will be necessary to bind multiple CUtexObjects with different sampler settings, accessing the same texture data.
Slang has some preliminary support for the TextureSampler type - a combined Texture and SamplerState. Writing Slang code using this mechanism exposes the combined texture/sampler semantics explicitly in the source, allowing it to target both CUDA and other platforms appropriately.
Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superfluous anyway. RWTexture does allow Load on other texture types.
## RWTexture
RWTexture types are converted into CUsurfObject type.
In regular CUDA it is not possible to do a format conversion on an access to a CUsurfObject. Slang does add support for hardware write conversions where they are available. To enable the feature it is necessary to attribute your RWTexture with `format`. For example
```
[format("rg16f")]
RWTexture2D<float2> rwt2D_2;
```
The format names used are the same as for [GLSL layout format types](https://www.khronos.org/opengl/wiki/Layout_Qualifier_(GLSL)). If no format is specified Slang will *assume* that the format is the same as the type specified.
Note that the format attribution is on variables/parameters/fields and not part of the type system. This means that if you have a scenario like...
```
[format(rg16f)]
RWTexture2d<float2> g_texture;
float2 getValue(RWTexture2D<float2> t)
{
return t[int2(0, 0)];
}
void doThing()
{
float2 v = getValue(g_texture);
}
```
Even `getValue` will receive t *without* the format attribute, and so will access it, presumably erroneously. A workaround for this specific scenario would be to attribute the parameter
```
float2 getValue([format("rg16f")] RWTexture2D<float2> t)
{
return t[int2(0, 0)];
}
```
This will only work correctly if `getValue` is called with a `t` that has that format attribute. As it stands no checking is performed on this matching so no error or warning will be produced if there is a mismatch.
There is limited software support for doing a conversion on reading. Currently this only supports 1D, 2D, 3D RWTexture, backed with half1, half2 or half4. For this path to work NVRTC must have the `cuda_fp16.h` and associated files available. Please check the section on `Half Support`.
If hardware read conversions are desired, this can be achieved by having a Texture<T> that uses the surface of a RWTexture<T>. Using the Texture<T> not only allows hardware conversion but also filtering.
It is also worth noting that CUsurfObjects in CUDA are NOT allowed to have mip maps.
By default surface access uses cudaBoundaryModeZero, this can be replaced using the macro SLANG_CUDA_BOUNDARY_MODE in the CUDA prelude. For HW format conversions the macro SLANG_PTX_BOUNDARY_MODE. These boundary settings are in effect global for the whole of the kernel.
`SLANG_CUDA_BOUNDARY_MODE` can be one of
* cudaBoundaryModeZero - out of bounds reads return zero, and out of bounds writes are dropped
* cudaBoundaryModeClamp - accesses are clamped to the nearest in-bounds surface location (sized appropriately)
* cudaBoundaryModeTrap - out of bounds accesses cause an execution trap
`SLANG_PTX_BOUNDARY_MODE` can be one of `trap`, `clamp` or `zero`. In general it is recommended to have both set to the same type of value, for example `cudaBoundaryModeZero` and `zero`.
## Sampler
Samplers are in effect ignored in CUDA output. Currently we do output a variable `SamplerState`, but this value is never accessed within the kernel and so can be ignored. More discussion on this behavior is in `Texture` section.
## Unsized arrays
Unsized arrays can be used, which are indicated by an array with no size as in `[]`. For example
```
RWStructuredBuffer<int> arrayOfArrays[];
```
With normal 'sized' arrays, the elements are just stored contiguously within wherever they are defined. With an unsized array they map to `Array<T>` which is...
```
T* data;
size_t count;
```
Note that there is no method in the shader source to get the `count`, even though on the CUDA target it is stored and easily available. This is because of the behavior on GPU targets
* That the count has to be stored elsewhere (unlike with CUDA)
* On some GPU targets there is no bounds checking - accessing outside the bound values can cause *undefined behavior*
* The elements may be laid out *contiguously* on GPU
In practice this means if you want to access the `count` in shader code it will need to be passed by another mechanism - such as within a constant buffer. It is possible in the future support may be added to allow direct access of `count` work across targets transparently.
## Prelude
For CUDA the code to support the code generated by Slang is partly defined within the 'prelude'. The prelude is inserted text placed before the generated CUDA source code. For the Slang command line tools as well as the test infrastructure, the prelude functionality is achieved through a `#include` in the prelude text of the `prelude/slang-cuda-prelude.h` specified with an absolute path. Doing so means other files the `slang-cuda-prelude.h` might need can be specified relatively, and include paths for the backend compiler do not need to be modified.
The prelude needs to define
* 'Built in' types (vector, matrix, 'object'-like Texture, SamplerState etc)
* Scalar intrinsic function implementations
* Compiler based definitions/tweaks
For a client application - as long as the requirements of the generated code are met, the prelude can be implemented by whatever mechanism is appropriate for the client. For example the implementation could be replaced with another implementation, or the prelude could contain all of the required text for compilation. Setting the prelude text can be achieved with the method on the global session...
```
/** Set the 'prelude' for generated code for a 'downstream compiler'.
@param passThrough The downstream compiler for generated code that will have the prelude applied to it.
@param preludeText The text added pre-pended verbatim before the generated source
That for pass-through usage, prelude is not pre-pended, preludes are for code generation only.
*/
void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText);
```
The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines the absolute path to `slang-cuda-prelude.h` and then just makes the prelude `#include "the absolute path"`.
Half Support
============
Slang supports the half/float16 types on CUDA. To do so NVRTC must have access to the `cuda_fp16.h` and `cuda_fp16.hpp` files that are typically distributed as part of the CUDA SDK. When Slang detects the use of half in source, it will define `SLANG_CUDA_ENABLE_HALF` when `slang-cuda-prelude.h` is included. This will in turn try to include `cuda_fp16.h` and enable extra functionality within the prelude for half support.
Slang tries several mechanisms to locate `cuda_fp16.h` when NVRTC is initiated. The first mechanism is to look in the include paths that are passed to Slang. If `cuda_fp16.h` can be found in one of these paths, no more searching will be performed.
If this fails, the path where NVRTC is located will be searched. In that path "include" and "CUDA/include" paths will be searched. This is probably most suitable for Windows based targets, where NVRTC dll is placed along with other binaries. The "CUDA/include" path is used to try and make clear in this scenario what the contained files are for.
If this fails Slang will look for the CUDA_PATH environmental variable, as is typically set during a CUDA SDK installation.
If this fails - the prelude include of `cuda_fp16.h` will most likely fail on NVRTC invocation.
CUDA has the `__half` and `__half2` types defined in `cuda_fp16.h`. The `__half2` can produce results just as quickly as doing the same operation on `__half` - in essence for some operations `__half2` is [SIMD](https://en.wikipedia.org/wiki/SIMD) like. The half implementation in Slang tries to take advantage of this optimization.
Since Slang supports up to 4 wide vectors Slang has to build on CUDAs half support. The types `__half3` and `__half4` are implemented in `slang-cuda-prelude.h` for this reason. It is worth noting that `__half3` is made up of a `__half2` and a `__half`. As `__half2` is 4 byte aligned, this means `__half3` is actually 8 bytes, rather than 6 bytes that might be expected.
One area where this optimization isn't fully used is in comparisons - as in effect Slang treats all the vector/matrix half comparisons as if they are scalar. This could be perhaps be improved on in the future. Doing so would require using features that are not directly available in the CUDA headers.
Wave Intrinsics
===============
There is broad support for [HLSL Wave intrinsics](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12), including support for [SM 6.5 intrinsics](https://microsoft.github.io/DirectX-Specs/d3d/HLSL_ShaderModel6_5.html).
Most Wave intrinsics will work with vector, matrix or scalar types of typical built in types - `uint`, `int`, `float`, `double`, `uint64_t`, `int64_t`.
The support is provided via both the Slang core module as well as the Slang CUDA prelude found in 'prelude/slang-cuda-prelude.h'. Many Wave intrinsics are not directly applicable within CUDA which supplies more low level mechanisms. The implementation of most Wave functions works most optimally if all lanes of a 'Wave' are used. If all lanes from index 0 to pow2(n) - 1 are used (which is also true if all lanes are used) a binary reduction is typically applied. If this is not the case the implementation falls back on a slow path which is linear in the number of active lanes, and so is typically significantly less performant.
For a more concrete example take
```
int sum = WaveActiveSum(...);
```
When computing the sum, if all lanes are active (32 on CUDA), the computation will require 5 steps to complete (2^5 = 32). If say just one lane is not being used it will take 31 steps to complete (because it is now linear in the amount of lanes). So just having one lane disabled requires 6 times as many steps. If lanes 0 - 15 are active, it will take 4 steps to complete (2^4 = 16).
In the future it may be possible to improve on the performance of the 'slow' path, however it will always remain the most efficient generally for all of 0 to pow2(n) - 1 lanes to be active.
It is also worth noting that lane communicating intrinsics performance will be impacted by the 'size' of the data communicated. The size here is at a minimum the amount of built in scalar types used in the processing. The CUDA language only allows direct communication with built in scalar types.
Thus
```
int3 v = ...;
int3 sum = WaveActiveSum(v);
```
Will require 3 times as many steps as the earlier scalar example just using a single int.
## WaveGetLaneIndex
'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There are other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechanism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature.
There is potential to calculate the lane id using the [numthreads] markup in Slang/HLSL, but that also requires some assumptions of how that maps to a lane index.
## Unsupported Intrinsics
* Intrinsics which only work in pixel shaders
+ QuadXXXX intrinsics
OptiX Support
=============
Slang supports OptiX for raytracing. To compile raytracing programs, NVRTC must have access to the `optix.h` and dependent files that are typically distributed as part of the OptiX SDK. When Slang detects the use of raytracing in source, it will define `SLANG_CUDA_ENABLE_OPTIX` when `slang-cuda-prelude.h` is included. This will in turn try to include `optix.h`.
Slang tries several mechanisms to locate `optix.h` when NVRTC is initiated. The first mechanism is to look in the include paths that are passed to Slang. If `optix.h` can be found in one of these paths, no more searching will be performed.
If this fails, the default OptiX SDK install locations are searched. On Windows this is `%{PROGRAMDATA}\NVIDIA Corporation\OptiX SDK X.X.X\include`. On Linux this is `${HOME}/NVIDIA-OptiX-SDK-X.X.X-suffix`.
If OptiX headers cannot be found, compilation will fail.
Limitations
===========
Some features are not available because they cannot be mapped with appropriate behavior to a target. Other features are unavailable because of resources to devote to more unusual features.
* Not all Wave intrinsics are supported
* There is not complete support for all methods on 'objects' like textures etc.
* Does not currently support combined 'TextureSampler'. A Texture behaves equivalently to a TextureSampler and Samplers are ignored.
* Half type is not currently supported
* GetDimensions is not available on any Texture type currently - as there doesn't appear to be a CUDA equivalent
Language aspects
================
# Arrays passed by Value
Slang follows the HLSL convention that arrays are passed by value. This is in contrast with CUDA where arrays follow C++ conventions and are passed by reference. To make generated CUDA follow this convention an array is turned into a 'FixedArray' struct type.
To get something more similar to CUDA/C++ operation the array can be marked `out` or `inout` to make it passed by reference.

View file

@ -1,25 +0,0 @@
Slang Design and Implementation Notes
=====================================
This directory contains documents that are primarily intended for developers working on the Slang implementation.
They are not intended to be helpful to Slang users.
These documents can only be trusted to reflect the state of the codebase or the plans of their authors at the time they were written. Changes to the implementation are not expected to always come with matching changes to these documents, so some amount of drift is to be expected.
Developers interested in contributing to Slang might want to start with the [Overview](overview.md) document, which describes the overall compilation pipeline that Slang uses and the purpose of the various steps (both implemented and planned).
The [Coding Conventions](coding-conventions.md) document describes the conventions that should be followed in all code added to the Slang project.
The [Interfaces](interfaces.md) document describes the high-level design plan for Slang's interfaces and generics features.
The [Declaration References](decl-refs.md) document is intended to help out developers who are mystified by the heavily used `DeclRef` type in the compiler implementation.
The [Intermediate Representation (IR)](ir.md) document describes the design of Slang's internal IR.
The [Existential Types](existential-types.md) document goes into some detail about what "existential types" are in the context of the Slang language, and explains how we may go about supporting them.
The [Capabilities](capabilities.md) document explains the proposed model for how Slang will support general notions of profile- or capability-based overloading/dispatch.
The [Casting](casting.md) document explains how casting works in the slang C++ compiler code base.
The [Experimental API Interfaces](experimental.md) document explains how experimental Slang API changes are to be deployed.

View file

@ -1,333 +0,0 @@
Reverse Mode Autodiff (Out of Date)
==================================
This document serves as a design reference for reverse-mode auto-diff in the Slang compiler.
## Reverse-Mode Passes
Rather than implementing reverse-mode as a separate pass, Slang implements this as a series of independent passes:
If a function needs a reverse-mode version generated:
- *Linearize* the function, and all dependencies.
- *Propagate* differential types through the linearized code.
- *Unzip* by moving primal insts to before differential insts.
- *Transpose* the differential insts.
## Linearization (Forward-mode)
### Overview
(This is an incomplete section. More details coming soon)
Consider an arbitrary function `float f(float a, float b, float c, ..., z)` which takes in N inputs and generates one output `y`. Linearization aims to generate the first-order Taylor expansion of f about _all_ of its inputs.
Mathematically, the forward derivative `fwd_f` represents `df/da * (a_0 - a) + df/db * (b_0 - b) + ...`, where `a_0` is the value at which the Taylor expansion was produced. The quantity `a_0 - a` is known as the 'differential' (for brevity we'll denote them da, db, dc, etc..), and there is at-most one differential per input.
Thus, the new function's signature should be `fwd_f(float a, float da, float b, float db, float c, float dc, ...)`. For simplicity, we'll use *pairs* instead of interleaving the original and differential parameters. We use the intrinsic `DifferentialPair<T>` (or for short: `DP<T>`) to denote this.
The signature we use is then `fwd_f(DP<float> a, DP<float> b, DP<float> c)`
An example of linearization:
```C
float f(float a, float b)
{
if (a > 0)
{
return a + b + 2.0 * a * b;
}
else
{
return sqrt(a);
}
}
```
We'll write out the SSA form of this function.
```C
float f_SSA(float a, float b)
{
bool _b1 = a > 0;
if (_b1)
{
float _t1 = a + b;
float _t2 = 2.0 * a;
float _t3 = _t2 * b;
float _t4 = _t1 + _t3;
return _t4;
}
else
{
float _t1 = sqrt(a);
return _t1;
}
}
DP<float> f_SSA_fwd(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
if (_b1)
{
float _t1 = dpa.p + dpb.p;
float _t1_d = dpa.d + dpb.d;
float _t2 = 2.0 * dpa.p;
float _t2_d = 0.0 * dpa.p + 2.0 * dpa.d;
float _t3 = _t2 * dpb.p;
float _t3_d = _t2_d * dpb.p + _t2 * dpb.d;
float _t4 = _t1 + _t3;
float _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
return DP<float>(_t1_dp.p, _t1_dp.d);
}
}
```
In the result, the primal part of the pair holds the original computation, while the differential part computes the dot product of the differentials with the derivatives of the function's output w.r.t each input.
## Propagation
This step takes a linearized function and propagates information about which instructions are computing a differential and which ones are part of the primal (original) computation.
Assuming first-order differentiation only:
The approach will be to mark any instructions that extract the differential from the differential pair as a differential. Then any instruction that uses the differential is itself marked as a differential and so on. The only exception is the call instruction which is either non-differentiable (do nothing) or differentiable and returns a pair (follow the same process)
Here's the above example with propagated type information (we use float.D to denote intermediaries that have been marked as differential, and also expand everything so that each line has a single operation)
```C
DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
if (_b1)
{
float _t1 = dpa.p + dpb.p;
float.D _q1_d = dpa.d;
float.D _q2_d = dpb.d;
float.D _t1_d = _q1_d + _q2_d;
float _t2 = 2.0 * dpa.p;
float.D _q2_d = dpa.d;
float.D _q3_d = 2.0 * dpa.d;
float _q4 = dpa.p;
float.D _q4_d = 0.0 * dpa.p;
float.D _t2_d = _q4_d + _q3_d;
float _t3 = _t2 * dpb.p;
float _q5 = dpb.p;
float.D _q6_d = _q5 * _t2_d;
float.D _q7_d = dpb.d;
float.D _q8_d = _t2 * _q7_d;
float _t3_d = _q6_d + _q8_d;
float _t4 = _t1 + _t3;
float.D _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
float _q1 = _t1_dp.p;
float.D _q1_d = _t1_dp.d;
return DP<float>(_q1, _q1_d);
}
}
```
## Unzipping
This is a fairly simple process when there is no control flow. We simply move all non-differential instructions to before the first differential instruction.
When there is control flow, we need to be a bit more careful: the key is to *replicate* the control flow graph once for primal and once for the differential.
Here's the previous example unzipped:
```C
DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
if (_b1)
{
_t1 = dpa.p + dpb.p;
_t2 = 2.0 * dpa.p;
_q4 = dpa.p;
_t3 = _t2 * dpb.p;
_q5 = dpb.p;
_t4 = _t1 + _t3;
}
else
{
_q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
}
// Note here that we have to 'store' all the intermediaries
// _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
// the tradeoff between fwd_mode and rev_mode
if (_b1)
{
float.D _q1_d = dpa.d;
float.D _q2_d = dpb.d;
float.D _t1_d = _q1_d + _q2_d;
float.D _q2_d = dpa.d;
float.D _q3_d = 2.0 * dpa.d;
float.D _q4_d = 0.0 * dpa.p;
float.D _t2_d = _q4_d + _q3_d;
float.D _q6_d = _q5 * _t2_d;
float.D _q7_d = dpb.d;
float.D _q8_d = _t2 * _q7_d;
float.D _t3_d = _q6_d + _q8_d;
float.D _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
float.D _q1_d = _t1_dp.d;
return DP<float>(_q1, _q1_d);
}
}
```
## Transposition
### Overview
This transposition pass _assumes_ that the provided function is linear in its differentials.
It is out of scope of this project to attempt to enforce that constraint for user-defined differential code.
For transposition we walk all differential instructions in reverse starting from the return statement, and apply the following rules:
We'll have an accumulator dictionary `Dictionary<IRInst, IRInst> accMap` holding assignments for
intermediaries which don't have concrete variables. When we add a pair (A, C) and (A, B) already exists, this will form the pair (A, ADD(C, B)) in the dictionary. (ADD will be replaced with a call to `T.dadd` for a generic type T)
- If `inst` is a `RETURN(A)`, add pair `(A, d_out)` to `accMap`
- If an instruction is `MUL(P, D)` where D is the differential, add pair `(D, MUL(P, accMap[this_inst]))` to `accMap`
- If an instruction is `ADD(D1, D2)`, where both D1 and D2 are differentials (this is the only config that should occur), then add pair `(D1, accMap[this_inst])` to `accMap`
- If an instruction is `CALL(f_fwd, (P1, D1), (P2, D2), ...)`, create variables D1v, D2v, ... for D1, D2, ..., then replace with `CALL(f_rev, (P1, D1v), (P2, D2v), ..., accMap[this_inst])`, and finally add pairs `(D1, LOAD[D1v]), (D2, LOAD[D2v]), ...` to `accMap`
```C
void f_SSA_Rev(inout DP<float> dpa, inout DP<float> dpb, float d_out)
{
bool _b1 = dpa.p > 0;
float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
if (_b1)
{
_t1 = dpa.p + dpb.p;
_t2 = 2.0 * dpa.p;
_q4 = dpa.p;
_t3 = _t2 * dpb.p;
_q5 = dpb.p;
_t4 = _t1 + _t3;
}
else
{
_q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
}
// Note here that we have to 'store' all the intermediaries
// _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
// the tradeoff between fwd_mode and rev_mode
if (_b1)
{
float.D _t4_rev = d_out;
float.D _t1_rev = _t4_rev;
float.D _t3_rev = _t4_rev;
float.D _q8_rev = _t3_rev;
float.D _q6_rev = _t3_rev;
float.D _q7_rev = _t2 * _q8_rev;
dpb.d += _q7_rev;
float.D _t2_rev = _q5 * _q6_rev;
float.D _q4_rev = _t2_rev;
float.D _q3_rev = _t2_rev;
dpa.d += 2.0 * _q3_rev;
float.D _q1_rev = _t1_rev;
float.D _q2_rev = _t1_rev;
dpb.d += _q2_rev;
dpa.d += _q1_rev;
}
else
{
_q1_rev = d_out;
DP<float> dpa_copy;
sqrt_rev(dpa_copy, _q1_rev);
dpa.d += dpa_copy.d;
}
}
```

View file

@ -1,396 +0,0 @@
<!--The goal of this set of documents is to describe the design of Slang's automatic differentiation passes, along with the mechanisms & passes used to support various features. -->
This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
## What is Automatic Differentiation?
Before diving into the design of the automatic differentiation (for brevity, we will call it 'auto-diff') passes, it is important to understand the end goal of what auto-diff tries to achieve.
The over-arching goal of Slang's auto-diff is to enable the user to compute derivatives of a given shader program or function's output w.r.t its input parameters. This critical compiler feature enables users to quickly use their shaders with gradient-based parameter optimization algorithms, which forms the backbone of modern machine learning systems. It enables users to train and deploy graphics systems that contain ML primitives (like multi-layer perceptron's or MLPs) or use their shader programs as differentiable primitives within larger ML pipelines.
### More Resources
Here are some links to resources that talk more about differentiable programming from a more mathematical perspective:
1. UCSD CSE 291 (Spring 2024): https://cseweb.ucsd.edu/~tzli/cse291/sp2024/
2. UW CSE 5990 (Winter 2024): https://sites.google.com/cs.washington.edu/cse-599o-dppl
## Definition of Derivatives
This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/03_forward_mode.pdf.
Here, we establish the mathematical definition of derivatives, starting with a simple 1D case (function with a single input and output), and extending to the general case of functions mapping multiple inputs to multiple outputs.
To avoid confusion, we will denote mathematical functions using LaTeX italic script ($f$, $g$, etc..) and programs that compute these functions with markdown code (`f`, `g`, etc..)
### Derivatives of scalar (1D) functions
Consider the simplest case: a smooth scalar mathematical function that maps a real number to another real number:
$$f : \mathbb{R} \to \mathbb{R}$$
There are several definitions for a derivative, but we will use the definition that a derivative is the *closest linear approximation* of the output function at a given input location.
Concretely, given a specific input $x$, we can create a linear approximation of the function $f$ around $x$ as follows:
$$ f(x + dx) \approx f(x) + Df(x) \cdot dx $$
<!--// TODO: Add image here.-->
This can also be understood as a geometric 'tangent' to the function at $x$. $Df(x)$ is the slope of $f$ at $x$, i.e. $\frac{\partial f}{\partial x}$, and $dx$ is the perturbation away from $x$. Our approximation is linear as a function of the perturbation $dx$. Note that no matter how non-linear or complex the underlying function $f(x)$ is, the approximation is always linear (this property becomes very important later).
### Forward-mode derivative functions
Now consider a concrete program `f` that computes some function.
```C
// Computes square of x
float f(float x)
{
return x * x;
}
```
What should its derivative program look like? We need the output $f(x)$ and the product of the derivative at $x$, $Df(x)$, with the differential $dx$.
In Slang, we put both of these together into a single function, called the *forward-mode derivative* function, which takes in a pair $(x, dx)$ and returns a pair $(f(x), Df(x)\cdot dx)$. Note that in auto-diff literature, this is also often referred to as the *total derivative* function.
```C
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx)
{
float x = dpx.getPrimal(); // Can also be accessed via property dpx.p
float dx = dpx.getDifferential(); // Can also be accessed via property dpx.d
return makePair(x * x, (2 * x) * dx);
}
```
Note that `(2 * x)` is the multiplier corresponding to $Df(x)$. We refer to $x$ and $f(x)$ as "*primal*" values and the perturbations $dx$ and $Df(x)\cdot dx$ as "*differential*" values. The reason for this separation is that the "*differential*" output values are always linear w.r.t their "*differential*" inputs.
As the name implies, `DifferentialPair<T>` is a special pair type used by Slang to hold values and their corresponding differentials.
### Forward-mode derivatives for higher-dimensional functions
In practice, most functions tend to have multiple inputs and multiple outputs, i.e. $f: \mathbb{R}^N \to \mathbb{R}^M$
The definition above can be extended to higher dimensions, using the closest-linear-approximation idea. The main difference is that the derivative function represents a hyperplane rather than a line.
Effectively, we want our forward-mode derivative to compute the following:
$$ f(\mathbf{x} + \mathbf{dx}) \approx f(\mathbf{x}) + \langle Df(\mathbf{x}),\mathbf{dx}\rangle $$
Here, the input and its differential can be represented as a vector quantity $\mathbf{x}, \mathbf{dx} \in \mathbb{R}^N$ and the multiplier $Df(\mathbf{x})$ (also known as the *Jacobian* matrix) is an MxN matrix, and $\langle \cdot,\cdot \rangle$ denotes the inner product (i.e. matrix-vector multiplication)
Here's an example of a Slang function taking in two inputs (N=2) and generating one output (M=1)
```C
// Compute length of hypotenuse.
float f(float x, float y)
{
return sqrt(x * x + y * y);
}
```
and its forward-mode derivative:
```C
// Closest linear approximation at x, y
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
{
float x = dpx.p;
float y = dpy.p;
float dx = dpx.d;
float dy = dpy.d;
return DifferentialPair<float>(
sqrt(x * x + y * y), // f(x, y)
(x * dx + y * dy) / sqrt(x * x + y * y)); // <Df(x,y), dx>
}
```
Important note: the forward-mode function only needs to compute the inner product $\langle Df(\mathbf{x}),\mathbf{dx} \rangle$. The Jacobian matrix itself never needs to be fully materialized. This is a key design element of automatic differentiation, one which allows it to scale to huge input/output counts.
### Building Blocks: Forward-mode derivatives compose in forward order of execution.
In practice, we compute forward-mode derivatives of a complex function by decomposing them into constituent functions (or in compiler-speak: instructions) and composing the forward-mode derivative of each piece in the **same** order.
This is because each forward derivative is a 'right-side' product (or product of a Jacobian matrix with a vector)
Here's an example of this in action (consider a complex function $h$ composed of $f$ and $g$):
$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
Its forward-mode derivative is then:
$$ \langle Dh(\mathbf{x}), \mathbf{dx}\rangle = \big\langle Df(\mathbf{x}), \langle Dg(\mathbf{x}), \mathbf{dx}\rangle\big\rangle $$
which is the forward-mode derivative of the outer function $f$ evaluated on the result of the forward-mode derivative of the inner function $g$.
An example of this in Slang code:
```C
// Compute square.
float sqr(float x)
{
return x * x;
}
// Compute length of hypotenuse.
float f(float x, float y)
{
float x_sqr = sqr(x);
float y_sqr = sqr(y);
return sqrt(x_sqr + y_sqr);
}
```
The resulting derivative of `f` can be computed by composition:
```C
// Forward-mode derivative of sqr()
DifferentialPair<float> fwd_sqr(DifferentialPair<float> dpx)
{
float x = dpx.getPrimal();
float dx = dpx.getDifferential();
return DifferentialPair<float>(x * x, 2 * x * dx);
}
// Forward-mode derivative of f()
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
{
DifferentialPair<float> dp_x_sqr = fwd_sqr(dpx);
DifferentialPair<float> dp_y_sqr = fwd_sqr(dpy);
float x_sqr = dp_x_sqr.getPrimal();
float y_sqr = dp_y_sqr.getPrimal();
float x_sqr_d = dp_x_sqr.getDifferential();
float y_sqr_d = dp_y_sqr.getDifferential();
return DifferentialPair<float>(
sqrt(x_sqr + y_sqr),
(x_sqr_d + y_sqr_d) / (2 * sqrt(x_sqr + y_sqr)));
}
```
### Tip: Extracting partial derivatives from a forward-mode derivative (i.e. a 'total' derivative)
As we discussed above, forward-mode derivatives compute $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$ rather than what you may be used to seeing in a calculus course (e.g. partial derivatives like $\frac{\partial f}{\partial x}$).
In fact, the forward-mode derivative is simply a sum of the partial derivatives w.r.t each input parameter multiplied by their differential perturbations $\frac{\partial f}{\partial x} * dx + \frac{\partial f}{\partial y} * dy$. This is the reason for the alternative name: *total derivative*.
Thus, partial derivative can be obtained by successively setting each input's differential to 1 (and 0 for everything else)
Example:
```C
// Compute partial derivative w.r.t x (pass dx=1.0)
float df_dx = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0)).d;
// Compute partial derivative w.r.t y (pass dy=1.0)
float df_dy = fwd_f(DifferentialPair<float>(x, 0.0), DifferentialPair<float>(y, 1.0)).d;
```
### Tip: Testing forward-mode derivatives using the first principles of calculus (i.e. the *finite difference* method)
In Calculus, partial derivatives of a function are often defined in a 'black box' manner using limits, by perturbing a single parameter by an infinitesimal amount:
$$ \frac{\partial f}{\partial x} = \lim_{dx\to 0} \frac{f(x + dx) - f(x - dx)}{2 * dx} $$
At the moment, we cannot leverage programming languages to compute true infinitesimal limits, but we can replace $dx \to 0$ with a sufficiently small $\epsilon$ leading to the following 'test' to check if derivatives produced by automatic differentiation match with their true mathematical expected values.
Here's an example of using this idea to test functions (many autodiff tests were written this way)
```C
// Compute partial derivative w.r.t x analytically
float df_dx_ad = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0)).d;
// Compute partial derivative w.r.t x through the finite difference (FD) method.
float eps = 1e-4;
float df_dx_fd = (f(x + eps, y) - f(x - eps, y)) / (2 * eps);
// If computed correctly, df_dx_ad and df_dx_fd are very close.
```
**Caveats:**
Since the finite difference method only produces a biased estimate of the derivative, the result is only numerically *close* to the auto-diff-based result. Poorly behaved functions (those that rapidly change, or are discontinuous or otherwise non-differentiable) will result in a (expected) mismatch between FD and AD results.
## Reverse-mode derivative functions
This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/05_reverse_mode.pdf.
### Motivation: Challenges with scaling forward-mode derivatives
A big problem with forward-mode derivatives is their inability to scale to large parameter counts.
Machine learning pipelines often compute derivatives of a large complex pipeline with millions or even billions of input parameters, but a single output value, i.e. the *loss* or *objective* function, frequently denoted by $\mathcal{L}$.
Computing $\frac{\partial \mathcal{L}}{\partial x_i}$ for $N$ inputs $x_i$ using the one-hot vector approach will involve invoking the forward-mode derivative function $N$ times.
The reason for this limitation is that forward-mode derivatives pass derivatives from the inputs through to the outputs by computing the dot-product $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$.
Instead, we employ a different approach called the reverse-mode derivative, which propagates differentials *backwards* from outputs to inputs.
### Key Idea: Generate code to compute $\langle \frac{\partial \mathcal{L}}{\partial f}, Df(\mathbf{x})\rangle$ rather than $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$
The fundamental building block of reverse-mode derivatives is the **left-side inner product**. That is, the product of a vector of derivatives w.r.t outputs $\frac{\partial \mathcal{L}}{\partial f}$ with the Jacobian matrix $Df(\mathbf{x})$.
An important thing to keep in mind is that it does not necessarily matter what the scalar quantity $\mathcal{L}$ is. The goal of this product is to propagate the derivatives of any scalar value $\mathcal{L}$ w.r.t output vector $f(\mathbf{x})$ (i.e., $\frac{\partial \mathcal{L}}{\partial f}$) into derivatives of that same scalar value $\mathcal{L}$ w.r.t the input vector $\mathbf{x}$ (i.e., $\frac{\partial \mathcal{L}}{\partial \mathbf{x}}$).
Here's an example of a Slang function computing the `reverse-mode derivative`.
```C
// Compute length of hypotenuse
float f(float x, float y)
{
return sqrt(x * x + y * y);
}
// Reverse-mode derivative of f. dOutput represents the derivative dL/dOutput of the output w.r.t scalar value.
void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOutput)
{
float x = dpx.getPrimal();
float y = dpy.getPrimal();
float t = 1.0 / (sqrt(x * x + y * y));
dpx = DifferentialPair<float>(
x, // The primal part of the return value is *always* copied in from the input as-is.
dOutput * x * t); // The differential part for x is the derivative dL/dx computed as
// (dL/dOutput) * (dOutput/dx), where dOutput/dx = x / sqrt(x*x+y*y).
dpy = DifferentialPair<float>(
y,
dOutput * y * t); // The differential part for y is the derivative dL/dy computed as
// (dL/dOutput) * (dOutput/dy), where dOutput/dy = y / sqrt(x*x+y*y).
}
```
Note that `rev_f` accepts derivatives w.r.t the output value as the input, and returns derivatives w.r.t inputs as its output (through `inout` parameters). `rev_f` still needs the primal values `x` and `y` to compute the derivatives, so those are still passed in as an input through the primal part of the differential pair.
Also note that the reverse-mode derivative function does not have to compute the primal result value (its return is void). The reason for this is a matter of convenience: reverse-mode derivatives are often invoked after all the primal functions, and there is typically no need for these values. We go into more detail on this topic in the checkpointing chapter.
The reverse mode function can be used to compute both `dOutput/dx` and `dOutput/dy` with a single invocation (unlike the forward-mode case where we had to invoke `fwd_f` once for each input)
```C
DifferentialPair<float> dpx = makePair<float>(x, 0.f); // Initialize diff-value to 0 (not necessary)
DifferentialPair<float> dpy = makePair<float>(y, 0.f); // Initialize diff-value to 0 (not necessary)
rev_f(dpx, dpy, 1.0); // Pass 1.0 for dL/dOutput so that the results are (1.0 * dOutput/dx) and (1.0 * dOutput/dy)
float doutput_dx = dpx.getDifferential();
float doutput_dy = dpy.getDifferential();
```
### Extension to multiple outputs
The extension to multiple outputs is fairly natural. Each output gets a separate input for its derivative.
Here is an example:
```C
// Computation involving multiple inputs and outputs.
float2 f_multi_output(float x, float y)
{
return float2(
x * x,
x + y);
}
// Reverse-mode derivative of 'f_multi_output'. The derivative of the outputs is also a vector quantity
// (type follows from return type of f_multi_output)
void rev_f_multi_output(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float2 dOut)
{
float x = dpx.getPrimal();
float y = dpy.getPrimal();
dpx = DifferentialPair<float>(x, dOut[0] * 2 * x + dOut[1]);
dpy = DifferentialPair<float>(y, dOut[1]);
}
```
### Jacobian method: Generate forward- and reverse-mode derivatives from first principles.
A simple way to figure out what the generated reverse (or forward) derivative function is supposed to compute is to write down the entire Jacobian matrix. That is, write down the partial derivative of each output w.r.t each input
$$
D\mathbf{f}(\mathbf{x}) = \begin{bmatrix}
\partial f_0 / \partial x & \partial f_0 / \partial y \\
\partial f_1 / \partial x & \partial f_1 / \partial y \\
\end{bmatrix} =
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix}
$$
The **reverse-mode derivative**'s outputs should match the left-product of this matrix with the vector of derivatives w.r.t outputs:
$$ \left\langle \frac{\partial \mathcal{L}}{\partial \mathbf{f}}, D\mathbf{f}(\mathbf{x})\right\rangle =
\begin{bmatrix}
\frac{\partial \mathcal{L}}{\partial f_0} & \frac{\partial \mathcal{L}}{\partial f_1}
\end{bmatrix}
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix} =
\begin{bmatrix} \left(\frac{\partial \mathcal{L}}{\partial f_0} \cdot 2x + \frac{\partial \mathcal{L}}{\partial f_1}\right) & \frac{\partial \mathcal{L}}{\partial f_1} \end{bmatrix}
$$
and the **forward-mode derivative**'s outputs should match the right-product of this matrix with the vector of differentials of the inputs:
$$ \langle D\mathbf{f}(\mathbf{x}), d\mathbf{x}\rangle =
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix}
\begin{bmatrix}
dx \\ dy
\end{bmatrix} =
\begin{bmatrix} 2x \cdot dx \\ dx + dy \end{bmatrix}
$$
Note that when we generate derivative code in practice, we do not materialize the full Jacobian matrix, and instead use the composition property to chain together derivatives at the instruction level.
However, the resulting code is equivalent to the Jacobian method (mathematically), and it is a good, analytical way to confirm that the generated code is indeed correct (or when thinking about what the derivative of a particular instruction/set of instructions should be)
### Building Blocks: Reverse-mode derivatives compose in reverse order of execution.
A consequence of using the 'left-side inner product' is that derivatives of a composite function must be computed in the reverse of the order of primal computation.
Here's an example of a composite function $h$ (similar to the example used in forward-mode building blocks):
$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
where (for brevity):
$$ \mathbf{y} = g(\mathbf{x}) $$
The reverse-mode derivative function for $h$ can be written as the composition of the reverse-mode derivatives of $f$ and $g$
$$ \left\langle \frac{\partial L}{\partial h}, Dh(\mathbf{x})\right\rangle = \left\langle \left\langle \frac{\partial L}{\partial h}, Df(\mathbf{y})\right\rangle , Dg(\mathbf{x})\right\rangle $$
Note the 'backward' order here. We must first pass the derivatives through the outer function $f$, and then pass the result through the inner function $g$ to compute derivatives w.r.t inner-most inputs $\mathbf{x}$. This process of passing derivatives backwards is often referred to as *backpropagation*.
A more concrete Slang example of the same:
```C
// Compute square
float sqr(float x)
{
return x * x;
}
// Compute length of hypotenuse
float f(float x, float y)
{
return sqrt(sqr(x) + sqr(y));
}
```
The derivative functions are then:
```C
void rev_sqr(inout DifferentialPair<float> dpx, float dOutput)
{
float x = dpx.getPrimal();
dpx = DifferentialPair<float>(x, dOutput * 2 * x);
}
void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOut)
{
float x = dpx.getPrimal();
float y = dpy.getPrimal();
float t = 0.5f / sqrt(x * x + y * y);
float d_xsqr = t * dOut; // Calculate derivatives w.r.t output of sqr(x)
float d_ysqr = t * dOut; // Calculate derivatives w.r.t output of sqr(y)
rev_sqr(dpx, d_xsqr); // Propagate to x
rev_sqr(dpy, d_ysqr); // Propagate to y
}
```
When comparing `rev_f`'s implementation to `fwd_f`, note the order of computing derivative w.r.t `sqr` (in `rev_f`, `rev_sqr` is called at the end, while in `fwd_f` it is called at the beginning)

View file

@ -1,92 +0,0 @@
This document details auto-diff-related decorations that are lowered in to the IR to help annotate methods with relevant information.
## `[Differentiable]`
The `[Differentiable]` attribute is used to mark functions as being differentiable. The auto-diff process will only touch functions that are marked explicitly as `[Differentiable]`. All other functions are considered non-differentiable and calls to such functions from a differentiable function are simply copied as-is with no transformation.
Further, only `[Differentiable]` methods are checked during the derivative data-flow pass. This decorator is translated into `BackwardDifferentiableAttribute` (which implies both forward and backward differentiability), and then lowered into the IR `OpBackwardDifferentiableDecoration`
**Note:** `[Differentiable]` was previously implemented as two separate decorators `[ForwardDifferentiable]` and `[BackwardDifferentiable]` to denote differentiability with each type of auto-diff transformation. However, these are now **deprecated**. The preferred approach is to use only `[Differentiable]`
`fwd_diff` and `bwd_diff` cannot be directly called on methods that don't have the `[Differentiable]` tag (will result in an error). If non-`[Differentiable]` methods are called from within a `[Differentiable]` method, they must be wrapped in `no_diff()` operation (enforced by the [derivative data-flow analysis pass](./types.md#derivative-data-flow-analysis) )
### `[Differentiable]` for `interface` Requirements
The `[Differentiable]` attribute can also be used to decorate interface requirements. In this case, the attribute is handled in a slightly different manner, since we do not have access to the concrete implementations.
The process is roughly as follows:
1. During the semantic checking step, when checking a method that is an interface requirement (in `checkCallableDeclCommon` in `slang-check-decl.cpp`), we check if the method has a `[Differentiable]` attribute
2. If yes, we create a set of new method declarations, one for the forward-mode derivative (`ForwardDerivativeRequirementDecl`) and one for the reverse-mode derivative (`BackwardDerivativeRequirementDecl`), with the appropriate translated function types and insert them into the same interface.
3. Insert a new member into the original method to reference the new declarations (`DerivativeRequirementReferenceDecl`)
4. When lowering to IR, the `DerivativeRequirementReferenceDecl` member is converted into a custom derivative reference by adding the `OpBackwardDerivativeDecoration(deriv-fn-req-key)` and `OpForwardDerivativeDecoration(deriv-fn-req-key)` decorations on the primal method's requirement key.
Here is an example of what this would look like:
```C
interface IFoo
{
[Differentiable]
float bar(float);
};
// After checking & lowering
interface IFoo_after_checking_and_lowering
{
[BackwardDerivative(bar_bwd)]
[ForwardDerivative(bar_fwd)]
float bar(float);
void bar_bwd(inout DifferentialPair<float>, float);
DifferentialPair<float> bar_fwd(DifferentialPair<float>);
};
```
**Note:** All conforming types must _also_ declare their corresponding implementations as differentiable so that their derivative implementations are synthesized to match the interface signature. In this sense, the `[Differentiable]` attribute is part of the function's signature, so a `[Differentiable]` interface requirement can only be satisfied by a `[Differentiable]` function implementation
### `[TreatAsDifferentiable]`
In large codebases where some interfaces may have several possible implementations, it may not be reasonable to have to mark all possible implementations with `[Differentiable]`, especially if certain implementations use hacks or workarounds that need additional consideration before they can be marked `[Differentiable]`
In such cases, we provide the `[TreatAsDifferentiable]` decoration (AST node: `TreatAsDifferentiableAttribute`, IR: `OpTreatAsDifferentiableDecoration`), which instructs the auto-diff passes to construct an 'empty' function that returns a 0 (or 0-equivalent) for the derivative values. This allows the signature of a `[TreatAsDifferentiable]` function to match a `[Differentiable]` requirement without actually having to produce a derivative.
## Custom derivative decorators
In many cases, it is desirable to manually specify the derivative code for a method rather than let the auto-diff pass synthesize it from the method body. This is usually desirable if:
1. The body of the method is too complex, and there is a simpler, mathematically equivalent way to compute the same value (often the case for intrinsics like `sin(x)`, `arccos(x)`, etc..)
2. The method involves global/shared memory accesses, and synthesized derivative code may cause race conditions or be very slow due to overuse of synchronization. For this reason Slang assumes global memory accesses are non-differentiable by default, and requires that the user (or the core module) define separate accessors with different derivative semantics.
The Slang front-end provides two sets of decorators to facilitate this:
1. To reference a custom derivative function from a primal function: `[ForwardDerivative(fn)]` and `[BackwardDerivative(fn)]` (AST Nodes: `ForwardDerivativeAttribute`/`BackwardDerivativeAttribute`, IR: `OpForwardDerivativeDecoration`/`OpBackwardDerivativeDecoration`), and
2. To reference a primal function from its custom derivative function: `[ForwardDerivativeOf(fn)]` and `[BackwardDerivativeOf(fn)]` (AST Nodes: `ForwardDerivativeAttributeOf`/`BackwardDerivativeAttributeOf`). These attributes are useful to provide custom derivatives for existing methods in a different file without having to edit/change that module. For instance, we use `diff.meta.slang` to provide derivatives for the core module functions in `hlsl.meta.slang`. When lowering to IR, these references are placed on the target (primal function). That way both sets of decorations are lowered on the primal function.
These decorators also work on generically defined methods, as well as struct methods. Similar to how function calls work, these decorators also work on overloaded methods (and reuse the `ResolveInvoke` infrastructure to perform resolution)
### Checking custom derivative signatures
To ensure that the user-provided derivatives agree with the expected signature, as well as resolve the appropriate method when multiple overloads are available, we check the signature of the custom derivative function against the translated version of the primal function. This currently occurs in `checkDerivativeAttribute()`/`checkDerivativeOfAttribute()`.
The checking process re-uses existing infrastructure from `ResolveInvoke`, by constructing a temporary invoke expr to call the user-provided derivative using a set of 'imaginary' arguments according to the translated type of the primal method. If `ResolveInvoke` is successful, the provided derivative signature is considered to be a match. This approach also automatically allows us to resolve overloaded methods, account for generic types and type coercion.
## `[PrimalSubstitute(fn)]` and `[PrimalSubstituteOf(fn)]`
In some cases, we face the opposite problem that inspired custom derivatives. That is, we want the compiler to auto-synthesize the derivative from the function body, but there _is_ no function body to translate.
This frequently occurs with hardware intrinsic operations that are lowered into special op-codes that map to hardware units, such as texture sampling & interpolation operations.
However, these operations do have reference 'software' implementations which can be used to produce the derivative.
To allow user code to use the fast hardware intrinsics for the primal pass, but use synthesized derivatives for the derivative pass, we provide decorators `[PrimalSubstitute(ref-fn)]` and `[PrimalSubstituteOf(orig-fn)]` (AST Node: `PrimalSubstituteAttribute`/`PrimalSubstituteOfAttribute`, IR: `OpPrimalSubstituteDecoration`), that can be used to provide a reference implementation for the auto-diff pass.
Example:
```C
[PrimalSubstitute(sampleTexture_ref)]
float sampleTexture(TexHandle2D tex, float2 uv)
{
// Hardware intrinsics
}
float sampleTexture_ref(TexHandle2D tex, float2 uv)
{
// Reference SW implementation.
}
void sampleTexture_bwd(TexHandle2D tex, inout DifferentialPair<float2> dp_uv, float dOut)
{
    // Backward derivative code synthesized using the reference implementation.
}
```
The implementation of `[PrimalSubstitute(fn)]` is relatively straightforward. When the transcribers are asked to synthesize a derivative of a function, they check for a `OpPrimalSubstituteDecoration`, and swap the current function out for the substitute function before proceeding with derivative synthesis.

View file

@ -1,290 +0,0 @@
This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
Before diving into this document, please review the document on [Basics](./basics.md) for the fundamentals of automatic differentiation.
# Components of the Type System
Here we detail the main components of the type system: the `IDifferentiable` interface to define differentiable types, the `DifferentialPair<T>` type to carry a primal and corresponding differential in a single type.
We also detail how auto-diff operators are type-checked (the higher-order function checking system), how the `no_diff` decoration can be used to avoid differentiation through attributed types, and the derivative data flow analysis that warns the user of unintentionally stopping derivatives.
## `interface IDifferentiable`
Defined in core.meta.slang, `IDifferentiable` forms the basis for denoting differentiable types, both within the core module, and otherwise.
The definition of `IDifferentiable` is designed to encapsulate the following 4 items:
1. `Differential`: The type of the differential value of the conforming type. This allows custom data-structures to be defined to carry the differential values, which may be optimized for space instead of relying solely on compiler synthesis.
Since the computation of derivatives is inherently linear, we only need access to a few operations. These are:
2. `dadd(Differential, Differential) -> Differential`: Addition of two values of the differential type. Its implementation must be associative and commutative, or the resulting derivative code may be incorrect.
3. `dzero() -> Differential`: Additive identity (i.e. the zero or empty value) that can be used to initialize variables during gradient aggregation
4. `dmul<S:__BuiltinRealType>(S, Differential)`: Scalar multiplication of a real number with the differential type. Its implementation must be distributive over differential addition (`dadd`).
Points 2, 3 & 4 are derived from the concept of vector spaces. The derivative values of any Slang function always form a vector space (https://en.wikipedia.org/wiki/Vector_space).
### Derivative member associations
In certain scenarios, the compiler needs information on how the fields in the original type map to the differential type. Particularly, this is a problem when differentiating the implicit construction of a struct through braces (i.e. `{}`), represented by `kIROp_MakeStruct`. We provide the decorator `[DerivativeMember(DifferentialTypeName.fieldName)]` (ASTNode: DerivativeMemberAttribute, IR: kIROp_DerivativeMemberDecoration) to explicitly mark these associations.
Example
```C
struct MyType : IDifferentiable
{
typealias Differential = MyDiffType;
float a;
[DerivativeMember(MyDiffType.db)]
float b;
/* ... */
};
struct MyDiffType
{
float db;
};
```
### Automatic Synthesis of `IDifferentiable` Conformances for Aggregate Types
It can be tedious to expect users to hand-write the associated `Differential` type, the corresponding mappings and interface methods for every user-defined `struct` type. For aggregate types, these are trivial to construct by analysing which of their components conform to `IDifferentiable`.
The synthesis proceeds in roughly the following fashion:
1. `IDifferentiable`'s components are tagged with special decorator `__builtin_requirement(unique_integer_id)` which carries an enum value from `BuiltinRequirementKind`.
2. When checking that types conform to their interfaces, if a user-provided definition does not satisfy a requirement with a built-in tag, we perform synthesis by dispatching to `trySynthesizeRequirementWitness`.
3. For _user-defined types_, Differential **types** are synthesized during conformance-checking through `trySynthesizeDifferentialAssociatedTypeRequirementWitness` by checking if each constituent type conforms to `IDifferentiable`, looking up the corresponding `Differential` type, and constructing a new aggregate type from these differential types. Note that since it is possible that a `Differential` type of a constituent member has not yet been synthesized, we have additional logic in the lookup system (`trySynthesizeRequirementWitness`) that synthesizes a temporary empty type with a `ToBeSynthesizedModifier`, so that the fields can be filled in later, when the member type undergoes conformance checking.
4. For _user-defined types_, Differential methods (`dadd`, `dzero` and `dmul`) are synthesized in `trySynthesizeDifferentialMethodRequirementWitness` by utilizing the `Differential` member and its `[DifferentialMember]` decorations to determine which fields need to be considered and the base type to use for each field. There are two synthesis patterns. The fully-inductive pattern is used for `dadd` and `dzero` which works by calling `dadd` and `dzero` respectively on the individual fields of the `Differential` type under consideration.
Example:
```C
// Synthesized from "struct T {FT1 field1; FT2 field2;}"
T.Differential dadd(T.Differential a, T.Differential b)
{
return Differential(
FT1.dadd(a.field1, b.field1),
FT2.dadd(a.field2, b.field2),
)
}
```
On the other hand, `dmul` uses the fixed-first arg pattern since the first argument is a common scalar, and proceeds inductively on all the other args.
Example:
```C
// Synthesized from "struct T {FT1 field1; FT2 field2;}"
T.Differential dmul<S:__BuiltinRealType>(S s, T.Differential a)
{
return Differential(
FT1<S>.dmul(s, a.field1),
FT2<S>.dmul(s, a.field2),
)
}
```
5. During auto-diff, the compiler can sometimes synthesize new aggregate types. The most common case is the intermediate context type (`kIROp_BackwardDerivativeIntermediateContextType`), which is lowered into a standard struct once the auto-diff pass is complete. It is important to synthesize the `IDifferentiable` conformance for such types since they may be further differentiated (through higher-order differentiation). This implementation is contained in `fillDifferentialTypeImplementationForStruct(...)` and is roughly analogous to the AST-side synthesis.
### Differentiable Type Dictionaries
During auto-diff, the IR passes frequently need to perform lookups to check if an `IRType` is differentiable, and retrieve references to the corresponding `IDifferentiable` methods. These lookups also need to work on generic parameters (that are defined inside generic containers), and existential types that are interface-typed parameters.
To accommodate this range of different type systems, Slang uses a type dictionary system that associates a dictionary of relevant types with each function. This works in the following way:
1. When `CheckTerm()` is called on an expression within a function that is marked differentiable (`[Differentiable]`), we check if the resolved type conforms to `IDifferentiable`. If so, we add this type to the dictionary along with the witness to its differentiability. The dictionary is currently located on `DifferentiableAttribute` that corresponds to the `[Differentiable]` modifier.
2. When lowering to IR, we create a `DifferentiableTypeDictionaryDecoration` which holds the IR versions of all the types in the dictionary as well as a reference to their `IDifferentiable` witness tables.
3. When synthesizing the derivative code, all the transcriber passes use `DifferentiableTypeConformanceContext::setFunc()` to load the type dictionary. `DifferentiableTypeConformanceContext` then provides convenience functions to lookup differentiable types, appropriate `IDifferentiable` methods, and construct appropriate `DifferentialPair<T>`s.
### Looking up Differential Info on _Generic_ types
Generically defined types are also lowered into the differentiable type dictionary, but rather than having a concrete witness table, the witness table is itself a parameter. When auto-diff passes need to find the differential type or place a call to the IDifferentiable methods, this is turned into a lookup on the witness table parameter (i.e. `Lookup(<InterfaceRequirementKey>, <WitnessTableParameter>)`). Note that these lookup instructions are inserted into the generic parent container rather than the innermost function.
Example:
```C
T myFunc<T:IDifferentiable>(T a)
{
return a * a;
}
// Reverse-mode differentiated version
void bwd_myFunc<T:IDifferentiable>(
inout DifferentialPair<T> dpa,
T.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
{
T.Differential da = T.dzero(); // T.dzero is Lookup('dzero', T_Witness_Table)
da = T.dadd(dpa.p * dOut, da); // T.dadd is Lookup('dadd', T_Witness_Table)
da = T.dadd(dpa.p * dOut, da);
dpa = diffPair(dpa.p, da);
}
```
### Looking up Differential Info on _Existential_ types
Existential types are interface-typed values, where there are multiple possible implementations at run-time. The existential type carries information about the concrete type at run-time and is effectively a 'tagged union' of all possible types.
#### Differential type of an Existential
The differential type of an existential type is tricky to define since our type system's only restriction on the `.Differential` type is that it also conforms to `IDifferentiable`. The differential type of any interface `IInterface : IDifferentiable` is therefore the interface type `IDifferentiable`. This is problematic since Slang generally requires a static `anyValueSize` that must be a strict upper bound on the sizes of all conforming types (since this size is used to allocate space for the union). Since `IDifferentiable` is defined in the core module `core.meta.slang` and can be used by the user, it is impossible to define a reliable bound.
We instead provide a new **any-value-size inference** pass (`slang-ir-any-value-inference.h`/`slang-ir-any-value-inference.cpp`) that assembles a list of types that conform to each interface in the final linked IR and determines a relevant upper bound. This allows us to ignore types that conform to `IDifferentiable` but aren't used in the final IR, and generate a tighter upper bound.
**Future work:**
This approach, while functional, creates a locality problem since the size of `IDifferentiable` is the max of _all_ types that conform to `IDifferentiable` in visible modules, even though we only care about the subset of types that appear as `T.Differential` for `T : IInterface`. The reason for this problem is that upon performing an associated type lookup, the Slang IR drops all information about the base interface that the lookup starts from and only considers the constraint interface (in this case `Differential : IDifferentiable`).
There are several ways to resolve this issue, including (i) a static analysis pass that determines the possible set of types at each use location and propagates them to determine a narrower set of types, or (ii) generic (or 'parameterized') interfaces, such as `IDifferentiable<T>` where each version can have a different set of conforming types.
<!--#### IDifferentiable Method lookups on an Existential
All other method lookups are performed using existential-type lookups on the existential parameter. The idea is that existential-typed parameters come with a witness-table component that can be accessed by invoking `kIROp_ExtractExistentialWitnessTable` on them. This allows us to look up the `dadd`/`dzero` methods on this witness table in the same way as we did for generic types.-->
Example:
```C
interface IInterface : IDifferentiable
{
[Differentiable]
This foo(float val);
[Differentiable]
float bar();
};
float myFunc(IInterface obj, float a)
{
IInterface k = obj.foo(a);
return k.bar();
}
// Reverse-mode differentiated version (in pseudo-code corresponding to IR, some of these will get lowered further)
void bwd_myFunc(
inout DifferentialPair<IInterface> dpobj,
inout DifferentialPair<float> dpa,
float.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
{
// Primal pass..
IInterface obj = dpobj.p;
IInterface k = obj.foo(a);
// .....
// Backward pass
DifferentialPair<IInterface> dpk = diffPair(k);
bwd_bar(dpk, dOut);
IDifferentiable dk = dpk.d; // Differential of `IInterface` is `IDifferentiable`
DifferentialPair<IInterface> dp = diffPair(dpobj.p);
bwd_foo(dpobj, dpa, dk);
}
```
#### Looking up `dadd()` and `dzero()` on Existential Types
There are two distinct cases for lookup on an existential type. The more common case is the closed-box existential type represented simply by an interface. Every value of this type contains a type identifier & a witness table identifier along with the value itself. The less common case is when the function calls are performed directly on the value after being cast to the concrete type.
**`dzero()` for "closed" Existential type: The `NullDifferential` Type**
For concrete and even generic types, we can initialize a derivative accumulator variable by calling the appropriate `Type.dzero()` method. This is unfortunately not possible when initializing an existential differential (which is currently of type `IDifferentiable`), since we must also initialize the type-id of this existential to one of the implementations, but we do not know which one yet since that is a run-time value that only becomes known after the first differential value is generated.
To get around this issue, we declare a special type called `NullDifferential` that acts as a "none type" for any `IDifferentiable` existential object.
**`dadd()` for "closed" Existential types: `__existential_dadd`**
We cannot directly use `dadd()` on two existential differentials of type `IDifferentiable` because we must handle the case where one of them is of type `NullDifferential` and `dadd()` is only defined for differentials of the same type.
We handle this currently by synthesizing a special method called `__existential_dadd` (`getOrCreateExistentialDAddMethod` in `slang-ir-autodiff.cpp`) that performs a run-time type-id check to see if one of the operands is of type `NullDifferential` and returns the other operand if so. If both are non-null, we dispatch to the appropriate `dadd` for the concrete type.
**`dadd()` and `dzero()` for "open" Existential types**
If we are dealing with values of the concrete type (i.e. the opened value obtained through `ExtractExistentialValue(ExistentialParam)`), then we can perform lookups in the same way we do for generic types. All existential parameters come with a witness table. We insert instructions to extract this witness table and perform lookups accordingly. That is, for `dadd()`, we use `Lookup('dadd', ExtractExistentialWitnessTable(ExistentialParam))` and place a call to the result.
## `struct DifferentialPair<T:IDifferentiable>`
The second major component is `DifferentialPair<T:IDifferentiable>` that represents a pair of a primal value and its corresponding differential value.
The differential pair is primarily used for passing & receiving derivatives from the synthesized derivative methods, as well as for block parameters on the IR-side.
Both `fwd_diff(fn)` and `bwd_diff(fn)` act as function-to-function transformations, and so the Slang front-end translates the type of `fn` to its derivative version so the arguments can be type checked.
### Pair type lowering.
The differential pair type is a special type throughout the AST and IR passes (AST Node: `DifferentialPairType`, IR: `kIROp_DifferentialPairType`) because of its use in front-end semantic checking and when synthesizing the derivative code for the functions. Once the auto-diff passes are complete, the pair types are lowered into simple `struct`s so they can be easily emitted (`DiffPairLoweringPass` in `slang-ir-autodiff-pairs.cpp`).
We also define additional instructions for pair construction (`kIROp_MakeDifferentialPair`) and extraction (`kIROp_DifferentialPairGetDifferential` & `kIROp_DifferentialPairGetPrimal`) which are lowered into struct construction and field accessors, respectively.
### "User-code" Differential Pairs
Just as we use special IR codes for differential pairs because they have special handling in the IR passes, sometimes differential pairs should be _treated as_ regular struct types during the auto-diff passes.
This happens primarily during higher-order differentiation when the user wishes to differentiate the same code multiple times.
Slang's auto-diff approaches this by rewriting all the relevant differential pairs into 'irrelevant' differential pairs (`kIROp_DifferentialPairUserCode`) and 'irrelevant' accessors (`kIROp_DifferentialPairGetDifferentialUserCode`, `kIROp_DifferentialPairGetPrimalUserCode`) at the end of **each auto-diff iteration** so that the next iteration treats these as regular differentiable types.
The user-code versions are also lowered into `struct`s in the same way.
## Type Checking of Auto-Diff Calls (and other _higher-order_ functions)
Since `fwd_diff` and `bwd_diff` are represented as higher order functions that take a function as an input and return the derivative function, the front-end semantic checking needs some notion of higher-order functions to be able to check and lower the calls into appropriate IR.
### Higher-order Invocation Base: `HigherOrderInvokeExpr`
All higher order transformations derive from `HigherOrderInvokeExpr`. For auto-diff there are two possible expression classes `ForwardDifferentiateExpr` and `BackwardDifferentiateExpr`, both of which derive from this parent expression.
### Higher-order Function Call Checking: `HigherOrderInvokeExprCheckingActions`
Resolving the concrete method is not a trivial issue in Slang, given its support for overloading, type coercion and more. This becomes more complex with the presence of a function transformation in the chain.
For example, if we have `fwd_diff(f)(DiffPair<float>(...), DiffPair<double>(...))`, we would need to find the correct match for `f` based on its post-transform argument types.
To facilitate this we use the following workflow:
1. The `HigherOrderInvokeExprCheckingActions` base class provides a mechanism for different higher-order expressions to implement their type translation (i.e. what is the type of the transformed function).
2. The checking mechanism passes all detected overloads for `f` through the type translation and assembles a new group out of the results (the new functions are 'temporary')
3. This new group is used by `ResolveInvoke` when performing overload resolution and type coercion using the user-provided argument list.
4. The resolved signature (if there is one) is then replaced with the corresponding function reference and wrapped in the appropriate higher-order invoke.
**Example:**
Let's say we have two functions with the same name `f`: (`int -> float`, `double, double -> float`)
and we want to resolve `fwd_diff(f)(DiffPair<float>(1.0, 0.0), DiffPair<float>(0.0, 1.0))`.
The higher-order checking actions will synthesize the 'temporary' group of translated signatures (`int -> DiffPair<float>`, `DiffPair<double>, DiffPair<double> -> DiffPair<float>`).
Invoke resolution will then narrow this down to a single match (`DiffPair<double>, DiffPair<double> -> DiffPair<float>`) by automatically casting the `float`s to `double`s. Once the resolution is complete,
we return `InvokeExpr(ForwardDifferentiateExpr(f : double, double -> float), casted_args)` by wrapping the corresponding function in the corresponding higher-order expr
## Attributed Types (`no_diff` parameters)
Often, it will be necessary to prevent gradients from propagating through certain parameters, for correctness reasons. For example, values representing random samples are often not differentiated since the result may be mathematically incorrect.
Slang provides the `no_diff` operator to mark parameters as non-differentiable, even if they use a type that conforms to `IDifferentiable`
```C
float myFunc(float a, no_diff float b)
{
return a * b;
}
// Resulting fwd-mode derivative:
DiffPair<float> myFunc(DiffPair<float> dpa, float b)
{
return diffPair(dpa.p * b, dpa.d * b);
}
```
Slang uses _OpAttributedType_ to denote the IR type of such parameters. For example, the lowered type of `b` in the above example is `OpAttributedType(OpFloat, OpNoDiffAttr)`. In the front-end, this is represented through the `ModifiedType` AST node.
Sometimes, this additional layer can get in the way of things like type equality checks and other mechanisms where the `no_diff` is irrelevant. Thus, we provide the `unwrapAttributedType` helper to remove attributed type layers for such cases.
## Derivative Data-Flow Analysis
Slang has a derivative data-flow analysis pass that is performed on a per-function basis immediately after lowering to IR and before the linking step (`slang-ir-check-differentiability.h`/`slang-ir-check-differentiability.cpp`).
The job of this pass is to enforce that instructions that are of a differentiable type will propagate derivatives, unless explicitly dropped by the user through `detach()` or `no_diff`. The reason for this is that Slang requires functions to be decorated with `[Differentiable]` to allow it to propagate derivatives. Otherwise, the function is considered non-differentiable, and effectively produces a 0 derivative. This can lead to frustrating situations where a function silently drops derivatives without the user intending it. Example:
```C
float nonDiffFunc(float x)
{
/* ... */
}
float differentiableFunc(float x) // Forgot to annotate with [Differentiable]
{
/* ... */
}
float main(float x)
{
// User doesn't realise that the function that is supposed to be differentiable is not
// getting differentiated, because the types here are all 'float'.
//
return nonDiffFunc(x) * differentiableFunc(x);
}
```
The data-flow analysis step enforces that non-differentiable functions used in a differentiable context should get their derivative dropped explicitly. That way, it is clear to the user whether a call is getting differentiated or dropped.
Same example with `no_diff` enforcement:
```C
float nonDiffFunc(float x)
{
/* ... */
}
[Differentiable]
float differentiableFunc(float x)
{
/* ... */
}
float main(float x)
{
return no_diff(nonDiffFunc(x)) * differentiableFunc(x);
}
```
A `no_diff` can only be used directly on a function call, and turns into a `TreatAsDifferentiableDecoration` that indicates that the function will not produce a derivative.
The derivative data-flow analysis pass works similar to a standard data-flow pass:
1. We start by assembling a set of instructions that 'produce' derivatives by starting with the parameters of differentiable types (and without an explicit `no_diff`), and propagating them through each instruction in the block. An inst carries a derivative if one of its operands carries a derivative, and the result type is differentiable.
2. We then assemble a set of instructions that expect a derivative. These are differentiable operands of differentiable functions (unless they have been marked by `no_diff`). We then reverse-propagate this set by adding in all differentiable operands (and repeating this process).
3. During this reverse-propagation, if there is any `OpCall` in the 'expect' set that is not also in the 'produce' set, then we have a situation where the gradient hasn't been explicitly dropped, and we create a user diagnostic.

View file

@ -1,271 +0,0 @@
Capabilities (Out of Date)
============
Slang aims to be a portable language for shader programming, which introduces two complementary problems:
1. We need a way to indicate that certain constructs (types, functions, etc.) are only allowed on certain targets, so that a user gets a meaningful error if they try to do something that won't work on one or more of the APIs or platforms they want to target. Similarly, the user expects to get an error if they call a fragment-shader-specific function inside of, say, compute shader code, or vice versa.
2. If the same feature can be implemented across multiple platforms, but the best (or only) implementation path differs across platforms, then we need a way to express the platform specific code and pick the right implementation per-target.
Item (2) is traditionally handled with preprocessor techniques (e.g., `#ifdef`ing the body of a function based on target platform), but that of course requires that the user invoke the Slang front end once for each target platform, and target-specific coding in a library will then "infect" code that uses that library, forcing them to invoke the front-end once per target as well.
We are especially sensitive to this problem in the compiler itself, because we have to author and maintain the Slang standard modules, which needs to (1) expose the capabilities of many platforms and (2) work across all those platforms. It would be very unfortunate if we had to build different copies of our standard modules per-target.
The intention in Slang is to solve both of these problems with a system of *capabilities*.
What is a capability?
---------------------
For our purposes a capability is a discrete feature that a compilation target either does or does not support.
We could imagine defining a capability for the presence of texture sampling operations with implicit gradients; this capability would be supported when generating fragment shader kernel code, but not when generating code for other stages.
Let's imagine a language syntax that the standard modules could use to define some *atomic* capabilities:
```
capability implicit_gradient_texture_fetches;
```
We can then imagine using attributes to indicate that a function requires a certain capability:
```
struct Texture2D
{
...
// Implicit-gradient sampling operation.
[availableFor(implicit_gradient_texture_fetches)]
float4 Sample(SamplerState s, float2 uv);
}
```
(Note that the `[availableFor(...)]` syntax is just a straw-man to write up examples, and a better name would be desirable if/when we implement this stuff.)
Given those declarations, we could then check when compiling code if the user is trying to call `Texture2D.Sample` in code compiled for a target that *doesn't* support implicit-gradient texture fetches, and issue an appropriate error.
The details on how to sequence this all in the compiler will be covered later.
Derived Capabilities
--------------------
Once we can define atomic capabilities, the next step is to be able to define *derived* capabilities.
Let's imagine that we extend our `capability` syntax so that we can define a new capability that automatically implies one or more other capabilities:
```
capability fragment : implicit_gradient_texture_fetches;
```
Here we've said that whenever the `fragment` capability is available, we can safely assume that the `implicit_gradient_texture_fetches` capability is available (but not vice versa).
Given even a rudimentary tool like that, we can start to build up capabilities that relate closely to the "profiles" in things like D3D:
```
capability d3d;
capability sm_5_0 : d3d;
capability sm_5_1 : sm_5_0;
capability sm_6_0 : sm_5_1;
...
capability d3d11 : d3d, sm_5_0;
capability d3d12 : d3d, sm_6_0;
capability khronos;
capability glsl_400 : khronos;
capability glsl_410 : glsl_400;
...
capability vulkan : khronos, glsl_450;
capability opengl : khronos;
```
Here we are saying that `sm_5_1` supports everything `sm_5_0` supports, and potentially more. We are saying that `d3d12` supports `sm_6_0` but maybe not, e.g., `sm_6_3`.
We are expressing the fact that having a `glsl_*` capability means you are on some Khronos API target, but that it doesn't specify which one.
(The exact details of these declarations obviously aren't the point; getting a good hierarchy of capabilities will take time.)
Capability Composition
----------------------
Sometimes we'll want to give a distinct name to a specific combination of capabilities, but not say that it supports anything new:
```
capability ps_5_1 = sm_5_1 & fragment;
```
Here we are saying that the `ps_5_1` capability is *equivalent* to the combination of `sm_5_1` and `fragment` (that is, if you support both `sm_5_1` and `fragment` then you support `ps_5_1` and vice versa).
Compositions should be allowed in `[availableFor(...)]` attributes (e.g., `[availableFor(vulkan & glsl_450)]`), but pre-defined compositions should be favored when possible.
When composing things with `&` it is safe for the compiler to filter out redundancies based on what it knows so that, e.g., `ps_5_0 & fragment` resolves to just `ps_5_0`.
Once we have an `&` operator for capabilities, it is easy to see that "derived" capabilities are really syntax sugar, so that a derived capability like:
```
capability A : B, C
```
could have been written instead as:
```
capability A_atomic
capability A = A_atomic & B & C
```
Where the `A_atomic` capability guarantees that `A` implies `B` and `C` but not vice versa.
It is also useful to think of an `|` operator on capabilities.
In particular if a function has multiple `[availableFor(...)]` attributes:
```
[availableFor(vulkan & fragment)]
[availableFor(d3d12 & fragment)]
void myFunc();
```
This function should be equivalent to one with just a single `[availableFor((vulkan & fragment) | (d3d12 & fragment))]` which is equivalent to `[availableFor((vulkan | d3d12) & fragment)]`.
Simplification should generally push toward "disjunctive normal form," though, rather than pursue simplifications like that.
Note that we do *not* include negation, so that capabilities are not general Boolean expressions.
Validation
----------
For a given function definition `F`, the front end will scan its body and see what it calls, and compose the capabilities required by the called functions using `&` (simplifying along the way). Call the resulting capability (in disjunctive normal form) `R`.
If `F` doesn't have an `[availableFor(...)]` attribute, then we can derive its *effective* `[availableFor(...)]` capability as `R` (this probably needs to be expressed as an iterative dataflow problem over the call graph, to handle cycles).
If `F` *does* have one or more `[availableFor(...)]` clauses that amount to a declared capability `C` (again in disjunctive normal form), then we can check that `C` implies `R` and error out if it is not the case.
A reasonable implementation would track which calls introduced which requirements, and be able to explain *why* `C` does not capture the stated requirements.
For a shader entry point, we should check it as if it had an `[availableFor(...)]` that is the OR of all the specified target profiles (e.g., `sm_5_0 | glsl_450 | ...`) ANDed with the specified stage (e.g., `fragment`).
Any error here should be reported to the user.
If an entry point has an explicit `[availableFor(...)]` then we should AND that onto the profile computed above, so that the user can restrict certain entry points to certain profiles.
In order to support separate compilation, the functions that are exported from a module should probably either have explicit availability attributes, or else they will be compiled against a kind of "default capability" used for the whole module.
Downstream code that consumes such a module would see declarations with explicit capabilities only.
Picking an appropriate "default capability" to use when compiling modules is an important challenge; it would in practice define the "min spec" to use when compiling.
Capability Overriding
---------------------
It should be possible to define multiple versions of a function, having different `[availableFor(...)]` attributes:
```
[availableFor(vulkan)] void myFunc() { ... }
[availableFor(d3d12)] void myFunc() { ... }
```
For front-end checking, these should be treated as if they were a single definition of `myFunc` with an ORed capability (e.g., `vulkan | d3d12`).
Overload resolution will pick the "best" candidate at a call site based *only* on the signatures of the function (note that this differs greatly from how profile-specific function overloading works in Cg).
The front-end will then generate initial IR code for each definition of `myFunc`.
Each of the IR functions will have the *same* mangled name, but different bodies, and each will have appropriate IR decorations to indicate the capabilities it requires.
The choice of which definition to use is then put off until IR linking for a particular target.
At that point we can look at all the IR functions matching a given mangled name, filter them according to the capabilities of the target, and then select the "best" one.
In general a definition `A` of an IR symbol is better than another definition `B` if the capabilities on `A` imply those on `B` but not vice versa.
(In practice this probably needs to be "the capabilities on `A` intersected with those of the target," and similarly for `B`)
This approach allows us to defer profile-based choices of functions to very late in the process. The one big "gotcha" to be aware of is when functions are overloaded based on pipeline stage, where we would then have to be careful when generating DXIL or SPIR-V modules with multiple entry points (as a single function `f` might need to be specialized twice if it calls a stage-overloaded function `g`).
Capabilities in Other Places
----------------------------
So far I've talked about capabilities on functions, but they should also be allowed on other declarations including:
- Types, to indicate that code using that type needs the given capability
- Interface conformances, to indicate that a type only conforms to the interface when the capabilities are available
- Struct fields, to indicate that the field is only present in the type when the capabilities are present
- Extension declarations, to indicate that everything in them requires the specified capabilities
We should also provide a way to specify that a `register` or other layout modifier is only applicable for specific targets/stages. Such a capability nominally exists in HLSL today, but it would be much more useful if it could be applied to specify target-API-specific bindings.
Only functions should support overloading based on capability. In all other cases there can only be one definition of an entity, and capabilities just decide when it is available.
API Extensions as Capabilities
------------------------------
One clear use case for capabilities is to represent optional extensions, including cases where a feature is "built-in" in D3D but requires an extension in Vulkan:
```
capability KHR_secret_sauce : vulkan;
[availableFor(sm_7_0)] // always available for D3D Shader Model 7.0
[availableFor(KHR_secret_sauce)] // Need the "secret sauce" extension for Vulkan
void improveShadows();
```
When generating code for Vulkan, we should be able to tell the user that the `improveShadows()` function requires the given extension. The user should be able to express compositions of capabilities in their `-profile` option (and similarly for the API):
```
slangc code.slang -profile vulkan+KHR_secret_sauce
```
(Note that for the command line, it is beneficial to use `+` instead of `&` to avoid conflicts with shell interpreters)
An important question is whether the compiler should automatically infer required extensions without them being specified, so that it produces SPIR-V that requires extensions the user didn't ask for.
The argument against such inference is that users should opt in to non-standard capabilities they are using, but it would be unfortunate if this in turn requires verbose command lines when invoking the compiler.
It should be possible to indicate the capabilities that a module or entry point should be compiled to use without command-line complications.
(A related challenge is when a capability can be provided by two different extensions: how should the compiler select the "right" one to use?)
Disjoint Capabilities
---------------------
Certain compositions of capabilities make no sense. If a user declared a function as needing `vulkan & d3d12` they should probably get an error message.
Knowing that certain capabilities are disjoint can also help improve the overall user experience.
If a function requires `(vulkan & extensionA) | (d3d12 & featureB)` and we know we are compiling for `vulkan` we should be able to give the user a pointed error message saying they need to ask for `extensionA`, because adding `featureB` isn't going to do any good.
As a first-pass model we could have a notion of `abstract` capabilities that are used to model the root of hierarchies of disjoint capabilities:
```
abstract capability api;
abstract capability d3d : api;
capability d3d11 : d3d;
capability d3d12 : d3d;
abstract capability khronos : api;
capability vulkan : khronos;
capability opengl : khronos;
```
As a straw man: we could have a rule that to decide if non-abstract capabilities `A` and `B` are disjoint, we look for their common ancestor in the tree of capabilities.
If the common ancestor is abstract, they are disjoint, and if not, they are not disjoint.
We'd also know that if the user tries to compile for a profile that includes an abstract capability but *not* some concrete capability derived from it, then that is an error (we can't generate code for just `d3d`).
The above is an over-simplification because we don't have a *tree* of capabilities, but a full *graph*, so we'd need an approach that works for the full case.
Interaction with Generics/Interfaces
------------------------------------
It should be possible for an interface requirement to have a capability requirement attached to it.
This would mean that users of the interface can only use the method/type/whatever when the capability is present (just like for any other function):
```
interface ITexture
{
float4 sampleLevel(float2 uv, float lod);
[availableFor(fragment)]
float4 sample(float2 uv); // can only call this from fragment code
}
```
When implementing an interface, any capability constraints we put on a member that satisfies an interface requirement would need to guarantee that either:
- the capabilities on our method are implied by those on the requirement (we don't require more), or
- the capabilities on the method are implied by those on the type itself, or its conformance to the interface (you can't use the conformance without the capabilities), or
- the capabilities are already implied by those the whole module is being compiled for
In each case, you need to be sure that `YourType` can't be passed as a generic argument to some function that uses just the `ITexture` interface above and have them call a method on your type from a profile that doesn't have the required capabilities.
Interaction with Heterogeneity
------------------------------
If Slang eventually supports generating CPU code as well as shaders, it should use capabilities to handle the CPU/GPU split similar to how they can be used to separate out vertex- and fragment-shader functionality.
Something like a `cpu` profile that works as a catch-all for typical host CPU capabilities would be nice, and could be used as a convenient way to mark "host" functions in a file that is otherwise compiled for a "default profile" that assumes GPU capabilities.
Conclusion
----------
Overall, the hope is that in many cases developers will be able to use capability-based partitioning and overloading of APIs to build code that only has to pass through the Slang front-end once, but that can then go through back-end code generation for each target.
In cases where this can't be achieved, the way that capability-based overloading is built into the Slang IR design means that we should be able to merge multiple target-specific definitions into one IR module, so that a module can employ target-specific specializations while still presenting a single API to consumers.

View file

@ -1,150 +0,0 @@
Casting in the Slang Compiler
=============================
The following discussion is about casting within the C++ implementation of the slang compiler.
C++'s built-in mechanisms for casting (principally dynamic_cast) are problematic within the slang compiler codebase. Code using 'dynamic_cast' requires that RTTI information is available, and that a type that uses it must have a vtbl (have at least one virtual member). Some problems with this...
* There are types which we want to 'dynamic_cast' that do not have, and we do not want to have a Vtbl (for example Slang::IRInst).
* There are types which a 'dynamic_cast' doesn't do quite what we want (for example casting on Type* derived types typically wants to work on their canonical type)
* We may want to replace use of dynamic_cast in the future for speed/space or other reasons
* It is common in the code base when using a 'smart pointer' type to cast it, but still return a smart pointer
To deal with these issues we need casting within Slang to follow its own methodology. In summary it is as follows...
* Use 'as' free function to do a typical 'dynamic like' cast.
* 'as' doesn't guarantee the returned pointer points to the same object.
* For example with Type* it *actually* does the cast on the canonical type which is often a different object.
* If you want to *literally* do a dynamic cast use 'dynamicCast' free function.
* This guarantees the returned pointer points to the same object (like normal dynamic_cast)
* If you want to return a smart pointer from a cast from a smart pointer use the .as or .dynamicCast *methods*
* If you want to determine if an 'as' cast is possible on a smart pointer use the .is method
* Doing so will produce more efficient code because a new smart pointer does not need to be constructed
These functions will also work with types that do not have Vtbl - like IRInst derived types.
Both 'as' and 'dynamicCast' handle the case if the pointer is a nullptr, by returning a nullptr. If the cast succeeds the cast pointer is returned otherwise nullptr is returned. If a cast is performed with a free function it always returns a raw pointer.
So why have 'as' and 'dynamicCast' - they seem sort of similar? The primary difference is dynamicCast *must* always return a pointer to the same object, whilst 'as' *can* return a pointer to a different object if that is the desired 'normal' casting behavior for the type. This is the case for Type* when using 'as' it may return a different object - the 'canonical type' for the Type*. For a concrete example take 'NamedExpressionType', its canonical type is the type the name relates to. If you use 'as' on it - it will produce a pointer to a different object, an object that will not be castable back into a NamedExpressionType.
Also keep in mind that 'as' behavior is based on the pointer type being cast from. For any pointer to a type derived from Type it will cast the canonical type. **BUT** if the pointer is pointing to a Type derived *object*, but the pointer type is *not* derived from Type (like say RefObject*), then 'as' will behave like dynamicCast.
All this being said, 'as' in usage is seen as the 'default' way to do a 'dynamic like' cast, with the special behavior appropriate for the type applied when necessary.
By having the free function and method versions of 'as' and 'dynamicCast', you can choose if you want a 'raw' or 'smart' pointer type returned from the cast. If you just want to test if something is a certain type, then using as/dynamicCast free functions is the faster way to do it. If you *know* that a raw pointer is ok, because the object will remain in scope, then again using the free function is better because it does less work. But as the examples following show, care is needed because if you get it wrong the object might go out of scope and leave the raw pointer pointing to a deleted object. When in doubt the safe choice is to typically use .as (or .dynamicCast if appropriate) methods.
Following example shows the different types of casting...
```C++
void someFunction(Decl* decl, Type* type)
{
RefPtr<Decl> declRefPtr(decl);
RefPtr<Type> typeRefPtr(type);
// Use of as
{
// Casting with as on a free function returns a raw pointer
GenericDecl* genericDeclRaw0 = as<GenericDecl>(decl);
// Free function again returns a raw pointer
GenericDecl* genericDeclRaw1 = as<GenericDecl>(declRefPtr);
// Using the as *method* returns a smart pointer holding the cast result
RefPtr<GenericDecl> genericDeclRefPtr0 = declRefPtr.as<GenericDecl>();
// Of course you can use auto with either
auto genericDeclRefPtr1 = declRefPtr.as<GenericDecl>();
auto genericDeclRaw2 = as<GenericDecl>(declRefPtr);
}
// Currently using as on anything not cast *from* Type is the same as dynamicCast.
// But on Type* sometimes you may want to control the cast
{
// With a NamedExpressionType sometimes you don't want 'as' behaviour - if we want to see the information about the name (not the thing
// it relates to (the canonical type))
NamedExpressionType* namedExpressionRawPtr = dynamicCast<NamedExpressionType>(type);
// Returns the smart pointer
auto namedExpressionRefPtr = typeRefPtr.as<NamedExpressionType>();
}
```
It is important to be aware of what style of cast you use where. Take for example the following function ...
```C++
RefPtr<Expr> substitute(RefPtr<Expr> expr) const
{
return DeclRefBase::Substitute(expr);
}
```
If you want to do a cast on it, you need to be careful especially about scope, for example...
```C++
RefPtr<Expr> expr = ...;
{
// Whoops! This is a problem. When using the free function, the cast is to a *raw* pointer, so obj
// receives a raw pointer. When the RefPtr returned from Substitute goes out of scope (when the statement is left)
// the ref will be removed and if the ref count was 1 destroyed. Now obj points to a freed object and so a crash is
// likely to follow in the future!
auto obj = as<RefObject>(substitute(expr));
}
// So how do we avoid this? Well it depends what the function is returning and the scope. If it's returning a smart pointer,
// you could use the .as method
{
// This can only compile if it is a smart pointer (raw pointers don't have an as method)
auto obj = substitute(expr).as<RefObject>();
}
// Another option is to put the created thing in a smart pointer so you know it's in scope
{
RefPtr<Expr> sub = substitute(expr);
// Ok as long as sub is in scope
auto obj = as<RefObject>(sub);
}
// More awkwardly you could use free function, but assign to a smart pointer, thus maintaining scope
{
RefPtr<RefObject> obj = as<RefObject>(substitute(expr));
}
```
The following code shows the change in behavior of 'as' is based on the source *pointer* type **NOT** the *object* type..
```C++
// Derives from Type
NamedExpressionType* exprType = ...;
// Will be the Type* of the *canonical* type, because the pointer is Type derived and we are using as!
Type* type0 = as<Type>(exprType);
// It's going to be pointing to a different object, because type0 is the cast of the *canonical* type, because exprType derives from Type
SLANG_ASSERT(type0 != exprType);
// If I do a dynamicCast the result is either nullptr or a pointer that *must* point to the same object
Type* type1 = dynamicCast<Type>(exprType);
SLANG_ASSERT(type1 == exprType);
// Here, the pointer is pointing to a NamedExpressionType derived object. Which derives from Type. BUT our pointer here does *not* derive from type.
RefObject* refObj = exprType;
// 'as' just looks at the from type, and it doesn't derive from Type (it's just RefObject), so it does regular as, which is dynamicCast
Type* type2 = as<Type>(refObj);
SLANG_ASSERT(type2 == exprType);
// Finally...
// Is true even though exprType is a NamedExpressionType, because the cast is on the canonical type
SLANG_ASSERT(as<NamedExpressionType>(exprType) == nullptr);
// dynamicCast is always the same object returned, so must match
SLANG_ASSERT(dynamicCast<NamedExpressionType>(exprType) == exprType);
```

View file

@ -1,282 +0,0 @@
Slang Project Coding Conventions
================================
Principles
----------
This document attempts to establish conventions to be used in the Slang codebase.
We have two goals for this convention.
The first goal is to make the code look relatively consistent so that it is easy to navigate and understand for contributors.
Having varying styles across different modules, files, functions, or lines of code makes the overall design and intention of the codebase harder to follow.
The second goal is to minimize the scope complexity of diffs when multiple maintainers work together on the codebase.
In the absence of an enforced style, developers tend to "clean up" code they encounter to match their personal preferences, and in so doing create additional diffs that increase the chances of merge conflicts and pain down the line.
Because the Slang codebase has passed through many hands and evolved without a pre-existing convention, these two goals can come into conflict.
We encourage developers to err on the side of leaving well enough alone (favoring the second goal).
Don't rewrite or refactor code to match these conventions unless you were already going to have to touch all of those lines of code anyway.
Note that external code that is incorporated into the project is excluded from all of these conventions.
Languages
---------
### C++
Most code in the Slang project is implemented in C++.
We currently assume support for some C++11 idioms, but have explicitly avoided adding dependencies on later versions.
As a general rule, be skeptical of "modern C++" ideas unless they are clearly better than simpler alternatives.
We are not quite in the realm of "Orthodox C++", but some of the same guidelines apply:
* Don't use exceptions for non-fatal errors (and even then support a build flag to opt out of exceptions)
* Don't use the built-in C++ RTTI system (home-grown is okay)
* Don't use the C++ variants of C headers (e.g., use `<stdio.h>` instead of `<cstdio>`)
* Don't use the STL containers
* Don't use iostreams
The compiler implementation does not follow some of these guidelines at present; that should not be taken as an excuse to further the proliferation of stuff like `dynamic_cast`.
Do as we say, not as we do.
Some relatively recent C++ features that are okay to use:
* Rvalue references for "move semantics," but only if you are implementing performance-critical containers or other code where this really matters.
* `auto` on local variables, if the expected type is clear in context
* Lambdas are allowed, but think carefully about whether just declaring a subroutine would also work.
* Using `>>` to close multiple levels of templates, instead of `> >` (but did you really need all those templates?)
* `nullptr`
* `enum class`
* Range-based `for` loops
* `override`
* Default member initializers in `class`/`struct` bodies
Templates are suitable in cases where they improve clarity and type safety.
As a general rule, it is best when templated code is kept minimal, and forwards to a non-templated function that does the real work, to avoid code bloat.
Any use of template metaprogramming would need to prove itself exceptionally useful to pay for the increase in cognitive complexity.
We don't want to be in the business of maintaining "clever" code.
As a general rule, `const` should be used sparingly and only with things that are logically "value types."
If you find yourself having to `const`-qualify a lot of member functions in a type that you expect to be used as a heap-allocated object, then something has probably gone wrong.
As a general rule, default to making the implementation of a type `public`, and only encapsulate state or operations with `private` when you find that there are complex semantics or invariants that can't be provided without a heavier hand.
### Slang
The Slang project codebase also includes `.slang` files implementing the Slang core module, as well as various test cases and examples.
The conventions described here are thus the "official" recommendations for how users should format Slang code.
To the extent possible, we will try to apply the same basic conventions to both C++ and Slang.
In places where we decide that the two languages merit different rules, we will point it out.
Files and Includes
------------------
### File Names
All files and directories that are added to codebase should have names that contain only ASCII lower-case letters, digits, dots (`.`) and dashes (`-`).
Operating systems still vary greatly in their handling of case sensitivity for file names, and non-ASCII code points are handled with even less consistency; sticking to a restricted subset of ASCII helps avoid some messy interactions between case-insensitive file systems and case-sensitive source-control systems like Git.
As with all these conventions, files from external projects are exempted from these restrictions.
### Naming of Source and Header Files
In general the C++ codebase should be organized around logical features/modules/subsystem, each of which has a single `.h` file and zero or more `.cpp` files to implement it.
If there is a single `.cpp` file, its name should match the header: e.g., `parser.h` and `parser.cpp`.
If there is more than one `.cpp` file, their names should start with the header name: e.g., `parser.h` and `parser-decls.cpp` and `parser-exprs.cpp`.
If there are declarations that need to be shared by the `.cpp` files, but shouldn't appear in the public interface, then they can go in a `*-impl.h` header (e.g., `parser-impl.h`).
Use best judgement when deciding what counts as a "feature." One class per file is almost always overkill, but the codebase currently leans too far in the other direction, with some oversized source files.
### Headers
Every header file should have an include guard.
Within the implementation we can use `#pragma once`, but exported API headers (`slang.h`) should use traditional `#ifdef` style guards (and they should be consumable as both C and C++).
A header should include or forward-declare everything it needs in order to compile.
It is *not* up to the programmer who `#include`s a header to sort out the dependencies.
Avoid umbrella or "catch-all" headers.
### Source Files
Every source file should start by including the header for its feature/module, before any other includes (this helps ensure that the header correctly includes its dependencies).
Functions that are only needed within that one source file can be marked `static`, but we should avoid using the same name for functions in different files (in order to support lumped/unified builds).
### Includes
In general, includes should be grouped as follows:
* First, the corresponding feature/module header, if we are in a source file
* Next, any `<>`-enclosed includes for system/OS headers
* Next, any `""`-enclosed includes for external/third-party code that is stored in the project repository
* Finally, any includes for other features in the project
Within each group, includes should be sorted alphabetically.
If this breaks because of ordering issues for system/OS/third-party headers (e.g., `<windows.h>` must be included before `<GL/GL.h>`), then ideally those includes should be mediated by a Slang-project-internal header that features can include.
Namespaces
----------
Favor fewer namespaces when possible.
Small programs may not need any.
All standard module code that a Slang user might link against should go in the `Slang` namespace for now, to avoid any possibility of clashes in a static linking scenario.
The public C API is obviously an exception to this.
Code Formatting
------------------------------
- For C++ files, please format using `clang-format`; `.clang-format` files in
the source tree define the style.
- For CMake files, please format using `gersemi`
- For shell scripts, please format using `shfmt`
- For YAML files, please use `prettier`
The formatting for the codebase is overall specified by the
[`extras/formatting.sh`](./extras/formatting.sh) script.
If you open a pull request and the formatting is incorrect, you can comment
`/format` and a bot will format your code for you.
Naming
------
### Casing
Types should in general use `UpperCamelCase`. This includes `struct`s, `class`es, `enum`s and `typedef`s.
Values should in general use `lowerCamelCase`. This includes functions, methods, local variables, global variables, parameters, fields, etc.
Macros should in general use `SCREAMING_SNAKE_CASE`.
It is important to prefix all macros (e.g., with `SLANG_`) to avoid collisions, since `namespace`s don't affect macros.
In names using camel case, acronyms and initialisms should appear entirely in either upper or lower case (e.g., `D3DThing d3dThing`) and not be capitalized as if they were ordinary words (e.g., `D3dThing d3dThing`).
Note that this also applies to uses of "ID" as an abbreviation for "identifier" (e.g., use `nodeID` instead of `nodeId`).
### Prefixes
Prefixes based on types (e.g., `p` for pointers) should never be used.
Global variables should have a `g` prefix, e.g. `gCounter`.
Non-`const` `static` class members can have an `s` prefix if that suits your fancy.
Of course, both of these should be avoided, so this shouldn't come up often.
Constant data (in the sense of `static const`) should have a `k` prefix.
In contexts where "information hiding" is relevant/important, such as when a type has both `public` and `private` members, or just has certain operations/fields that are considered "implementation details" that most clients should not be using, an `m_` prefix on member variables and a `_` prefix on member functions is allowed (but not required).
In function parameter lists, an `in`, `out`, or `io` prefix can be added to a parameter name to indicate whether a pointer/reference/buffer is intended to be used for input, output, or both input and output.
For example:
```c++
void copyData(void* outBuffer, void const* inBuffer, size_t size);
Result lookupThing(Key k, Thing& outThing);
void maybeAppendExtraNames(std::vector<Name>& ioNames);
```
Public C APIs will prefix all symbol names while following the casing convention (e.g. `SlangModule`, `slangLoadModule`, etc.).
### Enums
C-style `enum` should use the following convention:
```c++
enum Color
{
kColor_Red,
kColor_Green,
kColor_Blue,
kColorCount,
};
```
When using `enum class`, drop the `k` and type name as prefix, but retain the `UpperCamelCase` tag names:
```c++
enum class Color
{
Red,
Green,
Blue,
Count,
};
```
When defining a set of flags, separate the type definition from the `enum`:
```c++
typedef unsigned int Axes;
enum
{
kAxes_None = 0,
kAxis_X = 1 << 0,
kAxis_Y = 1 << 1,
kAxis_Z = 1 << 2,
kAxes_All = kAxis_X | kAxis_Y | kAxis_Z,
};
```
Note that the type name reflects the plural case, while the cases that represent individual bits are named with a singular prefix.
In public APIs, all `enum`s should use the style of separating the type definition from the `enum`, and all cases should use `SCREAMING_SNAKE_CASE`:
```c++
typedef unsigned int SlangAxes;
enum
{
SLANG_AXES_NONE = 0,
SLANG_AXIS_X = 1 << 0,
SLANG_AXIS_Y = 1 << 1,
SLANG_AXIS_Z = 1 << 2,
SLANG_AXES_ALL = SLANG_AXIS_X | SLANG_AXIS_Y | SLANG_AXIS_Z,
};
```
### General
Names should default to the English language and US spellings, to match the dominant conventions of contemporary open-source projects.
Function names should either be named with action verbs (`get`, `set`, `create`, `emit`, `parse`, etc.) or read as questions (`isEnabled`, `shouldEmit`, etc.).
Whenever possible, compiler concepts should be named using the most widely-understood term available: e.g., we use `Token` over `Lexeme`, and `Lexer` over `Scanner` simply because they appear to be the more common names.
Avoid abbreviations and initialisms unless they are already widely established across the codebase; a longer name may be cumbersome to write in the moment, but the code will probably be read many more times than it is written, so clarity should be preferred.
An important exception to this is common compiler concepts or techniques which may have laboriously long names: e.g., Static Single Assignment (SSA), Sparse Conditional Copy Propagation (SCCP), etc.
One gotcha particular to compiler front-ends is that almost every synonym for "type" has some kind of established technical meaning; most notably the term "kind" has a precise meaning that is relevant in our domain.
It is common practice in C and C++ to define tagged union types with a selector field called a "type" or "kind," which does not usually match this technical definition.
If a developer wants to avoid confusion, they are encouraged to use the term "flavor" instead of "type" or "kind" since this term (while a bit silly) is less commonly used in the literature.
Comments and Documentation
--------------------------
You probably know the drill: comments are good, but an out-of-date comment can be worse than no comment at all.
Try to write comments that explain the "why" of your code more than the "what."
When implementing a textbook algorithm or technique, it may help to imagine giving the reviewer of your code a brief tutorial on the topic.
In cases where comments would benefit from formatting, use Markdown syntax.
We do not currently have a setup for extracting documentation from comments, but if we add one we will ensure that it works with Markdown.
When writing comments, please be aware that your words could be read by many people, from a variety of cultures and backgrounds.
Default to a plain-spoken and professional tone and avoid using slang, idiom, profanity, etc.

View file

@ -1,166 +0,0 @@
Understanding Declaration References (Out of Date)
====================================
This document is intended as a reference for developers working on the Slang compiler implementation.
As you work on the code, you'll probably notice a lot of places where we use the `DeclRef<T>` type:
* Expressions like `VarExpr` and `MemberExpr` are subclasses of `DeclRefExpr`, which holds a `DeclRef<Decl>`.
* The most common subclass of `Type` is `DeclRefType`, which holds a `DeclRef<Decl>` for the type declaration.
* Named types (references to `typedef`s) hold a `DeclRef<TypedefDecl>`
* The name lookup process relies a lot on `DeclRef<ContainerDecl>`
So what in the world is a `DeclRef`?
The short answer is that a `DeclRef` packages up two things:
1. A pointer to a `Decl` in the parsed program AST
2. A set of "substitutions" to be applied to that decl
Why do we need `DeclRef`s?
--------------------------
In a compiler for a simple language, we might represent a reference to a declaration as simply a pointer to the AST node for the declaration, or some kind of handle/ID that references that AST node.
A representation like that will work in simple cases, for example:
```hlsl
struct Cell { int value };
Cell a = { 3 };
int b = a.value + 4;
```
In this case, the expression node for `a.value` can directly reference the declaration of the field `Cell::value`, and from that we can conclude that the type of the field (and hence the expression) is `int`.
In contrast, things get more complicated as soon as we have a language with generics:
```hlsl
struct Cell<T> { T value; };
// ...
Cell<int> a = { 3 };
int b = a.value + 4;
```
In this case, if we try to have the expression `a.value` only reference `Cell::value`, then the best we can do is conclude that the field has type `T`.
In order to correctly type the `a.value` expression, we need enough additional context to know that it references `Cell<int>::value`, and from that to be able to conclude that a reference to `T` in that context is equivalent to `int`.
We can represent that information as a substitution which maps `T` to `int`:
```
[ Cell::T => int ]
```
Then we can encode a reference to `Cell<int>::value` as a reference to the single declaration `Cell::value` with such a substitution applied:
```
Cell::value [Cell::T => int]
```
If we then want to query the type of this field, we can first look up the type stored on the AST (which will be a reference to `Cell::T`) and apply the substitutions from our field reference to get:
```
Cell::T [Cell::T => int]
```
Of course, we can then simplify the reference by applying the substitutions, to get:
```
int
```
How is this implemented?
------------------------
At the highest level, a `DeclRef` consists of a pointer to a declaration (a `Decl*`) plus a singly-linked list of `Substitution`s.
These substitutions fill in the missing information for any declarations on the ancestor chain for the declaration.
Each ancestor of a declaration can introduce an expected substitution along the chain:
* Most declarations don't introduce any substitutions: e.g., when referencing a non-generic `struct` we don't need any additional information.
* A surrounding generic declaration requires a `GenericSubstitution` which specifies the type argument to be plugged in for each type parameter of the declaration.
* A surrounding `interface` declaration usually requires a `ThisTypeSubstitution` that identifies the specific type on which an interface member has been looked up.
All of the expected substitutions should be in place in the general case, even when we might not have additional information. E.g., within a generic declaration like this:
```hlsl
struct Cell<T>
{
void a();
void b() { a(); }
}
```
The reference to `a` in the body of `b` will be represented as a declaration reference to `Cell::a` with a substitution that maps `[Cell::T => Cell::T]`. This might seem superfluous, but it makes it clear that we are "applying" the generic to arguments (even if they are in some sense placeholder arguments), and not trying to refer to an unspecialized generic.
There are a few places in the compiler where we might currently bend these rules, but experience has shown that failing to include appropriate substitutions is more often than not a source of bugs.
What in the world is a "this type" substitution?
------------------------------------------------
When using interface-constrained generics, we need a way to invoke methods of the interface on instances of a generic parameter type.
For example, consider this code:
```hlsl
interface IVehicle
{
associatedtype Driver;
Driver getDriver();
}
void ticketDriver<V : IVehicle>(V vehicle)
{
V.Driver driver = vehicle.getDriver();
sendTicketTo(driver);
}
```
In the expression `vehicle.getDriver`, we are referencing the declaration of `IVehicle::getDriver`, and so a naive reading tells us that the return type of the call is `IVehicle.Driver`, but that is an associated type and not a concrete type. It is clear in context that the expression `vehicle.getDriver()` should result in a `V.Driver`.
The way the compiler encodes that is that we treat the expression `v.getDriver` as first "up-casting" the value `v` (of type `V`) to the interface `IVehicle`. We know this is valid because of the generic constraint `V : IVehicle`. The result of the up-cast operation is an expression with a type that references `IVehicle`, but with a substitution to track the fact that the underlying implementation type is `V`. This amounts to something like:
```
IVehicle [IVehicle.This => V]
```
where `IVehicle.This` is a way to refer to "the concrete type that is implementing `IVehicle`".
Looking up the `getDriver` method on this up-cast expression yields a reference to:
```
IVehicle::getDriver [IVehicle.This => V]
```
And extracting the return type of that method gives us a reference to the type:
```
IVehicle::Driver [IVehicle.This => V]
```
which turns out to be exactly what the front end produces when it evaluates the type reference `V.Driver`.
As this example shows, a "this type" substitution allows us to refer to interface members while retaining knowledge of the specific type on which those members were looked up, so that we can compute correct references to things like associated types.
What does any of this mean for me?
----------------------------------
When working in the Slang compiler code, try to be aware of whether you should be working with a plain `Decl*` or a full `DeclRef`.
There are many queries like "what is the return type of this function?" that typically only make sense if you are applying them to a `DeclRef`.
The `syntax.h` file defines helpers for most of the existing declaration AST nodes for querying properties that should represent substitutions (the type of a variable, the return type of a function, etc.).
If you are writing code that is working with a `DeclRef`, try to use these accessors and avoid being tempted to extract the bare declaration and start querying it.
Some things like `Modifier`s aren't (currently) affected by substitutions, so it can make sense to query them on a bare declaration instead of a `DeclRef`.
Conclusion
----------
Working with `DeclRef`s can be a bit obtuse at first, but they are the most elegant solution we've found to the problems that arise when dealing with generics and interfaces in the compiler front-end. Hopefully this document gives you enough context to see why they are important, and hints at how their representation in the compiler helps us implement some cases that would be tricky otherwise.

View file

@ -1,252 +0,0 @@
Existential Types
=================
This document attempts to provide some background on "existential types" as they pertain to the design and implementation of Slang.
The features described here are *not* reflected in the current implementation, so this is mostly a sketch of where we can go with the language and compiler.
Background: Generics and Universal Quantification
-------------------------------------------------
Currently Slang supports using interfaces as generic constraints. Let's use a contrived example:
```hlsl
interface IImage { float4 getValue(float2 uv); }
float4 offsetImage<T : IImage>(T image, float2 uv)
{
float2 offset = ...;
return image.getValue(uv + offset)
}
```
Generics like this are a form of "universal quantification" in the terminology of type theory.
This makes sense, because *for all* types `T` that satisfy the constraints, `offsetImage` provides an implementation of its functionality.
When we think of translating `offsetImage` to code, we might at first only think about how we can specialize it once we have a particular type `T` in mind.
However, we can also imagine trying to generate one body of code that can implement `offsetImage` for *any* type `T`, given some kind of runtime representation of types.
For example, we might generate C++ code like:
```c++
struct IImageWitnessTable { float4 (*getValue)(void* obj, float2 uv); };
float4 offsetImage(Type* T, IImageWitnessTable* W, void* image, float2 uv)
{
float2 offset = ...;
return W->getValue(image, uv + offset);
}
```
This translation takes the generic parameters and turns them into ordinary runtime parameters: the type `T` becomes a pointer to a run-time type representation, while the constraint that `T : IImage` becomes a "witness table" of function pointers that, we assume, implements the `IImage` interface for `T`. So, the syntax of generics is *not* tied to static specialization, and can admit a purely runtime implementation as well.
Readers who are familiar with how languages like C++ are implemented might see the "witness table" above and realize that it is kind of like a virtual function table, just being passed alongside the object, rather than stored in its first word.
Using Interfaces Like Types
---------------------------
It is natural for a user to want to write code like the following:
```hlsl
float4 modulateImage(IImage image, float2 uv)
{
float4 factor = ...;
return factor * image.getValue(uv);
}
```
Unlike `offsetImage`, `modulateImage` is trying to use the `IImage` interface as a *type* and not just a constraint.
This code appears to be asking for a dynamic implementation rather than specialization (we'll get back to that...) and so we should be able to implement it similarly to our translation of `offsetImage` to C++.
Something like the following makes a lot of sense:
```c++
struct IImage { Type* T; IImageWitnessTable* W; void* obj; };
float4 modulateImage(IImage image, float2 uv)
{
float4 factor = ...;
return factor * image.W->getValue(image.obj, uv);
}
```
Similar to the earlier example, there is a one-to-one mapping of the parameters of the Slang function the user wrote to the parameters of the generated C++ function.
To make this work, we had to bundle up the information that used to be separate parameters to the generic as a single value of type `IImage`.
Existential Types
-----------------
It turns out that when we use `IImage` as a type, it is what we'd call an *existential* type.
That is because if I give you a value `img` of type `IImage` in our C++ model, then you know that *there exists* some type `img.T`, a witness table `img.W` proving the type implements `IImage`, and a value `img.obj` of that type.
Existential types are the bread and butter of object-oriented programming.
If I give you an `ID3D11Texture2D*` you don't know what its concrete type is, and you just trust me that some concrete type *exists* and that it implements the interface.
A C++ class or COM component can implement an existential type, with the constraint that the interfaces that a given type can support are limited by the way that virtual function tables are intrusively included inside the memory of the object, rather than externalized.
Many modern languages (e.g., Go) support adapting existing types to new interfaces, so that a "pointer" of interface type is actually a fat pointer: one for the object, and one for the interface dispatch table.
Our examples so far have assumed that the type `T` needs to be passed around separately from the witness table `W`, but that isn't strictly required in some implementations.
In type theory, the most important operation you can do with an existential type is to "open" it, which means to have a limited scope in which you can refer to the constituent pieces of a "bundled up" value of a type like `IImage`.
We could imagine "opening" an existential as something like:
```
void doSomethingCool<T : IImage>(T val);
void myFunc(IImage img)
{
open img as obj:T in
{
// In this scope we know that `T` is a type conforming to `IImage`,
// and `obj` is a value of type `T`.
//
doSomethingCool<T>(obj);
}
}
```
Self-Conformance
----------------
The above code with `doSomethingCool` and `myFunc` invites a much simpler solution:
```
void doSomethingCool<T : IImage>(T val);
void myFunc(IImage img)
{
doSomethingCool(img);
}
```
This seems like an appealing thing for a language to support, but there are some subtle reasons why this isn't possible to support in general.
If we think about what `doSomethingCool(img)` is asking for, it seems to be trying to invoke the function `doSomethingCool<IImage>`.
That function only accepts type parameters that implement the `IImage` interface, so we have to ask ourselves:
Does the (existential) type `IImage` implement the `IImage` interface?
Knowing the implementation strategy outlined above, we can re-phrase this question to: can we construct a witness table that implements the `IImage` interface for values of type `IImage`?
For simple interfaces this is sometimes possible, but in the general case there are other desirable language features that get in the way:
* When an interface has associated types, there is no type that can be chosen as the associated type for the interface's existential type. The "obvious" approach of using the constraints on the associated type can lead to unsound logic when interface methods take associated types as parameters.
* When an interface uses the "this type" (e.g., an `IComparable` interface with a `compareTo(ThisType other)` method), it isn't correct to simplify the this type to the interface type (just because you have two `IComparable` values doesn't mean you can compare them - they have to be of the same concrete type!)
* If we allow for `static` methods on interfaces, then what implementation would we use for these methods on the interface's existential type?
Encoding Existentials in the IR
-------------------------------
Existentials are encoded in the Slang IR quite simply. We have an operation `makeExistential(T, obj, W)` that takes a type `T`, a value `obj` that must have type `T`, and a witness table `W` that shows how `T` conforms to some interface `I`. The result of the `makeExistential` operation is then a value of the type `I`.
Rather than include an IR operation to "open" an existential, we can instead just provide accessors for the pieces of information in an existential: one to extract the type field, one to extract the value, and one to extract the witness table. These would idiomatically be used like:
```
let e : ISomeInterface = /* some existential */
let T : Type = extractExistentialType(e);
let W : WitnessTable = extractExistentialWitnessTable(e);
let obj : T = extractExistentialValue(e);
```
Note how the operation to extract `obj` gets its result type from the previously-executed extraction of the type.
Simplifying Code Using Existentials
-----------------------------------
It might seem like IR code generated using existentials can only be implemented using dynamic dispatch.
However, within a local scope it is clear that we can simplify expressions whenever `makeExistential` and `extractExistential*` operations are paired.
For example:
```
let e : ISomeInterface = makeExistential(A, a, X);
...
let B = extractExistentialType(e);
let b : B = extractExistentialValue(e);
let Y = extractExistentialWitnessTable(e);
```
It should be clear in context that we can replace `B` with `A`, `b` with `a`, and `Y` with `X`, after which all of the `extract*` operations and the `makeExistential` operation are dead and can be eliminated.
This kind of simplification works within a single function, as long as there is no conditional logic involving existentials.
We require further transformation passes to allow specialization in more general cases:
* Copy propagation, redundancy elimination and other dataflow optimizations are needed to simplify use of existentials within functions
* Type legalization passes, including some amount of scalarization, are needed to "expose" existential-type fields that are otherwise buried in a type
* Function specialization is needed so that a function with existential parameters is specialized based on the actual types used at call sites
Transformations just like these are already required when working with resource types (textures/samplers) on targets that don't support first-class computation on resources, so it is possible to share some of the same logic.
Similarly, any effort we put into validation (to ensure that code is written in a way that *can* be simplified) can hopefully be shared between existentials and resources.
Compositions
------------
So far I've only talked about existential types based on a single interface, but if you look at the encoding as a tuple `(obj, T, W)` there is no real reason that can't be generalized to hold multiple witness tables: `(obj, T, W0, ... WN)`. Interface compositions could be expressed at the language level using the `&` operator on interface (or existential) types.
The IR encoding doesn't need to change much to support compositions: we just need to allow multiple witness tables on `makeExistential` and have an index operand on `extractExistentialWitnessTable` to get at the right one.
The hardest part of supporting composition of interfaces is actually in how to linearize the set of interfaces in a way that is stable, so that changing a function from using `IA & IB` to `IB & IA` doesn't change the order in which witness tables get packed into an existential value.
Why are we passing along the type?
----------------------------------
I'm glossing over something pretty significant here, which is why anybody would pass around the type as part of the existential value, when none of our examples so far have made use of it.
This sort of thing isn't very important for languages where interface polymorphism is limited to heap-allocated "reference" types (or values that have been "boxed" into reference types), because the dynamic type of an object can almost always be read out of the object itself.
When dealing with a value type, though, we have to deal with things like making *copies*:
```
interface IWritable { [mutating] void write(int val); }
struct Cell : IWritable { int data; void write(int val) { data = val; } }
T copyAndClobber<T : IWritable>(T obj)
{
T copy = obj;
obj.write(9999);
return copy;
}
void test()
{
Cell cell = { 0 };
Cell result = copyAndClobber(cell);
// what is in `result.data`?
}
```
If we call `copyAndClobber` on a `Cell` value, then does the line `obj.write` overwrite the data in the explicit `copy` that was made?
It seems clear that a user would expect `copy` to be unaffected in the case where `T` is a value type.
How does that get implemented in our runtime version of things? Let's imagine some C++ translation:
```
void copyAndClobber(Type* T, IWritableWitnessTable* W, void* obj, void* _returnVal)
{
void* copy = alloca(T->sizeInBytes);
T->copyConstruct(copy, obj);
W->write(obj, 9999);
T->moveConstruct(_returnVal, copy);
}
```
Because this function returns a value of type `T` and we don't know how big that is, let's assume the caller is passing in a pointer to the storage where we should write the result.
Now, in order to have a local `copy` of the `obj` value that was passed in, we need to allocate some scratch storage, and only the type `T` can know how many bytes we need.
Furthermore, when copying `obj` into that storage, or subsequently copying the `copy` variable into the function result, we need the copy/move semantics of type `T` to be provided by somebody.
This is the reason for passing through the type `T` as part of an existential value.
If we only wanted to deal with reference types, this would all be greatly simplified, because the `sizeInBytes` and the copy/move semantics would be fixed: everything is a single pointer.
All of the same issues arise if we're making copies of existential values:
```
IWritable copyAndClobberExistential(IWritable obj)
{
IWritable copy = obj;
obj.write(9999);
return copy;
}
```
If we want to stay consistent and say that `copy` is an actual copy of `obj` when the underlying type is a value rather than a reference type, then we need the copy/move operations for `IWritable` to handle invoking the copy/move operations of the underlying encapsulated type.
Aside: it should be clear from these examples that implementing generics and existential types with dynamic dispatch has a lot of complexity when we have to deal with value types (because copying requires memory allocation).
It is likely that a first implementation of dynamic dispatch support for Slang would restrict it to reference types (and would thus add a `class` keyword for defining reference types).

View file

@ -1,74 +0,0 @@
Deploying Experimental API Additions
====================================
This page intends to provide guidance to Slang developers when extending the Slang API, particularly when working on experimental features.
It applies to the "COM-lite" Slang API, rather than the deprecated C Slang API (sp* functions).
* Note: This guidance relates to Slang API changes, not to language changes. That is, what Slang does with shader source code across releases is not discussed here.
The goal is to maintain binary compatibility as much as possible between Slang releases, and to aid applications in dealing with changes to Slang.
Slang is distributed as a dynamic library, and there is an expectation from Slang API users that upgrading by installing an updated slang.dll or slang.so will not break their application unnecessarily.
ABI compatibility within the Slang API can be preserved between releases if some rules are followed by developers.
Slang API uses a "COM-lite" structure wherein functionality is exposed through interfaces on objects. If the interfaces never change, ABI compatibility is preserved, but changes happen. When adding or changing interfaces, please observe the following:
1. It is preferred to create *new* COM interfaces when adding new functionality.
* This maintains ABI compatibility.
* Applications must acquire access to the new functionality using QueryInterface(), which will gracefully fail if the slang.dll/slang.so does not implement the functionality.
2. Changes to existing virtual methods in COM interfaces should be avoided, as that is an ABI breakage.
* If a change is required though, change the interface's UUID.
3. New virtual methods _may_ be added (only) to the end of existing COM interface structs.
* This does not disturb the ABI compatibility of the associated vtable. Old apps can remain unaware of the new function pointers appended to the end of the vtable.
* A UUID change is not necessary.
* Note that in the event that a Slang application which uses the added feature is run with an old slang.dll/slang.so, the experience for the user is not as clean as if the added method belongs to a new interface.
Adding Experimental Interfaces
==============================
When the above recommendations cannot be followed, as with features that are expected to be iterated on or are regarded as temporary, there are additional recommendations.
Interfaces that are expected to change must be marked `_Experimental` in their class name and in their UUID name.
For example,
```csharp
/* Experimental interface for doing something cool. This interface is susceptible to ABI breakage. */
struct ICoolNewFeature_Experimental : public ISlangUnknown
{
SLANG_COM_INTERFACE(0x8e12e8e3, 0x5fcd, 0x433e, { 0xaf, 0xcb, 0x13, 0xa0, 0x88, 0xbc, 0x5e, 0xe5 })
virtual SLANG_NO_THROW SlangResult SLANG_MCALL coolMethod() = 0;
};
#define SLANG_UUID_ICoolNewFeature_Experimental ICoolNewFeature_Experimental::getTypeGuid()
```
Note: Use uuidgen to generate IIDs for new interfaces.
Removing Experimental Interfaces
================================
By the nature of being marked "Experimental", users have been warned that the interfaces are not officially supported and may be removed. You may simply delete the class and UUID, e.g. "ICoolNewFeature_Experimental" struct may be deleted from slang.h along with the definition of SLANG_UUID_ICoolNewFeature_Experimental.
This will show up in applications as QueryInterface failures.
It is nice, but not required, to retain the interface declarations for some time after removing internal support before deleting them from slang.h, so that applications have time to remove their dependence on the unsupported feature while still being able to compile in the interim.
Changing Experimental Interfaces
================================
Backwards incompatible changes to Slang COM interfaces should be accompanied with a UUID change.
In the event that an old application runs with a new slang library, applications are more capable of gracefully handling an unavailable interface than a changed one. The former may still be functional, or include a helpful error message, whereas the latter is most likely a crash of some sort.
Promoting Experimental Interfaces
=================================
The class name and the UUID name should be changed in slang.h and in the slang source code, e.g. Rename "ICoolNewFeature_Experimental" to just "ICoolFeature".
The SLANG_UUID for the interface should be renamed to omit "EXPERIMENTAL" but its value should remain the same. This is because, if there are no backwards incompatible changes that accompany the promotion from experimental to permanent, applications written against the experimental version can continue working against Slang libraries where the interface was promoted to permanent.

View file

@ -1,486 +0,0 @@
Interfaces Design
=================
This document intends to lay out the proposed design for a few inter-related features in Slang:
- Interfaces
- Associated Types
- Generics
Introduction
------------
The basic problem here is not unique to shader programming: you want to write code that accomplishes one task, while abstracting over how to accomplish another task.
As an example, we might want to write code to integrate incident radiance over a list of lights, while not concerning ourselves with how to evaluate a reflectance function at each of those lights.
If we were doing this task on a CPU, and performance wasn't critical, we could probably handle this with higher-order functions or an equivalent mechanism like function pointers:
float4 integrateLighting(
Light[] lights,
float4 (*brdf)(float3 wi, float3 wo, void* userData),
void const* brdfUserData)
{
float4 result = 0;
for(/* ... */) {
// ...
result += brdf(wi, wo, brdfUserData);
}
return result;
}
Depending on the scenario, we might be able to generate statically specialized code by using templates instead:
template<typename BRDF>
float4 integrateLighting(Light[] lights, BRDF const& brdf)
{
// ...
result += brdf(wi, wo);
// ...
}
Current shading languages support neither higher-order functions nor templates/generics, so neither of these options is viable.
Instead practitioners typically use preprocessor techniques to either stitch together the final code, or to substitute in different function/type definitions to make a definition like `integrateLighting` reusable.
These ad hoc approaches actually work well in practice; we aren't proposing to replace them *just* to make code abstractly "cleaner."
Rather, we've found that the ad hoc approaches end up interacting poorly with the resource binding model in modern APIs, so that *something* less ad hoc is required to achieve our performance goals.
At that point, we might as well ensure that the mechanism we introduce is also a good fit for the problem.
Overview
--------
The basic idea for our approach is as follows:
- Start with the general *semantics* of a generic-based ("template") approach
- Use the accumulated experience of the programming language community to ensure that our generics are humane (in other words: not like C++)
- Explore the possibility of syntax sugar to let people use more traditional OOP-style syntax when it can reduce verbosity without harming understanding
In general, our conceptual model is being ripped off wholesale from Rust and Swift.
The basic design principle is "when in doubt, do what Swift does."
Interfaces
----------
An **interface** in Slang is akin to a `protocol` in Swift or a `trait` in Rust.
The choice of the `interface` keyword is to highlight the overlap with the conceptually similar construct that appeared in Cg, and then later in HLSL.
### Declaring an interface
An interface is a named collection of **requirements**; any type that **implements** the interface must provide definitions that satisfy those requirements.
Here is a simple interface, with one requirement:
interface Light
{
float3 illuminate(float3 P_world);
}
The `Light` interface requires a (member) function called `illuminate` with the given signature.
### Declaring that a type implements an interface
A user-defined `struct` type can declare that it implements an interface, by using conventional "inheritance" syntax:
struct PointLight : Light
{
float3 P_light;
float3 illuminate(float3 P_world)
{
float distance = length(P_light - P_world);
// ...
}
}
It is a static error if a type declares that it implements an interface, but it does not provide all of the requirements:
struct BadLight : Light
{
// ERROR: type 'BadLight' cannot implement 'Light'
// because it does not provide the required 'illuminate' function
}
### Interface Inheritance
While this document does not propose general notions of inheritance be added to Slang, it does make sense to allow an interface to inherit from zero or more other interfaces:
interface InfinitessimalLight : Light
{
float3 getDirection(float3 P_world);
}
In this case the `InfinitessimalLight` interface inherits from `Light`, and declares one new requirement.
In order to check that a type implements `InfinitessimalLight`, the compiler will need to check both that it implements `Light` and that it provides the new "direct" requirements in `InfinitessimalLight`.
Declaring that a type implements an interface also implicitly declares that it implements all the interfaces that interface transitively inherits from:
struct DirectionalLight : InfinitessimalLight
{
float3 L;
float3 dir;
float3 getDirection(float3 P_world) { return dir; }
float3 illuminate(float3 P_world)
{
// Okay, this is the point where I recognize
// that this function definition is not
// actually reasonable for a light...
}
### Interfaces and Extensions
It probably needs its own design document, but Slang currently has very basic support for `extension` declarations that can add members to an existing type.
These blocks correspond to `extension` blocks in Swift, or `impl` blocks in Rust.
This can be used to declare that a type implements an interface retroactively:
extension PointLight : InfinitessimalLight
{
float3 getDirection(float3 P_world)
{
return normalize(P_light - P_world);
}
}
In this case we've used an extension to declare the `PointLight` also implements `InfinitessimalLight`. For the extension to type-check we need to provide the new required function (the compiler must recognize that the implementation of `Light` was already provided by the original type definition).
There are some subtleties around using extensions to add interface implementations:
- If the type already provides a method that matches a requirement, can the extension "see" it to satisfy new requirements?
- When can one extension "see" members (or interface implementations) added by another?
A first implementation can probably ignore the issue of interface implementations added by extensions, and only support them directly on type definitions.
Generics
--------
All of the above discussion around interfaces neglected to show how to actually *use* the fact that, e.g., `PointLight` implements the `Light` interface.
That is intentional, because at the most basic level, interfaces are designed to be used in the context of **generics**.
### Generic Declarations
The Slang compiler currently has some ad hoc support for generic declarations that it uses to implement the HLSL standard module (which has a few generic types).
The syntax for those is currently very bad, and it makes sense to converge on the style for generic declarations used by C# and Swift:
float myGenericFunc<T>(T someValue);
Types can also be generic:
struct MyStruct<T> { float a; T b; }
Ideally we should also allow interfaces and interface requirements to be generic, but there will probably be some limits due to implementation complexity.
### Type Constraints
Unlike C++, Slang needs to be able to type-check the body of a generic function ahead of time, so it can't rely on `T` having particular members:
// This generic is okay, because it doesn't assume anything about `T`
// (other than the fact that it can be passed as input/output)
T okayGeneric<T>(T a) { return a; }
// This generic is not okay, because it assumes that `T` supports
// certain operators, and we have no way of knowing if this is true:
T notOkayGeneric<T>(T a) { return a + a; }
In order to rely on non-trivial operations in a generic parameter type like `T`, the user must **constrain** the type parameter using an interface:
float3 mySurfaceShader<L : Light>(L aLight)
{
return aLight.illuminate(...);
}
In this example, we have constrained the type parameter `L` so that it must implement the interface `Light`.
As a result, in the body of the function, the compiler can recognize that `aLight`, which is of type `L`, must implement `Light` and thus have a member `illuminate`.
When calling a function with a constrained type parameter, the compiler must check that the actual type argument (whether provided explicitly or inferred) implements the interface given in the constraint:
mySurfaceShader<PointLight>(myPointLight); // OK
mySurfaceShader(myPointLight); // equivalent to previous
mySurfaceShader(3.0f); // ERROR: `float` does not implement `Light`
Note that in the erroneous case, the error is reported at the call site, rather than in the body of the callee (as it would be for C++ templates).
For cases where we must constrain a type parameter to implement multiple interfaces, we can join the interface types with `&`:
interface Foo { void foo(); }
interface Bar { void bar(); }
void myFunc<T : Foo & Bar>(T val)
{
val.foo();
val.bar();
}
If we end up with very complicated type constraints, then it makes sense to support a "`where` clause" that allows requirements to be stated outside of the generic parameter list:
void myFunc<T>(T val)
where T : Foo,
T : Bar
{}
Both the use of `&` and `where` are advanced features that we might cut due to implementation complexity.
### Value Parameters
Because HLSL has generics like `vector<float,3>` that already take non-type parameters, the language will need *some* degree of support for generic parameters that aren't types (at least integers need to be supported).
We need syntax for this that doesn't bloat the common case.
In this case, I think that what I've used in the current Slang implementation is reasonable, where a value parameter needs a `let` prefix:
void someFunc<
T, // type parameter
T : X, // type parameter with constraint
T = Y, // type parameter with default
T : X = Y, // type parameter with constraint and default
let N : int, // value parameter (type must be explicit)
let N : int = 3> // value parameter with default
()
{ ... }
We should also extend the `where` clauses to support inequality constraints on (integer) value parameters to enforce rules about what ranges of integers are valid.
The front-end should issue error messages if it can statically determine these constraints are violated, but it should probably defer full checking until the IR (maybe... we need to think about how much of a dependent type system we are willing to have).
Associated Types
----------------
While the syntax is a bit different, the above mechanisms have approximately the same capabilities as Cg interfaces.
What the above approach can't handle (and neither can Cg) is a reusable definition of a surface material "pattern" that might blend multiple material layers to derive parameters for a specific BRDF.
That is, suppose we have two BRDFs: one with two parameters, and one with six.
Different surface patterns may want to target different BRDFs.
So if we write a `Material` interface like:
interface Material
{
BRDFParams evaluatePattern(float2 uv);
}
Then what should `BRDFParams` be? The two-parameter or six-parameter case?
An **associated type** is a concept that solves exactly this problem.
We don't care *what* the concrete type of `BRDFParams` is, so long as *every* implementation of `Material` has one.
The exact `BRDFParams` type can be different for each implementation of `Material`; the type is *associated* with a particular implementation.
We will crib our syntax for this entirely from Swift, where it is verbose but explicit:
interface Material
{
associatedtype BRDFParams;
BRDFParams evaluatePattern(float2 uv);
float3 evaluateBRDF(BRDFParams param, float3 wi, float3 wo);
}
In this example we've added an associated type requirement so that every implementation of `Material` must supply a type named `BRDFParams` as a member.
We've also added a requirement that is a function to evaluate the BRDF given its parameters and incoming/outgoing directions.
Using this declaration one can now define a generic function that works on any material:
float3 evaluateSurface<M : Material, L : Light>(
M material,
L[] lights,
float3 P_world,
float2 uv)
{
M.BRDFParams brdfParams = material.evaluatePattern(uv);
for(...)
{
L light = lights[i];
// ...
float3 reflectance = material.evaluateBRDF(brdfParams, ...);
}
}
Some quick notes:
- The use of `associatedtype` (for associated types) and `typealias` (for `typedef`-like definitions) as distinct keywords in Swift was well motivated by their experience (they used to use `typealias` for both). I would avoid having the two cases be syntactically identical.
- Swift has a pretty involved inference system where a type doesn't actually need to explicitly provide a type member with the chosen name. Instead, if you have a required method that takes or returns the associated type, then the compiler can infer what the type is by looking at the signature of the methods that meet other requirements. This is a complex and magical feature, and we shouldn't try to duplicate it.
- Both Rust and Swift call this an "associated type." They are related to "virtual types" in things like Scala (which are in turn related to virtual classes in beta/gbeta). There are similar ideas that arise in Haskell-like languages with type classes (IIRC, the term "functional dependencies" is relevant).
### Alternatives
I want to point out a few alternatives to the `Material` design above, just to show that associated types seem to be an elegant solution compared to the alternatives.
First, note that we could break `Material` into two interfaces, so long as we are allowed to place type constraints on associated types:
interface BRDF
{
float3 evaluate(float3 wi, float3 wo);
}
interface Material
{
associatedtype B : BRDF;
B evaluatePattern(float2 uv);
}
This refactoring might be cleaner if we imagine that a shader library would have family of reflectance functions (implementing `BRDF`) and then a large library of material patterns (implementing `Material`) - we wouldn't want each and every material to have to implement a dummy `evaluateBRDF` that just forwards to a BRDF instance nested in it.
Looking at that type `B` there, we might start to wonder if we could just replace this with a generic type parameter on the interface:
interface Material< B : BRDF >
{
B evaluatePattern(float2 uv);
}
This would change any type that implements `Material`:
// old:
struct MyMaterial : Material
{
typealias B = GGX;
GGX evaluatePattern(...) { ... }
}
// new:
struct MyMaterial : Material<GGX>
{
GGX evaluatePattern(...) { ... }
}
That doesn't seem so bad, but it ignores the complexity that arises at any use sites, e.g.:
float3 evaluateSurface<B : BRDF, M : Material<B>, L : Light>(
M material,
L[] lights,
float3 P_world,
float2 uv)
{ ... }
The type `B` which is logically an implementation detail of `M` now surfaces to the generic parameter list of any function that wants to traffic in materials.
This reduces the signal/noise ratio for anybody reading the code, and also means that any top-level code that is supposed to be specializing this function (suppose this was a fragment entry point) now needs to understand how to pick apart the `Material` it has on the host side to get the right type parameters.
This kind of issue has existed in the PL community at least as far back as the ML module system (it is tough to search for by name, but the concepts of "parameterization" vs. "fibration" are relevant here), and the Scala researchers made a clear argument (I think it was in the paper on "un-types") that there is a categorical distinction between the types that are logically the *inputs* to an abstraction, and the types that are logically the *outputs*. Generic type parameters and associated types handle these two distinct roles.
Returning an Interface
----------------------
The revised `Material` definition:
interface BRDF
{
float3 evaluate(float3 wi, float3 wo);
}
interface Material
{
associatedtype B : BRDF;
B evaluatePattern(float2 uv);
}
has a function `evaluatePattern` that returns a type that implements an interface.
In the case where the return type is concrete, this isn't a problem (and the nature of associated types means that `B` will be concrete in any actual concrete implementation of `Material`).
There is an open question of whether it is ever necessary (or even helpful) to have a function that returns a value of *some* type known to implement an interface, without having to state that type in the function signature.
This is a point that has [come up](https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md) in the Rust world, where they have discussed using a keyword like `some` to indicate the existential nature of the result type:
// A function that returns *some* implementation of `Light`
func foo<T>() -> some Light;
The Rust proposal linked above has them trying to work toward `impl` as the keyword, and allowing it in both argument and result positions (to cover both universal and existential quantification).
In general, such a feature would need to have many constraints:
- The concrete return type must be fixed (even if clients of the function should be insulated from the choice), given the actual generic arguments provided.
- If the existential is really going to be sealed, then the caller shouldn't be allowed to assume anything *except* that two calls to the same function with identical generic arguments should yield results of identical type.
Under those constraints, it is pretty easy to see that an existential-returning method like:
interface Foo<T>
{
func foo<U>() -> some Bar;
}
can in principle be desugared into:
interface Foo<T>
{
associatedtype B<U> : Bar;
func foo<U>() -> B<U>;
}
with no particular loss in what can be expressed.
The same desugaring approach should apply to global-scope functions that want to return an existential type (just with a global `typealias` instead of an `associatedtype`).
It might be inconvenient for the user to have to explicitly write the type-level expression that yields the result type (consider cases where C++ template metaprogrammers would use `auto` as a result type), but there is really no added power.
Object-Oriented Sugar
---------------------
Having to explicitly write out generic parameter lists is tedious, especially in the (common) case where we will have exactly one parameter corresponding to each generic type parameter:
// Why am I repeating myself?!
//
void foo<L : Light, M : Material, C : Camera)(
L light, M material, C camera);
The intent seems to be clear if we instead write:
void foo(Light light, Material material, Camera camera);
We could consider the latter to be sugar for the former, and allow users to write in familiar syntax akin to what was already supported in Cg.
We'd have to be careful with such sugar, though, because there is a real and meaningful difference between saying:
- "`material` has type `Material` which is an interface type"
- "`material` has type `M` where `M` implements `Material`"
In particular, if we start to work with associated types:
let b = material.evaluatePattern(...);
It makes sense to say that `b` has type `M.BRDF`.
It does **not** make sense to say that `b` has type `Material.BRDF`, because there is no such concrete type.
(A third option is to say that `b` has type `material.BRDF`, which is basically the point where you have "virtual types" because we are now saying the type is a member of the *instance* and not of an enclosing *type*)
Note that the issue of having or not having object-oriented sugar is technically orthogonal from whether we allow "existential return types."
However, allowing the user to think of interfaces in traditional OOP terms leads to it being more likely that they will try to declare:
- functions that return an interface type
- local variables of interface type (which they might even assign to!)
- fields of interface type in their `struct`s
All of these complicate the desugaring step, because we would de facto have types/functions that mix up two stages of evaluation: a compile-time type-level step and a run-time value-level step.
Ultimately, we'd probably need to express these by having a multi-stage IR (with two stages) which we optimize in the staged setting before stage-splitting to get separate type-level and value-level operations (akin to the desugaring for existential return types I described above).
My sense is that a certain amount of multi-stage programming may already be needed to deal with certain HLSL/GLSL idioms. In particular:
- GLSL supports passing unsized arrays (e.g., `int[] a`) to a function, and then having the function use the size of the array (`a.length`) to do loops, etc. These would need to be lowered to distinct SPIR-V code for every array size used (if I understand the restrictions correctly), and so the feature is perhaps best thought of as passing both a compile-time integer parameter and a run-time array parameter (where the size comes from that parameter)
- HLSL and GLSL both have built-in functions where certain parameters are required to be compile-time constants. A feature-complete front-end must detect when calls to these functions are valid, and report errors to the user. In order to make the errors easier to explain to the user, it would be helpful to have an explicit notion of constant-rate computation, and require that the user express explicit constant-rate parameters/expressions.
All of this ties into the question of whether we need/want to support more general kinds of compile-time evaluation for specialization (e.g., statically-determine `if` statements or loops).
Other Languages
---------------
It is worth double-checking whether implementing all of this from scratch in Slang is a good idea, or if there is somewhere else we can achieve similar results more quickly:
- The Metal shading language has much of what we'd want. It is based on C++ templates, which are maybe not the ideal mechanism, and the compiler is closed-source so we can't easily add functionality. Still, it should be possible to prototype a lot of what we want on top of Metal 2.
- The open-source HLSL compiler doesn't support any of the new ideas here, but it may be that adding them to `dxc` would be faster than adding them to the Slang project code. Using `dxc` is a no-go for some of the other Slang requirements (that come from our users on the Falcor project).
- Swift already supports almost every thing on our list of requirements, but as it stands today there is no easy path to using it for low-level GPU code generation. It also fails to meet our goals for incremental adoption, high-level source output, etc.
In the long run, however, the Swift compiler seems like an attractive intercept for this work, because their long-term roadmap seems like it will close a lot of the gap with what we've done so far.
Conclusion
----------
This document has described the basic syntax and semantics for three related features -- interfaces, generics, and associated types -- along with some commentary on longer-term directions.
My expectation is that we will use the syntax as laid down here, unless we have a very good reason to depart from it, and we will prioritize implementation work as needed to get interesting shader library functionality up and running.

View file

@ -1,275 +0,0 @@
The Design of Slang's Intermediate Representation (IR)
======================================================
This document details some of the important design choices for Slang's IR.
Goals and Non-Goals
-------------------
The IR needs to balance many goals which can sometimes come into conflict.
We will start by enumerating these goals (and related non-goals) explicitly so that we can better motivate specific design choices.
* Obviously it must be simple to lower any source code in Slang code to the IR. It is however a non-goal for the lowering process to be lossless; we do not need to recover source-level program structure from the IR.
* The IR must be amenable to standard dataflow analyses and optimizations. It should be possible to read a paper on a compiler algorithm or technique and apply it to our IR in a straightforward manner, and with the expected asymptotic efficiency.
* As a particular case of analysis and optimization, it should be possible to validate flow-dependent properties of an input function/program (e.g., whether an `[unroll]` loop is actually unrollable) using the IR, and emit meaningful error messages that reference the AST-level names/locations of constructs involved in an error.
* It should be possible to compile modules to the IR separately and then "link" them in a way that depends only on IR-level (not AST-level) constructs. We want to allow changing implementation details of a module without forcing a re-compile of IR code using that module (what counts as "implementation details" is negotiable).
* There should be a way to serialize IR modules in a round-trip fashion preserving all of the structure. As a long-term goal, the serialized format should provide stability across compiler versions (working more as an IL than an IR)
* The IR must be able to encode "generic" (type-parameterized) constructs explicitly, and to express transformations from generic to specialized (or dynamic-dispatch) code in the IR. In particular, it must be possible for a module to make use of a generic defined in another (separately-compiled) module, with validation performed before linking, and specialization performed after.
* The IR must be able to express code that is close to the level of abstraction of shader intermediate languages (ILs) like SPIR-V and DXIL, so that we can minimize the amount of work required (and the number of issues that can arise) when translating the IR to these targets. This can involve lowering and legalization passes to match the constraints of those ILs, but it should not require too much work to be done outside of the IR.
* It should be possible to translate code in the IR back into high-level-language code, including things like structured control-flow constructs.
* Whenever possible, invariants required by the IR should be built into its structure so that they are easier to maintain.
* We should strive to make the IR encoding, both in memory and when serialized, as compact as is practically possible.
Inspirations
------------
The IR design we currently use takes inspiration from three main sources:
* The LLVM project provides the basic inspiration for the approach to SSA, such as using a typed IR, the decision to use the same object to represent an instruction and the SSA value it produces, and the push to have an extremely simple `replaceAllUsesWith` primitive. It is easy to forget that it is possible to design a compiler with different design decisions; the LLVM ones just happen to both be well-motivated and well-known.
* The Swift IL (SIL) provides the inspiration for our approach for encoding SSA "phi nodes" (blocks with arguments), and also informs some of how we have approached encoding generics and related features like existential types.
* The SPIR-V IL provides the inspiration for the choice to uniformly represent types as instructions, for how to encode "join points" for structured control flow, and for the concept of "decorations" for encoding additional metadata on instructions.
Key Design Decisions
--------------------
### Everything is an Instruction
The Slang IR strives for an extremely high degree of uniformity, so almost every concept in the IR is ultimately just an instruction:
* Ordinary add/sub/mul/etc. operations are instructions, as are function calls, branches, function parameters, etc.
* Basic blocks in functions, as well as functions themselves are "parent instructions" that can have other instructions as children
* Constant values (e.g., even `true` and `false`) are instructions
* Types are instructions too, and can have operands (e.g., a vector type is the `VectorType` instruction applied to operands for the element type and count)
* Generics are encoded entirely using ordinary instructions: a generic is encoded like a function that just happens to do computation at the type level
* It isn't true right now, but eventually decorations will also be instructions, so that they can have operands like any other instruction
* An overall IR module is itself an instruction so that there is a single tree that owns everything
This uniformity greatly simplifies the task of supporting generics, and also means that operations that need to work over all instructions, such as cloning and serialization, can work with a single uniform representation and avoid special-casing particular opcodes.
The decision to use an extremely uniform design, even going as far to treat types as "ordinary" instructions, is similar to SPIR-V, although we do not enforce many of the constraints SPIR-V does on how type and value instructions can be mixed.
### Instructions Have a Uniform Structure
Every instruction has:
* An opcode
* A type (the top-level module is the only place where this can be null)
* Zero or more operands
* Zero or more decorations
* Zero or more children
Instructions are not allowed to have any semantically-relevant information that is not in the above list.
The only exception to this rule is instructions that represent literal constants, which store additional data to represent their value.
The in-memory encoding places a few more restrictions on top of this so that, e.g., currently an instruction can either have operands or children, but not both.
Because everything that could be used as an operand is also an instruction, the operands of an instruction are stored in a highly uniform way as a contiguous array of `IRUse` values (even the type is contiguous with this array, so that it can be treated as an additional operand when required).
The `IRUse` type maintains explicit links for use-def information, currently in a slightly bloated fashion (there are well-known techniques for reducing the size of this information).
### A Class Hierarchy Mirrored in Opcodes
There is a logical "class hierarchy" for instructions, and we support (but do not mandate) declaring a C++ `struct` type to expose an instruction or group of instructions.
These `struct` types can be helpful to encode the fact that the program knows an instruction must/should have a particular type (e.g., having a function parameter of type `IRFunction*` prevents users from accidentally passing in an arbitrary `IRInst*` without checking that it is a function first), and can also provide convenience accessors for operands/children.
To make "dynamic cast" operations on this class hierarchy efficient, we arrange for the instruction opcodes for the in-memory IR to guarantee that all the descendents of a particular "base class" will occupy a contiguous range of opcodes. Checking that an instruction is in that range is then a constant-time operation that only looks at its opcode field.
There are some subtleties to how the opcodes are ordered to deal with the fact that some opcodes have a kind of "multiple inheritance" thing going on, but that is a design wart that we should probably remove over time, rather than something we are proud of.
### A Simpler Encoding of SSA
The traditional encoding of SSA form involves placing "phi" instructions at the start of blocks that represent control-flow join points where a variable will take on different values depending on the incoming edge that is taken.
There are of course benefits to sticking with tradition, but phi instructions also have a few downsides:
- The operands to phi instructions are the one case where the "def dominates use" constraint of SSA appears to be violated. I say "appears" because officially the action of a phi occurs on the incoming edge (not in the target block) and that edge will of course be dominated by the predecessor block. It still creates a special case that programmers need to be careful about. This also complicates serialization in that there is no order in which the blocks/instructions of a function can be emitted that guarantees that every instruction always precedes all of its uses in the stream.
- All of the phi instructions at the start of the block must effectively operate in parallel, so that they all "read" from the correct operand before "writing" to the target variable. Like the above special case, this is only a problem for a phi related to a loop back-edge. It is of course possible to always remember the special interpretation of phi instructions (that they don't actually execute sequentially like every other instruction in a block), but its another special case.
- The order of operands to a phi instruction needs to be related back to the predecessor blocks, so that one can determine which value is to be used for which incoming edge. Any transformation that modifies the CFG of a function needs to be careful to rewrite phi instructions to match the order in which predecessors are listed, or else the compiler must maintain a side data structure that remembers the mapping (and update it instead).
- Directly interpreting/executing code in an SSA IR with phi instructions is made more difficult because when branching to a block we need to immediately execute any phi instructions based on the block from which we just came. The above issues around phis needing to be executed in parallel, and needing to track how phi operands relate to predecessor blocks also add complexity to an interpreter.
Slang ditches traditional phi functions in favor of an alternative that matches the Swift IL (SIL).
The idea doesn't really start in Swift, but rather in the existing observation that SSA form IR and a continuation-passing style (CPS) IR are semantically equivalent; one can encode SSA blocks as continuation functions, where the arguments of the continuation stand in for the phi instructions, and a branch to the block becomes just a call.
Like Swift, we do not use an explicit CPS representation, but instead find a middle ground of a traditional SSA IR where instead of phi instructions basic blocks have parameters.
The first N instructions in a Slang basic block are its parameters, each of which is an `IRParam` instruction.
A block that would have had N phi instructions now has N parameters, but the parameters do not have operands.
Instead, a branch instruction that targets that block will have N *arguments* to match the parameters, representing the values to be assigned to the parameters when this control-flow edge is taken.
This encoding is equivalent in what it represents to traditional phi instructions, but nicely solves the problems outlined above:
- The phi operands in the successor block are now arguments in the *predecessor* block, so that the "def dominates use" property can be enforced without any special cases.
- The "assignment" of the argument values to parameters is now encoded with a single instruction, so that the simultaneity of all the assignments is more clear. We still need to be careful when leaving SSA form to obey those semantics, but there are no tricky issues when looking at the IR itself.
- There is no special work required to track which phi operands come from which predecessor block, since the operands are attached to the terminator instruction of the predecessor block itself. There is no need to update phi instructions after a CFG change that might affect the predecessor list of a block. The trade-off is that any change in the *number* of parameters of a block now requires changes to the terminator of each predecessor, but that is a less common change (isolated to passes that can introduce or eliminate block parameters/phis).
- It is much more clear how to give an operational semantics to a "branch with arguments" instead of phi instructions: compute the target block, copy the arguments to temporary storage (because of the simultaneity requirement), and then copy the temporaries over the parameters of the target block.
The main caveat of this representation is that it requires branch instructions to have room for arguments to the target block. For an ordinary unconditional branch this is pretty easy: we just put a variable number of arguments after the operand for the target block. For branch instructions like a two-way conditional, we might need to encode two argument lists - one for each target block - and an N-way `switch` branch only gets more complicated.
The Slang IR avoids the problem of needing to store arguments on every branch instruction by banning *critical edges* in IR functions that are using SSA phis/parameters. A critical edge is any edge from a block with multiple successors (meaning it ends in a conditional branch) to one with multiple predecessors (meaning it is a "join point" in the CFG).
Phi instructions/parameters are only ever needed at join points, and so block arguments are only needed on branches to a join point.
By ruling out conditional branches that target join points, we avoid the need to encode arguments on conditional branch instructions.
This constraint could be lifted at some point, but it is important to note that there are no programs that cannot be represented as a CFG without critical edges.
### A Simple Encoding of the CFG
A traditional SSA IR represents a function as a bunch of basic blocks of instructions, where each block ends in a *terminator* instruction.
Terminators are instructions that can branch to another block, and are only allowed at the end of a block.
The potential targets of a terminator determine the *successors* of the block where it appears, and contribute to the *predecessors* of any target block.
The successor-to-predecessor edges form a graph over the basic blocks called the control-flow graph (CFG).
A simple representation of a function would store the CFG explicitly as a graph data structure, but in that case the data structure would need to be updated whenever a change is made to the terminator instruction of a branch in a way that might change the successor/predecessor relationship.
The Slang IR avoids this maintenance problem by noting an important property.
If block `P`, with terminator `t`, is a predecessor of `S`, then `t` must have an operand that references `S`.
In turn, that means that the list of uses of `S` must include `t`.
We can thus scan through the list of predecessors or successors of a block with a reasonably simple algorithm:
* To find the successors of `P`, find its terminator `t`, identify the operands of `t` that represent successor blocks, and iterate over them. This is O(N) in the number of outgoing CFG edges.
* To find the predecessors of `S`, scan through its uses and identify users that are terminator instructions. For each such user if this use is at an operand position that represents a successor, then include the block containing the terminator in the output. This is O(N) in the number of *uses* of a block, but we expect that to be on the same order as the number of predecessors in practice.
Each of these actually iterates over the outgoing/incoming CFG *edges* of a block (which might contain duplicates if one block jumps to another in, e.g, multiple cases of a `switch`).
Sometimes you actually want the edges, or don't care about repeats, but in the case where you want to avoid duplicates the user needs to build a set to deduplicate the lists.
The clear benefit of this approach is that the predecessor/successor lists arise naturally from the existing encoding of control-flow instructions. It creates a bit of subtle logic when walking the predecessor/successor lists, but that code only needs to be revisited if we make changes to the terminator instructions that have successors.
### Explicit Encoding of Control-Flow Join Points
In order to allow reconstruction of high-level-language source code from a lower-level CFG, we need to encode something about the expected "join point" for a structured branch.
This is the logical place where control flow is said to "reconverge" after a branch, e.g.:
```hlsl
if(someCondition) // join point is "D"
{
A;
}
else
{
B;
if(C) return;
}
D;
```
Note that (unlike what some programming models would say) a join point is *not* necessarily a postdominator of the conditional branch. In the example above the block with `D` does not postdominate the block with `someCondition` nor the one with `B`. It is even possible to construct cases where the high-level join point of a control-flow construct is unreachable (e.g., the block after an infinite loop).
The Slang IR encodes structured control flow by making the join point be an explicit operand of a structured conditional branch operation. Note that a join-point operand is *not* used when computing the successor list of a block, since it does not represent a control-flow edge.
This is slightly different from SPIR-V where join points ("merge points" in SPIR-V) are encoded using a metadata instruction that precedes a branch. Keeping the information on the instruction itself avoids cases where we move one but not the other of the instructions, or where we might accidentally insert code between the metadata instruction and the terminator it modifies.
In the future we might consider using a decoration to represent join points.
When using a loop instruction, the join point is also the `break` label. The SPIR-V `OpLoopMerge` includes not only the join point (`break` target) but also a `continue` target. We do not currently represent structured information for `continue` blocks.
The reason for this is that while we could keep structured information about `continue` blocks, we might not be able to leverage it when generating high-level code, because the syntactic form of a `for` loop (the only construct in C-like languages where `continue` can go somewhere other than the top of the loop body) only allows an *expression* for the continue clause and not a general *statement*, but we cannot guarantee that after optimization the code in an IR-level "continue clause" would constitute a single expression.
The approach we use today means that the code in "continue clause" might end up being emitted more than once in final code; this is deemed acceptable because it is what `fxc` already does.
When it comes time to re-form higher-level structured control flow from Slang IR, we use the structuring information in the IR to form single-entry "regions" of code that map to existing high-level control-flow constructs (things like `if` statements, loops, `break` or `continue` statements, etc.).
The current approach we use requires the structuring information to be maintained by all IR transformations, and also currently relies on some invariants about what optimizations are allowed to do (e.g., we had better not introduce multi-level `break`s into the IR).
In the future, it would be good to investigate adapting the "Relooper" algorithm used in Emscripten so that we can recover valid structured control flow from an arbitrary CFG; for now we put off that work.
If we had a more powerful restructuring algorithm at hand, we could start to support things like multi-level `break`, and also ensure that `continue` clauses don't lead to code duplication any more.
## IR Global and Hoistable Value Deduplication
Types, constants and certain operations on constants are considered "global values" in the Slang IR. Some other insts like `Specialize()` and `Ptr(x)` are considered "hoistable" insts, in that they will be defined at the outermost scope where their operands are available. For example, `Ptr(int)` will always be defined at global scope (as a direct child of `IRModuleInst`) because its only operand, `int`, is defined at global scope. However if we have `Ptr(T)` where `T` is a generic parameter, then this `Ptr(T)` inst will always be defined in the block of the generic. Global and hoistable values are always deduplicated, and we can always assume two hoistable values with different pointer addresses are distinct values.
The `IRBuilder` class is responsible for ensuring the uniqueness of global/hoistable values. If you call any `IRBuilder` methods that creates a new hoistable instruction, e.g. `IRBuilder::createIntrinsicInst`, `IRBuilder::emitXXX` or `IRBuilder::getType`, `IRBuilder` will check if an equivalent value already exists, and if so it returns the existing inst instead of creating a new one.
The trickier part here is to always maintain the uniqueness when we modify the IR. When we update the operand of an inst from a non-hoistable-value to a hoistable-value, we may need to hoist `inst` itself as a result. For example, consider the following code:
```
%1 = IntType
%p = Ptr(%1)
%2 = func {
%x = ...;
%3 = Ptr(%x);
%4 = ArrayType(%3);
%5 = Var (type: %4);
...
}
```
Now consider the scenario where we need to replace the operand in `Ptr(x)` with `int` (where `x` is some non-constant value): we will get a `Ptr(int)` which is now a global value and should be deduplicated:
```
%1 = IntType
%p = Ptr(%1)
%2 = func {
%x = ...;
//%3 now becomes %p.
%4 = ArrayType(%p);
%5 = Var (type: %4);
...
}
```
Note this code is now breaking the invariant that hoistable insts are always defined at the top-most scope, because `%4` is no longer dependent on any local insts in the function, and should be hoisted to the global scope after replacing `%3` with `%p`. This means that we need to continue to perform hoisting of `%4`, resulting in this final code:
```
%1 = IntType
%p = Ptr(%1)
%4 = ArrayType(%p); // hoisted to global scope
%2 = func {
%x = ...;
%5 = Var (type: %4);
...
}
```
As illustrated above, because we need to maintain the invariants of global/hoistable values, replacing an operand of an inst can have wide-spread effect on the IR.
To help ensure these invariants, we introduce the `IRBuilder.replaceOperand(inst, operandIndex, newOperand)` method to perform all the cascading modifications after replacing an operand. However the `IRInst.setOperand(idx, newOperand)` will not perform the cascading modifications, and using `setOperand` to modify the operand of a hoistable inst will trigger a runtime assertion error.
Similarly, `inst->replaceUsesWith` will also perform any cascading modifications to ensure the uniqueness of hoistable values. Because of this, we need to be particularly careful when using a loop to iterate the IR linked list or def-use linked list and call `replaceUsesWith` or `replaceOperand` inside the loop.
Consider the following code:
```
IRInst* nextInst = nullptr;
for (auto inst = func->getFirstChild(); inst; inst = nextInst)
{
nextInst = inst->getNextInst(); // save a copy of nextInst
// ...
inst->replaceUsesWith(someNewInst); // Warning: this may be unsafe, because nextInst could have been moved to parent->parent!
}
```
Now imagine this code is running on the `func` defined above, and imagine we are now at `inst == %3` and we want to replace `inst` with `Ptr(int)`. Before calling `replaceUsesWith`, we have stored `inst->nextInst` to `nextInst`, so `nextInst` is now `%4` (the array type). Now after we call `replaceUsesWith`, `%4` is hoisted to global scope, so in the next iteration, we will start to process `%4` and follow its `next` pointer to `%2` — we will be processing `func` instead of continuing to walk the child list!
Because of this, we should never be calling `replaceOperand` or `replaceUsesWith` when we are walking the IR linked list. If we want to do so, we must create a temporary workList and add all the insts to the work list before we make any modifications. The `IRInst::getModifiableChildren` utility function will return a temporary work list for safe iteration on the children. The same can be said of the def-use linked list. There are `traverseUses` and `traverseUsers` utility functions defined in `slang-ir.h` to help with walking the def-use list safely.
Another detail to keep in mind is that any local references to an inst may become out-of-date after a call to `replaceOperand` or `replaceUsesWith`. Consider the following code:
```
IRBuilder builder;
auto x = builder.emitXXX(); // x is some non-hoistable value.
auto ptr = builder.getPtrType(x); // create ptr(x).
x->replaceUsesWith(intType); // this renders `ptr` obsolete!!
auto var = builder.emitVar(ptr); // use the obsolete inst to create another inst.
```
In this example, calling `replaceUsesWith` will cause `ptr` to represent `Ptr(int)`, which may already exist in the global scope. After this call, all uses of `ptr` should be replaced with the global `Ptr(int)` inst instead. `IRBuilder` has provided the mechanism to track all the insts that are removed due to deduplication, and map those removed but not yet deleted insts to the existing inst. When using `ptr` to create a new inst, `IRBuilder` will first check if `ptr` should map to some existing hoistable inst in the global deduplication map and replace it if possible. This means that after the call to `builder.emitVar`, `var->type` is not equal to `ptr`.
### Best Practices
In summary, the best practices when modifying the IR are:
- Never call `replaceUsesWith` or `replaceOperand` when walking raw linked lists in the IR. Always create a work list and iterate on the work list instead. Use `IRInst::getModifiableChildren` and `traverseUses` when you need to modify the IR while iterating.
- Never assume any local references to an `inst` are up-to-date after a call to `replaceUsesWith` or `replaceOperand`. It is OK to continue using them as operands/types to create a new inst, but do not assume the created inst will reference the same inst passed in as argument.

View file

@ -1,259 +0,0 @@
An overview of the Slang Compiler
=================================
This document will attempt to walk through the overall flow of the Slang compiler, as an aid to developers who are trying to get familiar with the codebase and its design.
More emphasis will be given to places where the compiler design is nontraditional, or might surprise newcomers; things that are straightforward won't get much detail.
High-Level Concepts
-------------------
Compilation is always performed in the context of a *compile request*, which bundles together the options, input files, and request for code generation.
Inside the code, there is a type `CompileRequest` to represent this.
The user specifies some number of *translation units* (represented in the code as a `TranslationUnitRequest`) which comprise some number of *sources* (files or strings).
HLSL follows the traditional C model where a "translation unit" is more or less synonymous with a source file, so when compiling HLSL code the command-line `slangc` will treat each source file as its own translation unit.
For Slang code, the command-line tool will by default put all source files into a single translation unit (so that they represent a shared namespace).
The user can also specify some number of *entry points* in each translation unit (`EntryPointRequest`), which combines the name of a function to compile with the pipeline stage to compile for.
In a single compile request, we can generate code for zero or more *targets* (represented with `TargetRequest`); a target defines both the format for output code (e.g., DXIL or SPIR-V) and a *profile* that specifies the capability level to assume (e.g., "Shader Model 5.1").
It might not be immediately clear why we have such fine-grained concepts as this, but it ends up being quite important to decide which pieces of the compiler are allowed to depend on which pieces of information (e.g., whether or not a phase of compilation gets to depend on the chosen target).
The "Front End"
---------------
The job of the Slang front-end is to turn textual source code into a combination of code in our custom intermediate representation (IR) plus layout and binding information for shader parameters.
### Lexing
The first step in the compiler (after a source file has been loaded into memory) is to *lex* it.
The `Lexer` type is implemented in `lexer.{h,cpp}` and produces `Token`s that represent the contents of the file on-demand as requested by the next phase of compilation.
Each token stores a `TokenCode` that indicates the kind of token, the raw text of the token, and the location in the source code where it is located.
Source locations use a somewhat clever encoding to avoid being bloated (they are a single integer rather than separate file, line, and column fields).
We don't make any attempt in the lexer to extract the actual value of integer and floating-point literals; we just store the raw text.
We also don't try to distinguish keywords from identifiers; keywords show up as ordinary identifier tokens.
Much of the complexity (and inefficiency) in the current lexer is derived from the need to support C-isms like backspace line continuation, and special case rules like allowing `<>` to delimit a file name string after a `#include`.
### Preprocessing
The preprocessor (`Preprocessor`) in `preprocessor.{h,cpp}` deals with `#include` constructs, macro expansions, etc.
It pulls tokens from the lexer as needed (making sure to set flags to control the lexer behavior when required) and uses a limited lookahead to decide what to do with each token.
The preprocessor maintains a stack of input streams, with the original source file at the bottom, and pushes entries for `#include`d files, macros to expand etc.
Macro definitions store a sequence of already-lexed tokens, and expansion simply "replays" these tokens.
Expansion keeps a notion of an "environment" for looking up identifiers and mapping them to macro definitions.
Calling through to a function-style macro creates a fresh environment that maps the macro parameter names to pseudo-macros for the arguments.
We still tokenize code in inactive preprocessor conditionals, but don't evaluate preprocessor directives inside inactive blocks (except those that may change the active/inactive state).
Preprocessor directives are each handled as a callback on the preprocessor state and are looked up by name; adding a new directive (if we ever had a reason to) is a fairly simple task.
One important detail of the preprocessor is that it runs over a full source file at once and produces a flat array of `Token`s, so that there is no direct interaction between the parser and preprocessor.
### Parsing
The parser (`Parser` in `parser.{h,cpp}`) is mostly a straightforward recursive-descent parser.
Because the input is already tokenized before we start, we can use arbitrary lookahead, although we seldom look ahead more than one token.
Traditionally, parsing of C-like languages requires context-sensitive parsing techniques to distinguish types from values, and deal with stuff like the C++ "most vexing parse."
Slang instead uses heuristic approaches: for example, when we encounter an `<` after an identifier, we first try parsing a generic argument list with a closing `>` and then look at the next token to determine if this looks like a generic application (in which case we continue from there) or not (in which case we backtrack).
There are still some cases where we use lookup in the current environment to see if something is a type or a value, but officially we strive to support out-of-order declarations like most modern languages.
In order to achieve that goal we will eventually move to a model where we parse the bodies of declarations and functions in a later pass, after we have resolved names in the global scope.
One important choice in the parser is that we strive to avoid hard-coding keywords as much as possible.
We already track an environment for C-like parsing, and we simply extend that so that we also look up declaration and statement keywords in the environment.
This means that most of the language "keywords" in Slang aren't keywords at all, and instead are just identifiers that happen to be bound to syntax in the default environment.
Syntax declarations are associated with a callback that is invoked to parse the construct they name.
The design of treating syntax as ordinary declarations has a long-term motivation (we'd like to support a flexible macro system) but it also has short-term practical benefits.
It is easy for us to add new modifier keywords to the language without touching the lexer or parser (just adding them to the core module), and we also don't have to worry about any of Slang's extended constructs (e.g., `import`) breaking existing HLSL code that just happens to use one of those new keywords as a local variable name.
What the parser produces is an abstract syntax tree (AST).
The AST currently uses a strongly-typed C++ class hierarchy with a "visitor" API generated via some ugly macro magic.
Dynamic casting using C++ RTTI is used in many places to check the class of an AST node; we aren't happy with this but also haven't had time to implement a better/faster solution.
In the parsed AST, both types and expressions use the same representation (because in an expression like `A(B)` it is possible that `A` will resolve to a type, or to a function, and we don't know which yet).
One slightly odd design choice in the parser is that it attaches lexical scoping information to the syntax nodes for identifiers, and to any other AST node that needs access to the scope/environment where it was defined. This is a choice we will probably change at some point, but it is deeply ingrained right now.
### Semantic Checking
The semantic checking step (`check.{h,cpp}`) is, not surprisingly, the most complicated and messiest bit of the compiler today.
The basic premise is simple: recursively walk the entire AST and apply semantic checking to each construct.
Semantic checking applies to one translation unit at a time.
It has access to the list of entry points for the translation unit (so it can validate them), but it is *not* allowed to depend on the compilation target(s) the user might have selected.
Semantic checking of an expression or type term can yield the same AST node, with type information added, or it can return newly constructed AST nodes (e.g., when an implicit cast needs to be inserted).
Unchecked identifiers or member references are always resolved to have a pointer to the exact declaration node they are referencing.
Types are represented with a distinct class hierarchy from AST nodes, which is also used for a general notion of compile-time values which can be used to instantiate generic types/functions/etc.
An expression that ends up referring to a type will have a `TypeType` as its type, which will hold the actual type that the expression represents.
The most complicated thing about semantic checking is that we strive to support out-of-order declarations, which means we may need to check a function declaration later in the file before checking a function body early in the file.
In turn, that function declaration might depend on a reference to a nested type declared somewhere else, etc.
We currently solve this issue by doing some amount of on-demand checking; when we have a reference to a function declaration and we need to know its type, we will first check if the function has been through semantic checking yet, and if not we will go ahead and recursively type check that function before we proceed.
This kind of unbounded recursion can lead to real problems (especially when the user might write code with circular dependencies), so we have made some attempts to more strictly "phase" the semantic checking, but those efforts have not yet been done systematically.
When code involves generics and/or interfaces, the semantic checking phase is responsible for ensuring that when a type claims to implement an interface it provides all of the requirements of that interface, and it records the mapping from requirements to their implementations for later use. Similarly, the body of a generic is checked to make sure it uses type parameters in ways that are consistent with their constraints, and the AST is amended to make it explicit when an interface requirement is being employed.
### Lowering and Mandatory Optimizations
The lowering step (`lower-to-ir.{h,cpp}`) is responsible for converting semantically valid ASTs into an intermediate representation that is more suitable for specialization, optimization, and code generation.
The main thing that happens at this step is that a lot of the "sugar" in a high-level language gets baked out. For example:
- A "member function" in a type will turn into an ordinary function that takes an initial `this` parameter
- A `struct` type nested in another `struct` will turn into an ordinary top-level `struct`
- Compound expressions will turn into sequences of instructions that bake the order of evaluation
- High-level control-flow statements will get resolved to a control-flow graph (CFG) of basic blocks
The lowering step is done once for each translation unit, and like semantic checking it does *not* depend on any particular compilation target.
During this step we attach "mangled" names to any imported or exported symbols, so that each function overload, etc. has a unique name.
After IR code has been generated for a translation unit (now called a "module") we next perform a set of "mandatory" optimizations, including SSA promotion and simple copy propagation and elimination of dead control-flow paths.
These optimizations are not primarily motivated by a desire to speed up code, but rather to ensure that certain "obvious" simplifications have been performed before the next step of validation.
After the IR has been "optimized" we perform certain validation/checking tasks that would have been difficult or impossible to perform on the AST.
For example, we can validate that control flow never reaches the end of a non-`void` function, and issue an error otherwise.
There are other validation tasks that can/should be performed at this step, although not all of them are currently implemented:
- We should check that any `[unroll]` loops can actually be unrolled, by ensuring that their termination conditions can be resolved to a compile-time constant (even if we don't know the constant yet)
- We should check that any resource types are being used in ways that can be statically resolved (e.g., that the code never conditionally computes a resource to reference), since this is a requirement for all our current targets
- We should check that the operands to any operation that requires a compile-time constant (e.g., the texel offset argument to certain `Sample()` calls) are passed values that end up being compile-time constants
The goal is to eliminate any possible sources of failure in low-level code generation, without needing to have a global view of all the code in a program.
Any error conditions we have to push off until later start to limit the value of our separate compilation support.
### Parameter Binding and Type Layout
The next phase of parameter binding (`parameter-binding.{h,cpp}`) is independent of IR generation, and proceeds based on the AST that came out of semantic checking.
Parameter binding is the task of figuring out what locations/bindings/offsets should be given to all shader parameters referenced by the user's code.
Parameter binding is done once for each target (because, e.g., Vulkan may bind parameters differently than D3D12), and it is done for the whole compile request (all translation units) rather than one at a time.
This is because when users compile something like HLSL vertex and fragment shaders in distinct translation units, they will often share the "same" parameter via a header, and we need to ensure that it gets just one location.
At a high level, parameter binding starts by computing the *type layout* of each shader parameter.
A type layout describes the amount of registers/bindings/bytes/etc. that a type consumes, and also encodes the information needed to compute offsets/registers for individual `struct` fields or array elements.
Once we know how much space each parameter consumes, we then inspect any explicit binding information (e.g., `register` modifiers) that is relevant for the target, and build a data structure to record what binding ranges are already consumed.
Finally, we go through any parameters without explicit binding information and assign them the next available range of the appropriate size (in a first-fit fashion).
The parameter binding/layout information is what the Slang reflection API exposes. It is layered directly over the Slang AST so that it accurately reflects the program as the user wrote it, and not the result of lowering that program to our IR.
This document describes parameter binding as a "front end" activity, but in practice it is something that could be done in the front-end, the back-end or both.
When shader code involves generic type parameters, complete layout information cannot be generated until the values of these parameters are fully known, and in practice that might not happen until the back end.
### Serialization
It is not yet fully implemented, but our intention is that the last thing the front-end does is to serialize the following information:
- A stripped-down version of the checked AST for each translation unit including declarations/types, but not function bodies
- The IR code for each translation unit
- The binding/layout information for each target
The above information is enough to type-check a subsequent module that `import`s code compiled in the front-end, to link against its IR code, or to load and reflect type and binding information.
The "Back End"
--------------
The Slang back end logically starts with the user specifying:
- An IR module, plus any necessary modules to link in and provide its dependencies
- An entry point in that module, plus arguments for any generic parameters that entry point needs
- A compilation target (e.g., SPIR-V for Vulkan)
- Parameter binding/layout information for that module and entry point, computed for the chosen target
We eventually want to support compiling multiple entry points in one pass of the back end, but for now it assumes a single entry point at a time.
### Linking and Target Specialization
The first step we perform is to copy the chosen entry point and anything it depends on, recursively into a "fresh" IR module.
We make a copy of things so that any optimization/transformation passes we do for one target don't alter the code the front-end produced in ways that affect other targets.
While copying IR code into the fresh module, we have cases where there might be multiple definitions of the same function or other entity.
In those cases, we apply "target specialization" to pick the definition that is the best for the chosen target.
This step is where we can select between, say, a built-in definition of the `saturate` function for D3D targets, vs. a hand-written one in a Slang standard module to use for GLSL-based targets.
### API Legalization
If we are targeting a GLSL-based platform, we need to translate "varying" shader entry point parameters into global variables used for cross-stage data passing.
We also need to translate any "system value" semantics into uses of the special built-in `gl_*` variables.
We currently handle this kind of API-specific legalization quite early in the process, performing it right after linking.
### Generic Specialization
Once the concrete values for generic parameters are known we can set about specializing code to the known types.
We do this by cloning a function/type/whatever and substituting in the concrete arguments for the parameters.
This process can be continued as specializing one function may reveal opportunities to specialize others.
During this step we also specialize away lookup of interface requirements through their witness tables, once generic witness-table parameters have been replaced with concrete witness tables.
At the end of specialization, we should have code that makes no use of user-defined generics or interfaces.
### Type Legalization
While HLSL and Slang allow a single `struct` type to contain both "ordinary" data like a `float3` and "resources" like a `Texture2D`, the rules for GLSL and SPIR-V are more restrictive.
There are some additional wrinkles that arise for such "mixed" types, so we prefer to always "legalize" the types in the user's code by replacing an aggregate type like:
```hlsl
struct Material { float4 baseColor; Texture2D detailMap; };
Material gMaterial;
```
with separate declarations for ordinary and resource fields:
```hlsl
struct Material { float4 baseColor; }
Material gMaterial;
Texture2D gMaterial_detailMap;
```
Changing the "shape" of a type like this (so that a single variable becomes more than one) needs to be done consistently across all declarations/functions in the program (hence why we do it after specialization, so that all concrete types are known).
### Other Optimizations
We don't currently apply many other optimizations on the IR code in the back-end, under the assumption that the lower-level compilers below Slang will do some of the "heavy lifting."
That said, there are certain optimizations that Slang must do eventually, for semantic completeness. One of the most important examples of these is implementing the semantics of the `[unroll]` attribute, since we can't always rely on downstream compilers to have a capable unrolling implementation.
We expect that over time it will be valuable for Slang to support a wider array of optimization passes, as long as they are ones that are considered "safe" to do above the driver interface, because they won't interfere with downstream optimization opportunities.
### Emission
Once we have transformed the IR code into something that should be legal for the chosen target, we emit high-level source code in either HLSL or GLSL.
The emit logic is mostly just a scan over the IR code to emit a high-level declaration for each item: an IR structure type becomes a `struct` declaration, and IR function becomes a function definition, etc.
In order to make the generated code a bit more readable, the Slang compiler currently does *not* emit declarations using their mangled names and instead tries to emit everything using a name based on how it was originally declared.
To improve the readability of function bodies, the emit logic tries to find consecutive sequences of IR instructions that it can emit as a single high-level language expression. This reduces the number of temporaries in the output code, but we need to be careful about inserting parentheses to respect operator precedence, and also to not accidentally change the order of evaluation of code.
When emitting a function body, we need to get from the low-level control flow graph (CFG) to high-level structured control-flow statements like `if`s and loops. We currently do this on a per-function basis during code emission, using an ad hoc algorithm based on control-flow structured information we stored in the IR.
A future version of the compiler might implement something more complete like the "Relooper" algorithm used by Emscripten.
### Downstream Compiler Execution
Once we have source code, we can invoke downstream compilers like fxc, dxc, and glslang to generate binary code (and optionally to disassemble that code for console output).
The Slang compiler also supports a "pass through" mode where it skips most of the steps outlined so far and just passes text along to these downstream compilers directly. This is primarily intended as a debugging aid for developers working on Slang, since it lets you use the same command-line arguments to invoke both Slang compilation and compilation with these other compilers.
Conclusion
----------
Hopefully this whirlwind introduction to the flow of the Slang compiler gives some idea of how the project fits together, and makes it easier to dive into the code and start being productive.

View file

@ -1,216 +0,0 @@
Semantic Checking
=================
The semantic checking logic in the Slang compiler is located in `source/slang/slang-check*`.
Semantic checking is applied in the front end after parsing, and before lowering of code to the IR.
The main job of the semantic checking stage is to detect and forbid code that has errors in it.
The errors and other diagnostics reported are intended to be of benefit to the user, but semantic checking is also important for the overall function of the compiler.
Stages of compilation after semantic checking (e.g., lowering to the IR) are allowed to *assume* that the code they operate on is semantically valid, and may assert-fail or even crash on invalid code.
Semantic checking is thus not an optional step, and there is no meaningful way to turn it off.
Semantic Checking can be broken into three main kinds of work, and we will discuss how each is implemented in the following sections:
* Checking of "terms" which include expressions and type expressions
* Checking of statements
* Checking of declarations
Checking Terms
--------------
### Some Terminology for Terms
We use the word "term" to refer generically to something that can be evaluated to produce a result, but where we do not yet know if the result will be a type or a value. For example, `Texture2D` might be a term that results in a type, while `main` might be a term that results in a value (of function type), but both start out as a `NameExpr` in the AST. Thus the AST uses the class hierarchy under `Expr` to represent terms, whether they evaluate to values or types.
There is also the `Type` hierarchy, but it is important to understand that `Type` represents types as their logical immutable selves, while `Expr`s that evaluate to types are *type expressions* which can be concretely pointed to in the user's code. Type expressions have source locations, because they represent something the user wrote in their code, while `Type`s don't have singular locations by default.
The codebase uses the notion of a `TypeRepr` for those `Expr`s that should only ever evaluate to types, and there is also a `TypeExp` type that is meant to package up a `Type` with an optional `Expr` for a type expression that produced it. The names of these implementation types aren't great, and should probably not be spread further.
A value-bearing `Expr` will eventually be given a `Type` that describes the type of value it produces.
An `Expr` that evaluates to a type will eventually be given a `Type` that uses the `TypeType` subclass to indicate the specific type it evaluated to.
The `TypeType` idea is kind of a kludge to represent "kinds" (the "types of types") in our system.
More correctly, we should say that every `Expr` gets a *classifier*, with the classifiers for value expressions being `Type`s and the classifiers for type expressions being kinds, but we haven't had time or inclination to fix the model yet.
### The Big Picture
Checking of terms is largely done as an ad hoc postorder traversal of the AST.
That is, in order to check a compound expression like `f(a)` we first need to check `f` and `a` before we can check the function call.
When checking an expression there are four main things that have to be done:
1. Recursively check all sub-expressions.
2. Detect and diagnose any errors (or warnings) in the current expression.
3. Optionally construct a new expression to replace the current expression (or one of its sub-expressions) in cases where the syntactic form of the input doesn't match the desired semantics (e.g., make an implicit type conversion explicit in the AST).
4. Determine the correct type for the result expression, and store it so that it can be used by subsequent checking.
Those steps may end up being interleaved in practice.
### Handling Errors Gracefully
If an error is detected in a sub-expression, then there are a few issues that need to be dealt with:
* We need to ensure that an erroneous sub-expression can't crash the compiler when it goes on to check a parent expression. For example, leaving the type of an expression as null when it has errors is asking for trouble.
* We ideally want to continue to diagnose other unrelated errors in the same expression, statement, function, or file. That means that we shouldn't just bail out of semantic checking entirely (e.g., by throwing an exception).
* We don't want to produce "cascading" errors where, e.g., an error in `a` causes us to also report an error in `a + b` because no suitable operator overload was found.
We tackle all of these problems by introducing the `ErrorType` and `ErrorExpr` classes.
If we can't determine a correct type for an expression (say, because it has an error) then we will assign it the type `ErrorType`.
If we can't reasonably form an expression to return *at all* then we will return an `ErrorExpr` (which has type `ErrorType`).
These classes are designed to make sure that subsequent code won't crash on them (since we have non-null objects), but to help avoid cascading errors.
Some semantic checking logic will detect `ErrorType`s on sub-expressions and skip its own checking logic (e.g., this happens for function overload resolution), producing an `ErrorType` further up.
In other cases, expressions with `ErrorType` can be silently consumed.
For example, an erroneous expression is implicitly convertible to *any* type, which means that assignment of an error expression to a local variable will always succeed, regardless of the variable's type.
### Overload Resolution
One of the most involved parts of expression checking is overload resolution, which occurs when there is an expression of the form `f(...)` where `f` could refer to multiple function declarations.
Our basic approach to overload resolution is to iterate over all the candidates and add them to an `OverloadResolveContext`.
The context is responsible for keeping track of the "best" candidate(s) seen so far.
Traditionally a language defines rules for which overloads are "better" than others that focus only on candidates that actually apply to the call site.
This is the right way to define language semantics, but it can produce sub-optimal diagnostics when *no* candidate was actually applicable.
For example, suppose the user wrote `f(a,b)` and there are 100 functions named `f`, but none works for the argument types of `a` and `b`.
A naive approach might just say "no overload applicable to arguments with such-and-such types."
A more advanced compiler might try to list all 100 candidates, but that wouldn't be helpful.
If it turns out that of the 100 candidates, only 10 of them have two parameters, then it might be much more helpful to list only the 10 candidates that were even remotely applicable at the call site.
The Slang compiler strives to provide better diagnostics on overload resolution by breaking the checking of a candidate callee into multiple phases, and recording the earliest phase at which a problem was detected (if any).
Candidates that made it through more phases of checking without errors are considered "better" than other candidates, even if they ultimately aren't applicable.
### Type Conversions
Conversion of values from one type to another can occur both explicitly (e.g., `(int) foo`) and implicitly (e.g., `while(foo)` implicitly converts `foo` to a `bool`).
Type conversion is also tied into overload resolution, since some conversions get ranked as "better" than others when deciding between candidates (e.g., converting an `int` to a `float` is preferred over converting it to a `double`).
We try to bottleneck all kinds of type conversion through a single code path so that the various kinds of conversion can be handled equivalently.
### L-Values
An *l-value* is an expression that can be used as the destination of an assignment, or for read-modify-write operations.
We track the l-value-ness of expressions using `QualType` which basically represents a `Type` plus a bit to note whether something is an l-value or not.
(This type could eventually be compressed down to be stored as a single pointer, but we haven't gotten to that yet)
We do not currently have a concept like the `const` qualifier in C/C++, that would be visible to the language user.
Propagation of l-value-ness is handled in an ad hoc fashion in the small number of expression cases that can ever produce l-values.
The default behavior is that expressions are not l-values and the implicit conversion from `Type` to `QualType` reflects this.
Checking Statements
-------------------
Checking of statements is relatively simple compared to checking expressions.
Statements do not produce values, so they don't get assigned types/classifiers.
We do not currently have cases where a statement needs to be transformed into an elaborated form as part of checking (e.g., to make implicit behavior explicit), so statement checking operates "in place" rather than optionally producing new AST nodes.
The most interesting part of statement checking is that it requires information about the lexical context.
Checking a `return` statement requires knowing the surrounding function and its declared result type.
Checking a `break` statement requires knowing about any surrounding loop or `switch` statements.
We represent the surrounding function explicitly on the `SemanticsStmtVisitor` type, and also use a linked list of `OuterStmtInfo` threaded up through the stack to track lexically enclosing statements.
Note that semantic checking of statements at the AST level does *not* encompass certain flow-sensitive checks.
For example, the logic in `slang-check-stmt.cpp` does not check for or diagnose any of:
* Functions that fail to `return` a value along some control flow paths
* Unreachable code
* Variables used without being initialized first
All of the above are instead intended to be handled at the IR level (where dataflow analysis is easier) during the "mandatory" optimization passes that follow IR lowering.
Checking Declarations
---------------------
Checking of declarations is the most complicated and involved part of semantic checking.
### The Problem
Simple approaches to semantic checking of declarations fall into two camps:
1. One can define a total ordering on declarations (usually textual order in the source file) and only allow dependencies to follow that order, so that checking can follow the same order. This is the style of C/C++, which is inherited from the legacy of traditional single-pass compilers.
2. One can define a total ordering on *phases* of semantic checking, so that every declaration in the file is checked at phase N before any is checked at phase N+1. E.g., the types of all variables and functions must be determined before any expressions that use those variables/functions can be checked. This is the style of, e.g., Java and C#, which put a premium on defining context-free languages that don't dictate order of declaration.
Slang tries to bridge these two worlds: it has inherited features from HLSL that were inspired by C/C++, while it also strives to support out-of-order declarations like Java/C#.
Unsurprisingly, this leads to unique challenges.
Supporting out-of-order declarations means that there is no simple total order on declarations (we can have mutually recursive function or type declarations), and supporting generics with value parameters means there is no simple total order on phases.
For that last part observe that:
* Resolving an overloaded function call requires knowing the types of the parameters for candidate functions.
* Determining the type of a parameter requires checking type expressions.
* Type expressions may contain value arguments to generics, so checking type expressions requires checking value expressions.
* Value expressions can include function calls (e.g., operator invocations), which then require overload resolution to type-check.
### The Solution
Our declaration checking logic takes the idea of phase-based checking as a starting point, but instead of a global ordering on phases we use a per-declaration order.
Each declaration in the Slang AST will have a `DeclCheckState` that represents "how checked" that declaration is.
We can apply semantic checking logic to a declaration `D` to raise its state to some desired state `S`.
By default, the logic in `slang-check-decl.cpp` will do a kind of "breadth-first" checking strategy where it will try to raise all declarations to the one state before moving on to the next.
In many cases this will reproduce the behavior of a Java or C#-style compiler with strict phases.
The main difference for Slang is that whenever, during the checking of some declaration `D`, we discover that we need information from some other declaration `E` that would depend on `E` being in state `S`, we manually call a routine `ensureDecl(E,S)` whose job is to ensure that `E` has been checked enough for us to proceed.
The `ensureDecl` operation will often be a no-op, if the declaration has already been checked previously, but in cases where the declaration *hasn't* been checked yet it will cause the compiler to recursively re-enter semantic checking and try to check `E` until it reaches the desired state.
In pathological cases, this method can result in unbounded recursion in the type checker. The breadth-first strategy helps to make such cases less likely, and introducing more phases to semantic checking can also help reduce problems.
In the long run we may need to investigate options that don't rely on unbounded recursion.
### The Rules
As a programmer contributing to the semantic checking infrastructure, the declaration-checking strategy requires following a few rules:
* If a piece of code is about to rely on some property of a declaration that might be null/absent/wrong if checking hasn't been applied, it should use `ensureDecl` to make sure the declaration in question has been checked enough for that property to be available.
* If adding some `ensureDecl`s leads to an internal compiler error because of circularity in semantic checking, then either the `ensureDecl`s were misplaced, or they were too strong (you asked for more checking than was necessary), or in the worst case we need to add more phases (more `DeclCheckState`s) to separate out the checking steps and break the apparent cycle.
* In very rare cases, semantic checking for a declaration may want to use `SetCheckState` to update the state of the declaration itself before recursively `ensureDecl`ing its child declarations, but this must be done carefully because it means you are claiming that the declaration is in some state `S`, while not having completed the checking that is associated with state `S`.
* It should *never* be necessary to modify `checkModuleDecl` so that it performs certain kinds of semantic analysis on certain declarations before others (e.g., iterate over all the `AggTypeDecl`s before all the `FuncDecl`s). If you find yourself tempted to modify it in such a way, then add more `DeclCheckState`s to reflect the desired ordering. It is okay to have phases of checking that only apply to a subset of declarations.
* Every statement and expression/term should be checked once and only once. If something is being checked twice and leading to failures, the right thing is to fix the source of the problem in declaration checking, rather than make the expression/statement checking be defensive against this case.
Name Lookup
-----------
Lookup is the process of resolving the contextual meaning of names either in a lexical scope (e.g., the user wrote `foo` in a function body - what does it refer to?) or in the scope of some type (e.g., the user wrote `obj.foo` for some value `obj` of type `T` - what does it refer to?).
Lookup can be tied to semantic analysis quite deeply.
In order to know what a member reference like `obj.foo` refers to, we not only need to know the type of `obj`, but we may also need to know what interfaces that type conforms to (e.g., it might be a type parameter `T` with a constraint `T : IFoo`).
In order to support lookup in the presence of our declaration-checking strategy described above, the lookup logic may be passed a `SemanticsVisitor` that it can use to `ensureDecl()` declarations before it relies on their properties.
However, lookup also currently gets used during parsing, and in those cases it may need to be applied without access to the semantics-checking infrastructure (since we currently separate parsing and semantic analysis).
In those cases a null `SemanticsVisitor` is passed in, and the lookup process will avoid using lookup approaches that rely on derived semantic information.
This is fine in practice because the main things that get looked up during parsing are names of `SyntaxDecl`s (which are all global) and also global type/function/variable names.
Known Issues
------------
The largest known issue for the semantic checking logic is that there are currently dependencies between parsing and semantic checking.
Just like a C/C++ parser, the Slang parser sometimes needs to disambiguate whether an identifier refers to a type or value to make forward progress, and that would in general require semantic analysis.
Ideally the way forward is some combination of the following two strategies:
* We should strive to make parsing at the "global scope" fully context-insensitive (e.g., by using similar lookahead heuristics to C#). We are already close to this goal today, but will need to be careful that we do not introduce regressions compared to the old parser (perhaps a "compatibility" mode for legacy HLSL code is needed?)
* We should delay the parsing of nested scopes (both function and type bodies bracketed with `{}`) until later steps of the compiler. Ideally, parsing of function bodies can be done in a context-sensitive manner that interleaves with semantic checking, closer to the traditional C/C++ model (since we don't care about out-of-order declarations in function bodies).

View file

@ -1,331 +0,0 @@
Serialization
=============
Slang has a collection of serialization components. This document will be used to discuss serialization around IR/AST and modules as it currently exists. A separate document will describe the future serialization plans.
All of the serialization aspects here focus on binary serialization.
The major components are
* IR Serialization
* AST/Generalized Serialization
* SourceLoc Serialization
* Riff container
* C++ Extractor
Generalized Serialization
=========================
Generalized serialization is the mechanism used to save 'arbitrary' C++ structures. It is currently used for serializing the AST. Although not necessary, generalized serialization is typically helped out by the `C++ extractor`, which can do rudimentary parsing of C++ source, and extract class-like types and their fields. The extraction then produces header files that contain macros that can then be used to drive serialization.
It's worth discussing briefly what the philosophy is behind the generalized serialization system. To talk about this design it is worth talking a little about serialization in general and the issues involved. Let's say we have a collection of C++ class instances that contain fields. Some of those fields might be pointers. Others of the fields might be a templated container type like a Dictionary<K,V>. We want to take all of these instances, write them to a file, such that when we read the file back we will have the equivalent objects with equivalent relationships.
We could imagine a mechanism that saved off each instance, by writing off the address of the object, and then the in memory representation for all the instances that can be reached. When reading back the objects would be at different locations in memory. If we knew where the pointers were, we could use a map of old pointers to the new instances and fix them up. Problems with this simple mechanism occur because...
* If we try to read back on a different machine, with a different pointer size, the object layout will be incompatible
* If we try to read back on the same machine where the source is compiled by a different compiler, the object layout might be incompatible (say bool or enum are different size)
* Endianness might be different
* Knowing where all the pointers are and what they point to and therefore what to serialize is far from simple.
* The alignment of types might be different across different processors and different compilers
The implementation makes a distinction between the 'native' types, the regular C++ in memory types and 'serial' types. Each serializable C++ type has an associated 'serial' type - with the distinction that it can be written out and (with perhaps some other data) read back in to recreate the C++ type. The serial type can be a C++ type, but is such that it can be written to and read from disk and still represent the same data.
The approach taken in Slang is to have each 'native' type (ie the C++ type) that is being serialized have a serializable 'dual' type. The serial type can be an explicit C++ type, or it might be implicit (ie not have a C++ type) and calculated at Slang startup.
The important point here is that the Serial type must be writable on one target/process and readable correctly on another.
The easy cases are types that have an alignment and representation that will work over all targets. These would be most built in types - integrals 8,16,32 and float32. Note that int64 and double are *not* so trivial, because on some targets they require 8 byte alignment - so they must be specially defined to have 8 byte alignment.
Another odd case is bool - it has been on some compilers 32 bits, and on others 8 bits. Thus we need to potentially convert.
For this and other types it is therefore necessary to have a function that can convert to and from the serialized dual representation.
## Generalized Field Conversion
For types that contain fields, it would be somewhat laborious to have to write all of the conversion functions by hand. To avoid this we use the macro output of the C++ extractor to automatically generate the appropriate functions.
Take DeclRefExpr from the AST hierarchy - the extractor produces a macro something like...
```
#define SLANG_FIELDS_ASTNode_DeclRefExpr(_x_, _param_)\
_x_(scope, (RefPtr<Scope>), _param_)\
_x_(declRef, (DeclRef<Decl>), _param_)\
_x_(name, (Name*), _param_)
```
DeclRefExpr derives from Expr and this might hold other fields and so forth.
The macros can generate the appropriate conversion functions *if* we have the conversion functions for the field types. Field type conversions can be specified via a special macro that describes how the conversion to and from the type works. To make the association between the native and serial type, as well as provide the functions to convert, we use the template
```
template <typename T>
struct SerialTypeInfo;
```
and specialize it for each native type. The specialization holds
* SerialType - The type that will be used to represent the native type
* NativeType - The native type
* SerialAlignment - A value that holds what kind of alignment the SerialType needs to be serializable (it may be different from SLANG_ALIGN_OF(SerialType)!)
* toSerial - A function that with the help of SerialWriter convert the NativeType into the SerialType
* toNative - A function that with the help of SerialReader convert the SerialType into the NativeType
It is useful to have a structure that can hold the type information, so it can be stored. That is achieved with
```
template <typename T>
struct SerialGetType;
```
This template can be specialized for specific native types - but all it holds is just a function getType, which returns a `SerialType*`, which just holds the information held in the SerialTypeInfo template, but additionally including the size of the SerialType.
So we need to define a specialized SerialTypeInfo for each type that can be a field in a NodeBase/RefObject derived type. We don't need to define anything explicitly for the NodeBase derived types, as we will just generate the layout from the fields. How do we know the fields? We just used the macros generated from the C++ extractor.
So first a few things to observe...
1) Some types don't need any conversion to be serializable - int8_t, or float the bits can just be written out and read in (1)
2) Some types need a conversion but it's very simple - for example an enum without explicit size, being written as an explicit size
3) Some types can be written out but would not be directly readable or usable with different targets/processors, so need converting
4) Some types require complex conversions that require programmer code - like Dictionary/List
For types that need no conversion (1), we can just use the template SerialIdentityTypeInfo
```
template <>
struct SerialTypeInfo<SomeType> : public SerialIdentityTypeInfo<SomeType> {};
```
This specialization means that SomeType can be written out and read in across targets/compilers without problems.
For (2) we have another template that will do the conversion for us
```
template <typename NATIVE_T, typename SERIAL_T>
struct SerialConvertTypeInfo;
```
That we can use as above, and specify the native and serial types.
For (3) there are a few scenarios. For any field in a serial type we must store it in the serialized type such that the representation will work across all processors/compilers. So one problematic type is `bool`. It's not specified how it's laid out in memory - and some compilers have stored it as a word. Most recently it's been stored as a byte. To make sure bool is ok for serialization therefore we store as a uint8_t.
Another example would be double. It's 64 bits, but on some arches/compilers its SLANG_ALIGN_OF is 4 and on others it's 8. On some architectures a non aligned read will lead to a fault, on others it might be very slow. To work around this problem therefore we have to ensure double has the alignment that will work across all targets - and that alignment is 8. In that specific case that issue is handled via SerialBasicTypeInfo, which makes the SerialAlignment the sizeof the type.
For (4) there are a few things to say. First a type can always implement a custom version of how to do a conversion by specializing `SerialTypeInfo`. But there remains another nagging issue - types which allocate/use other memory that changes at runtime. Clearly we cannot define 'any size of memory' in a fixed SerialType defined in a specialization of SerialTypeInfo. The mechanism to work around this is to allow arbitrary arrays to be stored, that can be accessed via an SerialIndex. This will be discussed more once we discuss a little more about the file system, and SerialIndex.
## Struct value types
There is a mechanism to allow the simple serialization of 'value' struct types. For this to work it requires
* The fields of the struct are serializable and public
* The super class (if there is one) is serializable
If this is the case, it is not necessary to write a `SerialTypeInfo<T>` specialization, the C++ extractor and its reflection can generate the specialization for you. The steps needed
* Place SLANG_VALUE_CLASS(your type) in the definition of your struct
* Make sure that the header containing the struct definition is included in the ones C++ extractor examines
* Instead of implementing SerialTypeInfo for your type use the macro SLANG_VALUE_TYPE_INFO(your type)
If there are problems, look at the contents of `slang-generated-value.h` and `slang-generated-value-macro.h`.
It should be noted that currently because of limitations in the C++ extractor, all of the types must be defined in the same scope.
Also because value types are always fields in generalized serialization, they do not need to be identified with a sub type, even though C++ extractor does generate a ValueType enum.
## Generalized Serialization Format
The serialization format used is 'stream-like' with each 'object' stored in order. Each object is given an index starting from 1. 0 is used to be in effect nullptr. The stream looks like
```
SerialInfo::Entry (for index 1)
Payload for type in entry
SerialInfo::Entry (for index 2)
Payload for type in entry
...
...
```
When writing we have an array that maps each index to a pointer to the associated header. We also have a map that maps native pointers to their indices. The Payload *is* the SerialType for the thing saved. The payload directly follows the Entry data. Each object in this list can only be a few types of things
* NodeBase derived type
* RefObject derived type
* String
* Array
The actual Entry followed by the payloads are allocated and stored when writing in a MemoryArena. When we want to write into a stream, we can just iterate over each entry in order and write it out.
You may have spotted a problem here - that some Entry types can be stored without alignment (for example a string - which stores the length VarInt encoded followed by the characters). Others require an alignment - for example a NodeBase derived type that contains an int64_t will *require* 8 byte alignment. As a feature of the serialization format we want to be able to just map the data into memory, and be able to access all the SerialType as is on the CPU. For that to work we *require* that the payload for each entry has the right alignment for the associated SerialType.
To achieve this we store in the Entry its alignment requirement *AND* the next entry's alignment. With this when we read, as we are stepping through the entries we can find where the next Entry starts. Because the payload comes directly after the Entry - the Entry's size must be a multiple of the largest alignment the payload can have.
For the code that does the conversion between native and serial types it uses either the SerialWriter or SerialReader. This provides the mechanism to turn a pointer into a serializable `SerialIndex` and vice versa. There are some special functions for converting string-like types back and forth.
The final mechanism is that of 'Arrays'. An array allows reading or writing a chunk of data associated with a `SerialIndex`. The chunk of data *must* hold data that is serializable. If the array holds pointers - then the serialized array must hold an array of `SerialIndex` values that represent those pointers. When reading back in `SerialIndex` is converted back to a pointer.
Arrays are the escape hatch that allows for more complex types to serialize. Dictionaries for example are saved as a serial type that is two SerialIndices one to a keys array and one to a values array.
Note that writing has two phases, serializing out into an SerialWriter, and then secondly writing out to a stream.
## Object/Reference Types
When talking about Object/Reference types this means types that can be referenced natively as pointers. Currently that means `NodeBase` and `SerialRefObject` derived types.
The SerialTypeInfo mechanism is generally for *fields* of object types. For derived types we use the C++ extractor's field list to work out the native fields' offsets and types. With this we can then calculate the layout for NodeBase types such that they follow the requirements for serialization - such as alignment and so forth.
This information is held in the SerialClasses, which for a given TypeKind/SubType gives a SerialClassInfo, that specifies fields for just that type.
It is trivial to work out the SubType for a NodeBase derived class - it's just the astTypeNode member in the `NodeBase` type. For a SerialRefObject it is determined by first calling
```
const ReflectClassInfo* getClassInfo() const;
```
Then the m_classID in the `ReflectClassInfo` is the subtype.
## Reading
Due to the care in writing, reading is relatively simple. We can just take the contents of the file and put it in memory, as long as in memory it has an alignment of at least MAX_ALIGNMENT. Then we can build up an entries table by stepping through the data and writing the pointer.
The toNative functions take a SerialReader - this allows the implementation to ask for pointers and arrays from other parts of the serialized data. It also allows for types to be lazily reconstructed if necessary.
Lazy reconstruction may be useful in the future to partially reconstruct a sub part of the serialized data. In the current implementation, lazy evaluation is used on Strings. The m_objects array holds all of the recreated native 'objects'. Since the objects can be derived from different base classes the associated Entry will describe what it really is.
For the String type, we initially store the object pointer as null. If a string is requested from that index, we see if the object pointer is null; if it is we have to construct the StringRepresentation that will be used. An extra wrinkle is that we allow accessing of a serialized String as a Name or a string or a UnownedSubString. Fortunately a Name just holds a string, and a Name remains in scope as long as its NamePool does, which is passed in.
### Serial type replacement
In generalized serialization systems such as with Java there is a mechanism for reference types to replace their representation on writing, and then on reading replace the read type with the actual type. Write replacement is already used when serializing out modules via the `SerialFilter` mechanism. The actual implementation is `ModuleSerialFilter`, if an object is referenced in a different module that is explicitly specified, it is replaced with `ImportExternalDecl`, that names the actual definition to use.
Currently when deserializing, the `ImportExternalDecl` is *not* turned back into the item it references. This means there are likely pointers which point to invalid objects.
If we wanted to do a replacement on reconstruction, we could modify reading as follows.
1) Don't construct anything at the start
2) Find 'root's they must be created and deserialized first
. Any read/writeReplace is a root
. Any marked (like SourceLocData) is a root. (When deconstructed it also needs to add information to the Reader)
. The root of the objects (note we could just deserialize first to last if not already constructed)
3) During deserialization pointer references are constructed on demand
4) Extra code is needed to make sure there aren't cycles. Any object is either Pre/Created/Deserialized.
### Other reading issues
As touched on elsewhere SourceLoc information has to be carefully handled. Within the generalized serialization we have the additional problem that we probably don't want to attach SourceLoc or other types explicitly to the SerialReader/SerialWriter. The mechanism to work around this is via the `SerialExtraObjects` structure. This allows types to optionally be available to the Reader/Writer without it having to explicitly know anything about the type.
For all types supporting this mechanism they *require* that they are added to the `SerialExtraType` enum, and that they embed a static kExtraType field in the type. This solution is not as flexible as perhaps using a string map or something of that sort, but it does make lookup very fast and simple which is likely significant as many types contain the SourceLoc type for example.
## Identifying Types
How a NodeBase derived type identifies itself is not directly compatible with how a SerialRefObject represents itself. The NodeBase derived type uses the `ASTNodeType` enum. The SerialRefObject uses a `RefObjectType` enum. Thus to uniquely identify a type we typically need two bits of information: the `SerialTypeKind` as well as the `SerialSubType`.
```
enum class SerialTypeKind : uint8_t
{
Unknown,
String, ///< String
Array, ///< Array
NodeBase, ///< NodeBase derived
RefObject, ///< RefObject derived types
CountOf,
};
```
String and Array are special cases described elsewhere.
If the `SerialTypeKind` is `NodeBase`, then the `SerialSubType` *is* the ASTNodeType. If the `SerialTypeKind` is `RefObject` then the `SerialSubType` *is* RefObjectType.
`SerialClasses` holds the information on how to serialize non-field Serial types. For each `SerialTypeKind`/`SerialSubType` it holds a `SerialClass`. The SerialClass holds the size of the type, the number of fields, and the field information. The fields themselves contain a `SerialFieldType` - this holds the pointers to the functions to convert to and from `native` to `serial` types.
In order to set up all types in a SerialClass without tying SerialClasses to an implementation the class `SerialClassesUtil` is used to set up Slang serialized types in a `SerialClasses` instance.
IR Serialization
================
Currently IR serialization is handled via a separate mechanism to 'generalized' serialization.
This mechanism is *much* simpler than generalized serialization, because by design the IR types are very homogeneous in style. There are a few special cases, but in general an instruction consists of
* Its type
* A SourceLoc
* 0 or more operands.
* 0 or more children.
Within the IR instructions are pointers to IRInst derived types. As previously discussed serializing pointers directly is generally not a good idea. To work around this the pointers are turned into 32 bit indices. Additionally we know that an instruction can belong to at most one other instruction.
When serializing out special handling is made for child instructions - their indices are made to be a contiguous range of indices for all instructions that belong to each parent. The indices are ordered into the same order as the children are held in the parent. By using this mechanism it is not necessary to directly save off the indices that belong to a parent, only the range of indices.
The actual serialization mechanism is similar to the generalized mechanism - referenced objects are saved off in order of their indices. What is different is that the encoding fixes the size of the Inst in `IRSerialData`. This fixed size can hold up to two operands; if the instruction has more than two operands then one of the UInt32s is the operand count and the other is an offset to a list of operands. It probably makes sense to alter this in the future to stream the instruction's payload directly.
IR serialization allows a simple compression mechanism, that works because much of the IR serialized data is UInt32 data, that can use a variable byte encoding.
AST Serialization
=================
AST serialization uses the generalized serialization mechanism.
When serializing out an AST module it is typical to want to just serialize out the definitions within that module. Without this, the generalized serializer will crawl over the whole of the AST structure serializing everything that can be reached - including the whole of the core module.
The filter `ModuleSerialFilter` can be used when writing the AST module; it will replace any references to elements outside of the current module with an `ImportExternalDecl`. This contains a mangled name to the item being referenced in another module.
When serializing back in, it may be possible to turn these references into the actual element, if the module containing the definition has been loaded. This probably can't work in general though, as if we have two modules that reference items in the other, then it isn't possible to fix up on load.
A way around this would be to not replace on reading (or only replace items that can be found). Then go through the `ImportExternalDecl` elements doing the lookup, and potentially loading other modules. There are several issues here though
* On first loading pointers that have been replaced will claim to be a type they are typically *NOT*
* Once we have determined what `ImportExternalDecl` should be replaced with, how do we replace it?
On the first point, this is perhaps undesirable (on a variety of levels - such as debugging), but isn't as terrible as it could be, as the actual type identification is managed by Slang via the `astTypeNode`. So there is a simple way of identifying what the type actually is.
On the second point - this isn't so simple. If we had an indirection, we could do the replacement quickly and trivially, without having to fix up all the pointers. We probably don't want to add such an indirection into the pointer based system, so choices are
* Store where all the pointers are, and fix them up
* Traverse the hierarchy replacing pointers
Within the current mechanism storing where all the pointers are is not so simple - it would require the setting of any pointer to record where that pointer is stored, and for that to remain the location. Doing so would require setting all pointers to go through some recording mechanism. Pointers held in containers - like the Dictionary - may not be directly available. Moreover even if they *were*, doing so may break the containers' invariants - for example replacing a key's pointer may change its hash.
Traversing the hierarchy would be something akin to the serialization process. It would require special handling for field types to do the replacement. There would need to be special handling for struct value types.
SourceLoc Serialization
=======================
SourceLoc serialization presents several problems. Firstly we have two distinct serialization mechanisms that need to use it - IR serialization and generalized serialization. That being the case it cannot be saved directly in either, even though it may be referenced by either.
To keep things simple for now we build up SourceLoc information for both IR and general serialization via their writers adding their information into a SerialSourceLocWriter. Then we can save this information into a RIFF section, that can be loaded before either general or IR deserialization is used.
When reading the SourceLoc information has to be located and deserialized before any AST or IR deserialization. The SourceLoc data can then be turned into a SerialSourceLocReader, which is then either set on the `SerialReaders` `SerialExtraObjects`. Or passed to the `IRSerialReader`.
Riff Container
==============
[Riff](https://en.wikipedia.org/wiki/Resource_Interchange_File_Format) is used as a mechanism to store binary sections. The format allows for a hierarchy of `chunks` that hold binary data. How the data is interpreted depends on the [FOURCC](https://en.wikipedia.org/wiki/FourCC) associated with each chunk.
As previously touched on there are multiple different mechanisms used for serialization. IR serialization, generalized serialization, SourceLoc serialization - there are also other uses, such as serializing of entry point information. Riff is used to combine all of these incompatible binary parts together such that they can be stored together.
The handling of these riff containers is held within the `SerialContainerUtil` class.
C++ Extractor
=============
The C++ Extractor is the tool `slang-cpp-extractor` that can be used to examine C++ files to extract class definitions and associated fields. These files contain, in the form of macros, information about each class as well as reflected fields. These generated files can then be used to implement serialization without having to explicitly specify fields in C++ source code.
Issues
======
* No support for forward/backward compatibility.
** Adding fields/classes will typically break compatibility
* Binary files do not contain data to describe themselves
** It is *not* possible to write a stand-alone tool that can dump any serialized file - its interpretation depends on the version of Slang it was written from
* The Riff mechanism used for containers is somewhat ad-hoc
* Re-referencing AST nodes from other modules does not happen automatically on deserialization
* There are several mechanisms used for serialization that are not directly compatible
## C++ extractor issues
* All types (and typedefs) that are serialized must be defined in the same scope - child types don't work correctly
* When using value serialization all the members that are serializable must be public
* The types output in slang fields do not correctly take into account scope (this is a similar issue to the issue above)

View file

@ -1,260 +0,0 @@
Core Module Intrinsics
======================
The following document aims to cover a variety of systems used to add target specific features. They are most extensively used in the slang core module.
**NOTE!** These features should *not* be considered stable! They can be used in regular slang code to add features, but they risk breaking with any Slang version change. Additionally the features implementation can be very particular to what is required for a specific feature set, so might not work as expected in all scenarios.
As these features are in flux, it is quite possible this document is behind the current features available within the Slang code base.
If you want to add support for a feature for a target to Slang, implementing it as a part of the Slang standard modules is typically a good way to progress. Depending on the extension/feature it may not be possible to add support exclusively via changes to the standard module alone. That said most support for target specific extensions and features involve at least some changes to the slang standard modules including the core module, and typically using the mechanisms described here.
## Core Module
The main place these features are used are within the slang core module. This is implemented with a set of slang files within the slang project
* core.meta.slang
* hlsl.meta.slang
* diff.meta.slang
Looking at these files will demonstrate the features in use.
Most of the intrinsics and attributes have names that indicate that they are not for normal use. This is typically via a `__` prefix.
The `.meta.slang` files look largely like Slang source files, but their contents can also be generated programmatically with C++ code. A section of code can drop into `C++` code if it is preceded by `${{{{`. The C++ section is closed with a closing `}}}}`. This mechanism is typically used to generate different versions of a similar code sequence. Values from the C++ code can be accessed via the `$()`, where the contents of the brackets specifies something that can be calculated from within the C++ code.
As an example of mixing C++ and Slang code, we could write...
```slang
// Slang code
${{{{
// C++ code, calling out to a C++ function getTime, the result is held in variable time
int cppTime = getTime();
}}}}
// Back to Slang code, can access the C++ variable previously defined as cppTime. Due to $().
// The code inside the $() is executed on the C++ side, so can do calculations. In practice it would be easier
// to just use call $(getTime() + 1), but this demonstrates variables are accessible.
int slangTime = $(cppTime + 1);
```
# Attributes
## [__readNone]
A `[__readNone]` indicates a function that computes its results strictly based on argument values, without reading or writing through any pointer arguments, or any other state that could be observed by a caller.
## [__NoSideEffect]
Specifies a function declaration has no observable side effects.
## [__unsafeForceInlineEarly]
Inlines the contained code, but does so at a very early stage. Being earlier allows some kinds of inlining transformations to work that wouldn't work with regular inlining. It also means it must be used with *care*, because it may produce unexpected results for more complex scenarios.
## [__NonCopyableType]
Marks a type to be non-copyable, causing the SSA pass to skip turning variables of the type into SSA values.
## [__AlwaysFoldIntoUseSiteAttribute]
A call to the decorated function should always be folded into its use site.
## [KnownBuiltin("name")]
A `[KnownBuiltin("name")]` attribute allows the compiler to identify this declaration during compilation, despite obfuscation or linkage removing optimizations
# Intrinsics
<a id="target-intrinsic"></a>
## __target_intrinsic(target, expansion)
This is a widely used and somewhat complicated intrinsic. Placed on a declaration it describes how the declaration should be emitted for a target. The complexity is that `expansion` is applied via a variety of rules. `target` is a "target capability", commonly it's just the emit target for the intrinsic, so one of...
* hlsl
* glsl
* cuda - CUDA
* cpp - C++ output (used for exe, shared-library or host-callable)
* spirv - Used for slangs SPIR-V direct mechanism
A function definition can have a `target_intrinsic` *and* a body. In that case, the body will be used for targets where the `target_intrinsic` isn't defined.
If the intrinsic can be emitted as is, the expansion need not be specified. If only the *name* needs to be changed (params can be passed as is), only the name to be expanded to needs to be specified *without* `()`. In this scenario it is not necessary to specify it as a string in quotes, and just the identifier name can be used.
Currently `HLSL` has a special handling in that it is *assumed* if a declaration exists that it can be emitted verbatim to HLSL.
The target can also be a capability atom. The atoms are listed in "slang-capability-defs.h".
What is perhaps of importance here is that for some features for a specific target can have multiple ways of achieving the same effect - for example "GL_NV_ray_tracing" and "GL_EXT_ray_tracing" are two different ray tracing extensions available for Vulkan through GLSL. The `-profile` option can disambiguate which extension is actually desired, and the capability with that name on the `target_intrinsic` specifies how to implement that feature for that specific extension.
The expansion mechanism is implemented in "slang-intrinsic-expand.cpp" which will be most up to date.
The `expansion` value can be a string or an identifier. If it is an identifier, it will just be emitted as is replacing the name of the declaration the intrinsics is associated with.
Sections of the `expansion` string that are to be replaced are prefixed by the `$` sigil.
* $0-9 - Indicates the parameter at that index. For a method call $0 is `this`.
* $T0-9 - The type for the param at the index. If the type is a texture resource derived type, returns the *element* type.
* $TR - The return type
* $G0-9 - Replaced by the type/value at that index of specialization
* $S0-9 - The scalar type of the generic at the index.
* $p - Used on texturing operations. Produces the combined texture sampler arguments as needed for GLSL.
* $C - The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format conversion required between the type associated by the resource and the backing ImageFormat. Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture writes that will do a format conversion.
* $E - Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface access is byte addressed. $E will return the byte size of the *backing element*.
* $c - When doing texture access in GLSL the result may need to be cast. In particular if the underlying texture is 'half' based, GLSL only accesses (read/write) as float. So we need to cast to a half type on output. When storing into a texture it is still the case the value written must be half - but we don't need to do any casting there as half is coerced to float without a problem.
* $z - If we are calling a D3D texturing operation in the form t.Foo(s, ...), where `t` is a Texture&lt;T&gt;, then this is the step where we try to properly swizzle the output of the equivalent GLSL call into the right shape.
* $N0-9 - Extract the element count from a vector argument so that we can use it in the constructed expression.
* $V0-9 - Take an argument of some scalar/vector type and pad it out to a 4-vector with the same element type (this is the inverse of `$z`).
* $a - We have an operation that needs to lower to either `atomic*` or `imageAtomic*` for GLSL, depending on whether its first operand is a subscript into an array. This `$a` is the first `a` in `atomic`, so we will replace it accordingly.
* $A - We have an operand that represents the destination of an atomic operation in GLSL, and it should be lowered based on whether it is an ordinary l-value, or an image subscript. In the image subscript case this operand will turn into multiple arguments to the `imageAtomic*` function.
* $XP - Ray tracing ray payload
* $XC - Ray tracing callable payload
* $XH - Ray tracing hit object attribute
* $P - Type-based prefix as used for CUDA and C++ targets (I8 for int8_t, F32 - float etc)
## __specialized_for_target(target)
Specialized for target allows defining an implementation *body* for a particular target. The target is the same as is used for [__target_intrinsic](#target-intrinsic).
A declaration can consist of multiple definitions with bodies (for each target) using, `specialized_for_target`, as well as having `target_intrinsic` if that is applicable for a target.
## __attributeTarget(astClassName)
For an attribute, specifies the AST class (and derived class) the attribute can be applied to.
## __builtin
Identifies the declaration is being "builtin".
## __builtin_requirement(requirementKind)
A modifier that indicates a built-in associated type requirement (e.g., `Differential`). The requirement is one of `BuiltinRequirementKind`.
The requirement value can just be specified via the `$()` mechanism.
## __builtin_type(tag)
Specifies a builtin type - the integer value of one of the enumeration BaseType.
## __magic_type(clsName, tag)
Used before a type declaration. The clsName is the name of the class that is used to represent the type in the AST in Slang *C++* code. The tag is an optional integer value that is in addition and meaningful in the context of the class type.
##__intrinsic_type(op)
Used to specify the IR opcode associated with a type. The IR opcode is listed as something like `$(kIROp_HLSLByteAddressBufferType)`, which will expand to the integer value of the opcode (because the opcode value is an enum value that is visible from C++). It is possible to just write the opcode number, but that is generally inadvisable as the ids for ops are not stable. If a code change in Slang C++ adds or removes an opcode the number is likely to be incorrect.
As an example from the core module
```slang
__magic_type(HLSLByteAddressBufferType)
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
struct ByteAddressBuffer
{
// ...
};
```
# General
## __generic<>
Is an alternate syntax for specifying a declaration that is generic. The more commonly used form is to list the generic parameters in `<>` after the name of the declaration.
## attribute_syntax
Attribute syntax provides a mechanism to introduce an attribute type in Slang.
Right now the basic form is:
```
attribute_syntax [name(parmName: paramType, ...)] : syntaxClass;
```
There can be 0 or more params associated with the attribute; if there are none, the () are not needed.
* `name` gives the name of the attribute to define.
* `paramName` is the name of a param that is specified with the attribute use
* `paramType` is the type of the value associated with the param
* `syntaxClass` is the name of an AST node class that we expect this attribute to create when checked.
For example
```
__attributeTarget(FuncDecl)
attribute_syntax [CudaDeviceExport] : CudaDeviceExportAttribute;
```
Defines an attribute `CudaDeviceExport` which can only be applied to FuncDecl or derived AST types. Once semantically checked will be turned into a `CudaDeviceExportAttribute` attribute in the AST.
With a parameter
```
__attributeTarget(InterfaceDecl)
attribute_syntax [anyValueSize(size:int)] : AnyValueSizeAttribute;
```
Defines an attribute `anyValueSize` that can be applied to `InterfaceDecl` and derived types. It takes a single parameter called `size` of `int` type.
## Ref<T>
Allows returning or passing a value "by reference".
# GLSL/Vulkan specific
## __glsl_version(version)
Used to specify the GLSL version number that is required for the subsequent declaration. When Slang emits GLSL source, the version at the start of the file, will be the largest version seen that emitted code uses.
For example
```slang
__glsl_version(430)
```
## __glsl_extension
Specifies the GLSL extension that is required for the declaration to work. A declaration that has the intrinsic, when output to GLSL, will additionally add `#extension` to the GLSL or SPIR-V output.
Multiple extensions can be applied to a decoration if that is applicable, if there are multiple ways of implementing that can be emitted in the same manner (see the section around [target](#target-intrinsic)) for more details.
## __spirv_version
When a declaration is used for the SPIR-V target, the highest value seen will be taken as the SPIR-V version required. For compilation through GLSLANG, the value is passed down to GLSLANG specifying which SPIR-V version is being targeted.
Example
```
__spirv_version(1.3)
```
## vk::spirv_instruction
Provides a way to use a limited amount of the `GL_EXT_spirv_intrinsics` extension.
```
vk::spirv_instruction(op, set)
```
Op is the integer *value* for the op. The `set` is optional string which specifies the instruction set the op is associated with.
For example
```
__specialized_for_target(glsl)
[[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]]
void debugBreak();
```
# CUDA specific
## __cuda_sm_version
When declaration is used with this intrinsic for a CUDA target, the highest shader model seen will be passed down to the downstream CUDA compile (NVRTC).
# NVAPI
## [__requiresNVAPI]
If declaration is reached during a compilation for an applicable target (D3D11/12), will indicate that [NVAPI support](../nvapi-support.md) is required for declaration to work.

View file

@ -1,114 +0,0 @@
Slang Doc System
================
Slang contains a rudimentary documentation generation system. The mechanism used to mark up source is similar to [doxygen](https://www.doxygen.nl/manual/docblocks.html). Namely
```
/**
... text ... (JavaDoc style)
*/
void someFunctionA() {}
/*!
.. text .. (QT style)
another line
*/
void someFunctionB() {}
/// ... text ... (Multi line)
/// another line
void someFunctionC() {}
//!... text ... (QT Multi line)
//! another line
void someFunctionD() {}
```
All of the above examples will add the documentation for the declaration that appears after them. Also note that this slightly diverges from doxygen in that an empty line before and after in a multi line comment is *not* required.
We can also document the parameters to a function similarly
```
/// My function
void myFunction(
/// The A parameter
int a,
/// The B parameter
int b);
```
If you just need a single line comment to describe something, you can place the documentation after the parameter as in
```
/// My function
void myFunction( int a, //< The A parameter
int b) //< The B parameter
{}
```
This same mechanisms work for other kinds of common situations such as with enums
```
/// An enum
enum AnEnum
{
Value, ///< A value
/// Another value
/// With a multi-line comment
AnotherValue,
};
```
Like `doxygen` we can also have multi line comments after a declaration for example
```
/// An enum
enum AnEnum
{
Value, ///< A value
///< Some more information about `Value`
/// Another value
/// With a multi-line comment
AnotherValue,
};
```
To actually get Slang to output documentation you can use the `-doc` option from the `slangc` command line, or pass it in as parameter to `spProcessCommandLineArguments` or `processCommandLineArguments`. The documentation is currently output by default to the same `ISlangWriter` stream as diagnostics. So for `slangc` this will generally mean the terminal/stderr.
Currently the Slang doc system does not support any of the 'advanced' doxygen documentation features. If you add documentation to a declaration it is expected to be in [markdown](https://guides.github.com/features/mastering-markdown/).
Currently the only documentation style supported is a single file 'markdown' output. Future versions will support splitting into multiple files and linking between them. Also future versions may also support other documentation formats/standards.
It is possible to generate documentation for the slang core module. This can be achieved with `slangc` via
```
slangc -doc -compile-core-module
```
The documentation will be written to a file `stdlib-doc.md`.
It should be noted that it is not necessary to add markup to a declaration for the documentation system to output documentation for it. Without the markup the documentation is going to be very limited, in essence saying the declaration exists and other aspects that are available from the source. This may not be very helpful. For this reason and other reasons there is a mechanism to control the visibility of items in your source.
There are 3 visibility levels 'public', 'internal' and 'hidden'/'private'. There is a special comment that controls visibility for subsequent lines. The special comment starts with `//@` as shown below.
```
//@ public:
void thisFunctionAppearsInDocs() {}
//@ internal:
void thisFunctionCouldAppearInInternalDocs() {}
//@ hidden:
void thisFunctionWillNotAppearInDocs() {}
```

View file

@ -1,42 +0,0 @@
Frequently Asked Questions
==========================
### How did this project start?
The Slang project forked off from the ["Spire"](https://github.com/spire-lang/spire) shading language research project.
In particular, Slang aims to take the lessons learned in that research effort (about how to make more productive shader compilation languages and tools) and apply them to a system that is easier to adopt, and hopefully more amenable to production use.
### Why should I use Slang instead of glslang, hlsl2glslfork, the Microsoft open-source HLSL compiler, etc.?
If you are mostly just shopping around for a tool to get HLSL shaders working on other graphics APIs, then [this](http://aras-p.info/blog/2014/03/28/cross-platform-shaders-in-2014/) blog post is probably a good place to start.
If one of those tools meets your requirements, then you should probably use it.
Slang is a small project, and early in development, so you might find that you hit fewer bumps in the road with one of the more established tools out there.
The goal of the Slang project is not to make "yet another HLSL-to-GLSL translator," but rather to create a shading language and supporting toolchain that improves developer productivity (and happiness) over the existing HLSL language and toolchain, while providing a reasonable adoption path for developers who have an existing investment in HLSL shader code.
If you think that is something interesting and worth supporting, then please get involved!
### What would make a shading language more productive?
This is probably best answered by pointing to the most recent publication from the Spire research project:
[Shader Components: Modular and High Performance Shader Development](http://graphics.cs.cmu.edu/projects/shadercomp/)
Some other papers for those who would like to read up on our inspiration:
[A System for Rapid Exploration of Shader Optimization Choices](http://graphics.cs.cmu.edu/projects/spire/)
[Spark: Modular, Composable Shaders for Graphics Hardware](https://graphics.stanford.edu/papers/spark/)
### Who is using Slang?
Right now the only user of Slang is the [Falcor](https://github.com/NVIDIA/Falcor) real-time rendering framework developed and used by NVIDIA Research.
The implementation of Slang has so far focused heavily on the needs of Falcor.
### Won't we all just be using C/C++ for shaders soon?
The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate languages (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces.
Having support for writing GPU shaders in a reasonably-complete C/C++ language would be great.
We are supportive of efforts in the "C++ for shaders" direction.
The Slang effort is about trying to solve the challenges that are unique to the real-time graphics domain, and that won't magically get better by switching to C++.

View file

@ -1,264 +0,0 @@
---
layout: user-guide
---
Getting Started with Slang Graphics Layer
============================================
[//]: # (ShortTitle: Getting Started)
In this article, we provide instructions on installing the graphics layer into your application, and demonstrate the basic use of the graphics layer via a simple compute shader example. We will use the same [hello-world.slang](https://github.com/shader-slang/slang/blob/master/examples/hello-world/hello-world.slang) shader from the `hello-world` example in the [Slang getting started tutorial](../user-guide/01-get-started.html).
Installation
------------------
### Obtain Release Package
The Slang graphics library is implemented in `gfx.dll` (`libgfx.so` on Unix systems). Since Slang is tightly integrated into the graphics layer, you need to include both `slang.dll` and `gfx.dll` in your application. Official Slang releases provide prebuilt binaries for both libraries as well as the header files to use them. If you prefer to build the libraries yourself, please follow [build instructions](../building).
### Install Header Files
Once you have built or obtained a Slang release, make the following header files from the release package accessible to your application:
- `slang-gfx.h`
- `slang.h`
- `slang-com-ptr.h`
- `slang-com-helper.h`
### Linking the Library
On Windows (with `msvc`), make sure that `gfx.lib` is provided as linker input via the `Linker->Input->Additional Dependencies` project configuration. On Unix systems, make sure to pass `-lgfx` when compiling your application.
Creating a GPU Device
---------------------------
To start using the graphics layer, create an `IDevice` object by calling `gfxCreateDevice`. The `IDevice` interface is the main entry-point to interact with the graphics layer. It represents the GPU device context where all interactions with the GPU take place.
```cpp
#include "slang-gfx.h"
using namespace gfx;
IDevice* gDevice = nullptr;
void initGfx()
{
IDevice::Desc deviceDesc = {};
    gfxCreateDevice(&deviceDesc, &gDevice);
}
```
The `IDevice::Desc` struct passed to `gfxCreateDevice` defines many configurations on how a device shall be created. Most notably, the `deviceType` field specifies what underlying graphics API to use. By default, `gfxCreateDevice` will attempt to use the best API available on current platform. On Windows, the layer will prefer to use `D3D12` but will also try to use `Vulkan`, `D3D11`, `OpenGL` in order, in case the former API isn't available. On Unix systems, it will always default to `Vulkan` since this is the only API that supports full Graphics capabilities. A user can always specify the `deviceType` field to force the layer to use a specific API. If the device creation succeeds, `gfxCreateDevice` will return `SLANG_OK(0)`.
Similar to the Slang API, objects created by the graphics layer also conform to the COM standard. The user is responsible for calling the `release` method on every object returned to the user by the layer to prevent memory leaks.
Enabling the Debug Layer
--------------------------
The Slang Graphics Layer provides a debug layer that can be enabled to perform additional validations to ensure correctness. To enable the debug layer, simply call `gfxEnableDebugLayer` before calling `gfxCreateDevice`.
To receive diagnostic messages, you need to create a class that implements the `IDebugCallback` interface, and call `gfxSetDebugCallback` to provide the callback instance to the graphics layer. For example:
```cpp
struct MyDebugCallback : public IDebugCallback
{
virtual SLANG_NO_THROW void SLANG_MCALL handleMessage(
DebugMessageType type,
DebugMessageSource source,
const char* message) override
{
printf("%s\n", message);
}
};
MyDebugCallback gCallback;
void initGfx()
{
gfxEnableDebugLayer();
gfxSetDebugCallback(&gCallback);
IDevice::Desc deviceDesc = {};
gfxCreateDevice(&deviceDesc, &gDevice);
}
```
Creating a Command Queue
------------------------------
A command queue is where the GPU device takes commands from the application to execute. To create a command queue, call `IDevice::createCommandQueue`.
```cpp
ICommandQueue* gQueue = nullptr;
ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics};
device->createCommandQueue(queueDesc, &gQueue);
```
Allocating a Command Buffer
------------------------------
A command buffer is treated as a _transient_ resource by the graphics layer. A transient resource is required by the GPU during execution of a task, and is no longer needed when the execution has completed. The Slang graphics layer provides an `ITransientResourceHeap` object to efficiently manage the life cycle of transient resources. In order to allocate a command buffer, we need to create an `ITransientResourceHeap` object first by calling `IDevice::createTransientResourceHeap`.
```cpp
ITransientResourceHeap* gTransientHeap;
ITransientResourceHeap::Desc transientHeapDesc = {};
transientHeapDesc.constantBufferSize = 4096;
device->createTransientResourceHeap(transientHeapDesc, &gTransientHeap);
```
With a `TransientResourceHeap`, we can call `createCommandBuffer` method to allocate a command buffer:
```cpp
ICommandBuffer* commandBuffer;
gTransientHeap->createCommandBuffer(&commandBuffer);
```
A user should regularly call `ITransientResourceHeap::synchronizeAndReset` to recycle all previously allocated transient resources. A standard practice is to create two `TransientResourceHeap`s in a double-buffered renderer, and alternate the transient heap on each frame to allocate command buffers and other transient resources. With this setup, the application can call `synchronizeAndReset` at start of each frame on the corresponding transient resource heap to make sure all transient resources are timely recycled.
Creating Buffer Resource
------------------------------
We need to create the buffer resources used by our `hello-world` shader as input and output. This can be done via the `IDevice::createBufferResource` method. When creating a resource, the user must specify a resource state that the resource will be in by default, as well as all allowed resource states the resource can be in. Resource states in the graphics layer follow the same model of resource states in D3D12, and the user can also assume the same automatic resource promotion/demotion behavior as in D3D12.
```cpp
const int numberCount = 4;
float initialData[] = {0.0f, 1.0f, 2.0f, 3.0f};
IBufferResource::Desc bufferDesc = {};
bufferDesc.sizeInBytes = numberCount * sizeof(float);
bufferDesc.format = Format::Unknown;
bufferDesc.elementSize = sizeof(float);
bufferDesc.defaultState = ResourceState::UnorderedAccess;
bufferDesc.allowedStates = ResourceStateSet(ResourceState::UnorderedAccess,
ResourceState::ShaderResource);
IBufferResource* inputBuffer0;
SLANG_RETURN_ON_FAIL(device->createBufferResource(
bufferDesc,
(void*)initialData,
&inputBuffer0));
```
Creating a Pipeline State
---------------------------
A pipeline state object encapsulates the shader program to execute on the GPU device, as well as other fixed-function states for graphics rendering. In this example, we will be compiling and running a simple compute shader written in Slang. To do that we need to create a compute pipeline state from a Slang `IComponentType`. We refer the reader to the [Slang getting started tutorial](../user-guide/01-getting-started.html) on how to create a Slang `IComponentType` from a shader file. The following source creates a Graphics layer `IPipelineState` object from a shader module represented by a `slang::IComponentType` object:
```cpp
void createComputePipelineFromShader(
IComponentType* slangProgram,
IPipelineState*& outPipelineState)
{
    // The `IComponentType` parameter represents the compute
    // kernel; we can use it to create an `IShaderProgram` object in the graphics
// layer.
IShaderProgram* shaderProgram = nullptr;
IShaderProgram::Desc programDesc = {};
programDesc.pipelineType = PipelineType::Compute;
programDesc.slangProgram = slangProgram;
gDevice->createShaderProgram(programDesc, &shaderProgram);
// Create a compute pipeline state from `shaderProgram`.
ComputePipelineStateDesc pipelineDesc = {};
pipelineDesc.program = shaderProgram;
gDevice->createComputePipelineState(pipelineDesc, &outPipelineState);
// Since we no longer need to use `shaderProgram` after creating
// a pipeline state, we should release it to prevent memory leaks.
shaderProgram->release();
}
```
Recording Commands to Run a Compute Shader
------------------------------------
[//]: # (ShortTitle: Recording Commands)
Now that we have created all the resources and allocated a command buffer, we can start recording commands to
set the compute pipeline state, bind shader parameters, and dispatch a kernel launch.
Since we are only using compute commands, we begin the recording by calling `ICommandBuffer::encodeComputeCommands`. This method returns a transient `IComputeCommandEncoder` object for accepting actual compute commands.
```cpp
IComputeCommandEncoder* encoder = commandBuffer->encodeComputeCommands();
```
The first command is to bind the pipeline state we created earlier:
```cpp
IShaderObject* rootObject = encoder->bindPipeline(pipelineState);
```
Binding a pipeline state yields a transient `IShaderObject` object. We can use the `IShaderObject` instance to bind shader parameters. For the `hello-world` shader, we need to bind three parameters: `buffer0`, `buffer1` and `result`.
```cpp
// Create a resource view for buffer0.
IBufferView* buffer0View;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::ShaderResource;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer0, viewDesc, &buffer0View));
}
// Bind the resource view to shader.
rootObject->setResource(ShaderOffset{0,0,0}, buffer0View);
// Create a resource view for buffer1.
IBufferView* buffer1View;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::ShaderResource;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer1, viewDesc, &buffer1View));
}
// Bind the resource view to shader.
rootObject->setResource(ShaderOffset{0,1,0}, buffer1View);
// Create a resource view for resultBuffer.
IBufferView* resultView;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::UnorderedAccess;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(resultBuffer, viewDesc, &resultView));
}
rootObject->setResource(ShaderOffset{0,2,0}, resultView);
```
> #### Note
> Since `rootObject` is a transient object returned by the command encoder, it is automatically released
> with the command encoder. Calling `release` on `rootObject` is OK but not needed.
After binding all shader parameters, we can now dispatch the kernel:
```cpp
encoder->dispatchCompute(1, 1, 1);
```
> #### Note
> Command encoders are transient objects managed by a command buffer, and are automatically released
> with the command buffer. Calling `release` on the encoder is OK but not needed.
When we are done recording commands, we need to close the command encoder and the command buffer.
```cpp
encoder->endEncoding();
commandBuffer->close();
```
Now we are ready to submit the command buffer to the command queue, and wait for the GPU execution to finish.
```cpp
gQueue->executeCommandBuffer(commandBuffer);
gQueue->wait();
```
Cleaning Up
----------------
At the end of our example, we need to make sure all created objects are released by calling the `release` method:
```cpp
commandBuffer->release();
gQueue->release();
gTransientHeap->release();
inputBuffer0->release();
buffer0View->release();
...
gDevice->release();
```
The order of calls to `release` does not matter, as long as all objects are released by the user.

View file

@ -1,25 +0,0 @@
---
layout: user-guide
---
Slang Graphics Layer
=============
The Slang Graphics Layer is an abstraction library of graphics APIs to support cross-platform applications that utilize GPU graphics/compute capabilities. The Slang Graphics Layer tightly integrates the Slang shading language to provide the most complete cross-platform GPU application development experience. The Slang language and compilation API is designed to work best when the application assumes several best practices in terms of shader specialization and parameter binding. The Slang Graphics Layer follows exactly the same best practices supported by Slang's compilation model. Outside of shader-related areas, the graphics layer's interface is designed to closely follow the modern graphics API models in Direct3D 12, Vulkan and Metal, such that the layer's only purpose is to abstract the differences between these underlying APIs instead of providing a higher-level abstraction that simplifies the interface. This design philosophy allows users to benefit from the ideas in the Slang shading language without giving up precise control over other aspects of the graphics API.
The current support status of operating system and graphics APIs is shown in the following matrix.
| | Windows | Linux |
| :------------ | :----------------: | :----------------: |
| Direct3D 12 | Yes | No |
| Direct3D 11 | Yes | No |
| Vulkan | Yes | Yes |
| OpenGL | Yes | No |
| CPU emulation | Yes (Compute Only) | Yes (Compute Only) |
| CUDA | Yes (Compute Only) | Yes (Compute Only) |
> #### Note
> The graphics layer is still under active development and we intend to add more platforms and APIs in the future.
In this documentation, we will walk through various parts of the library and demonstrate how it can be used in your application.

View file

@ -1,5 +0,0 @@
<nav>
<li><a href="../../">Docs</a></li>
<li><a href="index.html">Slang Graphics Layer</a></li>
</nav>

View file

@ -1,18 +0,0 @@
<ul class="toc_root_list"><li data-link="index"><span>Slang Graphics Layer</span>
<ul class="toc_list">
<li data-link="01-getting-started"><span>Getting Started</span>
<ul class="toc_list">
<li data-link="01-getting-started#installation"><span>Installation</span></li>
<li data-link="01-getting-started#creating-a-gpu-device"><span>Creating a GPU Device</span></li>
<li data-link="01-getting-started#enabling-the-debug-layer"><span>Enabling the Debug Layer</span></li>
<li data-link="01-getting-started#creating-a-command-queue"><span>Creating a Command Queue</span></li>
<li data-link="01-getting-started#allocating-a-command-buffer"><span>Allocating a Command Buffer</span></li>
<li data-link="01-getting-started#creating-buffer-resource"><span>Creating Buffer Resource</span></li>
<li data-link="01-getting-started#creating-a-pipeline-state"><span>Creating a Pipeline State</span></li>
<li data-link="01-getting-started#recording-commands-to-run-a-compute-shader"><span>Recording Commands</span></li>
<li data-link="01-getting-started#cleaning-up"><span>Cleaning Up</span></li>
</ul>
</li>
</ul>
</li>
</ul>

View file

@ -1,266 +0,0 @@
Unsupported Formats
======================
GFX currently does not support the following listed D3D and Vulkan formats.
With the exception of `D24_UNORM_S8_UINT`, these formats have been omitted as
their counterpart API does not have a corresponding format. `D24_UNORM_S8_UINT`
has been omitted as it is only supported by Nvidia.
- `DXGI_FORMAT_R32G8X24_TYPELESS`
- `DXGI_FORMAT_D32_FLOAT_S8X24_UINT`
- `DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS`
- `DXGI_FORMAT_X32_TYPELESS_G8X24_UINT`
- `DXGI_FORMAT_R24G8_TYPELESS`
- `DXGI_FORMAT_D24_UNORM_S8_UINT`
- `DXGI_FORMAT_R24_UNORM_X8_TYPELESS`
- `DXGI_FORMAT_X24_TYPELESS_G8_UINT`
- `DXGI_FORMAT_A8_UNORM`
- `DXGI_FORMAT_R1_UNORM`
- `DXGI_FORMAT_R8G8_B8G8_UNORM`
- `DXGI_FORMAT_G8R8_G8B8_UNORM`
- `DXGI_FORMAT_BC1_TYPELESS`
- `DXGI_FORMAT_BC2_TYPELESS`
- `DXGI_FORMAT_BC3_TYPELESS`
- `DXGI_FORMAT_BC4_TYPELESS`
- `DXGI_FORMAT_BC5_TYPELESS`
- `DXGI_FORMAT_B8G8R8X8_UNORM`
- `DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM`
- `DXGI_FORMAT_B8G8R8X8_TYPELESS`
- `DXGI_FORMAT_B8G8R8X8_UNORM_SRGB`
- `DXGI_FORMAT_BC6H_TYPELESS`
- `DXGI_FORMAT_BC7_TYPELESS`
- `DXGI_FORMAT_AYUV`
- `DXGI_FORMAT_Y410`
- `DXGI_FORMAT_Y416`
- `DXGI_FORMAT_NV12`
- `DXGI_FORMAT_P010`
- `DXGI_FORMAT_P016`
- `DXGI_FORMAT_420_OPAQUE`
- `DXGI_FORMAT_YUY2`
- `DXGI_FORMAT_Y210`
- `DXGI_FORMAT_Y216`
- `DXGI_FORMAT_NV11`
- `DXGI_FORMAT_AI44`
- `DXGI_FORMAT_IA44`
- `DXGI_FORMAT_P8`
- `DXGI_FORMAT_A8P8`
- `DXGI_FORMAT_P208`
- `DXGI_FORMAT_V208`
- `DXGI_FORMAT_V408`
- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE`
- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE`
- `VK_FORMAT_R4G4_UNORM_PACK8`
- `VK_FORMAT_R4G4B4A4_UNORM_PACK16`
- `VK_FORMAT_B4G4R4A4_UNORM_PACK16`
- `VK_FORMAT_B5G6R5_UNORM_PACK16`
- `VK_FORMAT_R5G5B5A1_UNORM_PACK16`
- `VK_FORMAT_B5G5R5A1_UNORM_PACK16`
- `VK_FORMAT_R8_USCALED`
- `VK_FORMAT_R8_SSCALED`
- `VK_FORMAT_R8_SRGB`
- `VK_FORMAT_R8G8_USCALED`
- `VK_FORMAT_R8G8_SSCALED`
- `VK_FORMAT_R8G8_SRGB`
- `VK_FORMAT_R8G8B8_UNORM`
- `VK_FORMAT_R8G8B8_SNORM`
- `VK_FORMAT_R8G8B8_USCALED`
- `VK_FORMAT_R8G8B8_SSCALED`
- `VK_FORMAT_R8G8B8_UINT`
- `VK_FORMAT_R8G8B8_SINT`
- `VK_FORMAT_R8G8B8_SRGB`
- `VK_FORMAT_B8G8R8_UNORM`
- `VK_FORMAT_B8G8R8_SNORM`
- `VK_FORMAT_B8G8R8_USCALED`
- `VK_FORMAT_B8G8R8_SSCALED`
- `VK_FORMAT_B8G8R8_UINT`
- `VK_FORMAT_B8G8R8_SINT`
- `VK_FORMAT_B8G8R8_SRGB`
- `VK_FORMAT_R8G8B8A8_USCALED`
- `VK_FORMAT_R8G8B8A8_SSCALED`
- `VK_FORMAT_B8G8R8A8_SNORM`
- `VK_FORMAT_B8G8R8A8_USCALED`
- `VK_FORMAT_B8G8R8A8_SSCALED`
- `VK_FORMAT_B8G8R8A8_UINT`
- `VK_FORMAT_B8G8R8A8_SINT`
- `VK_FORMAT_A8B8G8R8_UNORM_PACK32`
- `VK_FORMAT_A8B8G8R8_SNORM_PACK32`
- `VK_FORMAT_A8B8G8R8_USCALED_PACK32`
- `VK_FORMAT_A8B8G8R8_SSCALED_PACK32`
- `VK_FORMAT_A8B8G8R8_UINT_PACK32`
- `VK_FORMAT_A8B8G8R8_SINT_PACK32`
- `VK_FORMAT_A8B8G8R8_SRGB_PACK32`
- `VK_FORMAT_A2R10G10B10_UNORM_PACK32`
- `VK_FORMAT_A2R10G10B10_SNORM_PACK32`
- `VK_FORMAT_A2R10G10B10_USCALED_PACK32`
- `VK_FORMAT_A2R10G10B10_SSCALED_PACK32`
- `VK_FORMAT_A2R10G10B10_UINT_PACK32`
- `VK_FORMAT_A2R10G10B10_SINT_PACK32`
- `VK_FORMAT_A2B10G10R10_SNORM_PACK32`
- `VK_FORMAT_A2B10G10R10_USCALED_PACK32`
- `VK_FORMAT_A2B10G10R10_SSCALED_PACK32`
- `VK_FORMAT_A2B10G10R10_SINT_PACK32`
- `VK_FORMAT_R16_USCALED`
- `VK_FORMAT_R16_SSCALED`
- `VK_FORMAT_R16G16_USCALED`
- `VK_FORMAT_R16G16_SSCALED`
- `VK_FORMAT_R16G16B16_UNORM`
- `VK_FORMAT_R16G16B16_SNORM`
- `VK_FORMAT_R16G16B16_USCALED`
- `VK_FORMAT_R16G16B16_SSCALED`
- `VK_FORMAT_R16G16B16_UINT`
- `VK_FORMAT_R16G16B16_SINT`
- `VK_FORMAT_R16G16B16_SFLOAT`
- `VK_FORMAT_R16G16B16A16_USCALED`
- `VK_FORMAT_R16G16B16A16_SSCALED`
- `VK_FORMAT_R64_UINT`
- `VK_FORMAT_R64_SINT`
- `VK_FORMAT_R64_SFLOAT`
- `VK_FORMAT_R64G64_UINT`
- `VK_FORMAT_R64G64_SINT`
- `VK_FORMAT_R64G64_SFLOAT`
- `VK_FORMAT_R64G64B64_UINT`
- `VK_FORMAT_R64G64B64_SINT`
- `VK_FORMAT_R64G64B64_SFLOAT`
- `VK_FORMAT_R64G64B64A64_UINT`
- `VK_FORMAT_R64G64B64A64_SINT`
- `VK_FORMAT_R64G64B64A64_SFLOAT`
- `VK_FORMAT_X8_D24_UNORM_PACK32`
- `VK_FORMAT_S8_UINT`
- `VK_FORMAT_D16_UNORM_S8_UINT`
- `VK_FORMAT_D24_UNORM_S8_UINT`
- `VK_FORMAT_D32_SFLOAT_S8_UINT`
- `VK_FORMAT_BC1_RGB_UNORM_BLOCK`
- `VK_FORMAT_BC1_RGB_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK`
- `VK_FORMAT_EAC_R11_UNORM_BLOCK`
- `VK_FORMAT_EAC_R11_SNORM_BLOCK`
- `VK_FORMAT_EAC_R11G11_UNORM_BLOCK`
- `VK_FORMAT_EAC_R11G11_SNORM_BLOCK`
- `VK_FORMAT_ASTC_4x4_UNORM_BLOCK`
- `VK_FORMAT_ASTC_4x4_SRGB_BLOCK`
- `VK_FORMAT_ASTC_5x4_UNORM_BLOCK`
- `VK_FORMAT_ASTC_5x4_SRGB_BLOCK`
- `VK_FORMAT_ASTC_5x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_5x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_6x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_6x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_6x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_6x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x8_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x8_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x8_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x8_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x10_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x10_SRGB_BLOCK`
- `VK_FORMAT_ASTC_12x10_UNORM_BLOCK`
- `VK_FORMAT_ASTC_12x10_SRGB_BLOCK`
- `VK_FORMAT_ASTC_12x12_UNORM_BLOCK`
- `VK_FORMAT_ASTC_12x12_SRGB_BLOCK`
- `VK_FORMAT_G8B8G8R8_422_UNORM`
- `VK_FORMAT_B8G8R8G8_422_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM`
- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM`
- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM`
- `VK_FORMAT_R10X6_UNORM_PACK16`
- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16`
- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16`
- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16`
- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16`
- `VK_FORMAT_R12X4_UNORM_PACK16`
- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16`
- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16`
- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16`
- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16`
- `VK_FORMAT_G16B16G16R16_422_UNORM`
- `VK_FORMAT_B16G16R16G16_422_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM`
- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM`
- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM`
- `VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT`
- `VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT`
- `VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT`
- `VK_FORMAT_G8B8G8R8_422_UNORM_KHR`
- `VK_FORMAT_B8G8R8G8_422_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR`
- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR`
- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR`
- `VK_FORMAT_R10X6_UNORM_PACK16_KHR`
- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR`
- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR`
- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR`
- `VK_FORMAT_R12X4_UNORM_PACK16_KHR`
- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR`
- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR`
- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR`
- `VK_FORMAT_G16B16G16R16_422_UNORM_KHR`
- `VK_FORMAT_B16G16R16G16_422_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR`
- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR`
- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR`

View file

@ -1,9 +0,0 @@
### Derivatives In Compute
An entry point may be decorated with `[DerivativeGroupQuad]` or `[DerivativeGroupLinear]` to specify how to use derivatives in compute shaders.
GLSL syntax may also be used, but is not recommended (`derivative_group_quadsNV`/`derivative_group_linearNV`).
Targets:
* **_SPIRV:_** Enables `DerivativeGroupQuadsNV` or `DerivativeGroupLinearNV`.
* **_GLSL:_** Enables `derivative_group_quadsNV` or `derivative_group_linearNV`.
* **_HLSL:_** Does nothing. `sm_6_6` is required to use derivatives in compute shaders. HLSL uses an equivalent of `DerivativeGroupQuad`.

View file

@ -1,205 +0,0 @@
Texture Footprint Queries
=========================
Slang supports querying the *footprint* of a texture sampling operation: the texels that would be accessed when performing that operation.
This feature is supported on Vulkan via the `GL_NV_shader_texture_footprint` extension, and on D3D12 via the `NvFootprint*` functions exposed by NVAPI.
# Background
There are many GPU rendering techniques that involve generating a texture (e.g., by rendering to it) and then sampling from that texture in a 3D rendering pass, such that it is difficult to predict *a priori* which parts of the texture will be accessed, or not.
As one example, consider rendering a shadow map that will be accessed when shading a g-buffer.
Depending on the geometry that was rendered into the g-buffer, and the occlusion that might exist, some parts of the shadow map might not be needed at all.
In principle, an application could use a compute pass on the g-buffer to compute, for each pixel, the part of the shadow-map texture that it will access - its footprint.
The application could then aggregate these footprints into a stencil mask or other data structure that could be used to optimize the rendering pass that generates the shadow map.
Unfortunately, it is almost impossible for applications to accurately and reliably predict the texel data that particular sampling operations will require, once non-trivial texture filtering modes are considered.
Sampling operations support a wide variety of state that affects the lookup and filtering of texels. For example:
* When bilinear filtering is enabled, a sampling operation typically accesses the four texels closest to the sampling location and blends them.
* When trilinear filtering is enabled, a sampling operation may access texels at two different mip levels.
* When anisotropic filtering is enabled, a sampling operation may take up to N *taps* (where N is the maximum supported degree of anisotropy), each of which may itself access a neighborhood of texels to produce a filtered value for that tap.
* When sampling a cube map, a sampling operation may straddle the "seam" between two or even three cube faces.
Texture footprint queries are intended to solve this problem by providing application developers with a primitive that can query the footprint of a texture sampling operation using the exact same sampler state and texture coordinates that will be used when sampling the texture later.
# Slang Shader API
Rather than exactly mirror the Vulkan GLSL extension or the NVAPI functions, the Slang core module provides a single common interface that can map to either of those implementations.
## Basics
A typical 2D texture sampling operation is performed using the `Sample()` method on `Texture2D`:
```hlsl
Texture2D<float4> texture = ...;
SamplerState sampler = ...;
float2 coords = ...;
// Sample a 2D texture
float4 color = texture.Sample(
sampler, coords);
```
To query the footprint that would be accessed by this operation, we can use an operation like:
```hlsl
uint granularity = ...;
TextureFootprint2D footprint = texture.queryFootprintCoarse(granularity,
sampler, coords);
```
Note that the same arguments used to call `Sample` above are here passed to `queryFootprint` in the exact same order.
The returned `footprint` encodes a conservative footprint of the texels that would be accessed by the equivalent `Sample` operation above.
Texture footprints are encoded in terms of blocks of texels, and the size of those blocks determines the *granularity* of the footprint.
The `granularity` argument to `queryFootprintCoarse` above indicates the granularity of blocks that the application requests.
In cases where a filtering operation might access two mip levels - one coarse and one fine - a footprint query only returns information about one of the two levels.
The application selects between these options by calling either `queryFootprintCoarse` or `queryFootprintFine`.
## Variations
A wide range of footprint queries are provided, corresponding to various cases of texture sampling operations with different parameters.
For 2D textures, the following functions are supported:
```hlsl
TextureFootprint2D Texture2D.queryFootprintCoarse(
uint granularity, SamplerState sampler, float2 coords);
TextureFootprint2D Texture2D.queryFootprintFine(
uint granularity, SamplerState sampler, float2 coords);
TextureFootprint2D Texture2D.queryFootprintCoarseBias(
uint granularity, SamplerState sampler, float2 coords,
float lodBias);
TextureFootprint2D Texture2D.queryFootprintFineBias(
uint granularity, SamplerState sampler, float2 coords,
float lodBias);
TextureFootprint2D Texture2D.queryFootprintCoarseLevel(
uint granularity, SamplerState sampler, float2 coords,
float lod);
TextureFootprint2D Texture2D.queryFootprintFineLevel(
uint granularity, SamplerState sampler, float2 coords,
float lod);
TextureFootprint2D Texture2D.queryFootprintCoarseGrad(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy);
TextureFootprint2D Texture2D.queryFootprintFineGrad(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy);
// Vulkan-only:
TextureFootprint2D Texture2D.queryFootprintCoarseClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintCoarseBiasClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodBias,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineBiasClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodBias,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintCoarseGradClamp(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineGradClamp(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy,
float lodClamp);
```
For 3D textures, the following functions are supported:
```hlsl
TextureFootprint3D Texture3D.queryFootprintCoarse(
uint granularity, SamplerState sampler, float3 coords);
TextureFootprint3D Texture3D.queryFootprintFine(
uint granularity, SamplerState sampler, float3 coords);
TextureFootprint3D Texture3D.queryFootprintCoarseBias(
uint granularity, SamplerState sampler, float3 coords,
float lodBias);
TextureFootprint3D Texture3D.queryFootprintFineBias(
uint granularity, SamplerState sampler, float3 coords,
float lodBias);
TextureFootprint3D Texture3D.queryFootprintCoarseLevel(
uint granularity, SamplerState sampler, float3 coords,
float lod);
TextureFootprint3D Texture3D.queryFootprintFineLevel(
uint granularity, SamplerState sampler, float3 coords,
float lod);
// Vulkan-only:
TextureFootprint3D Texture3D.queryFootprintCoarseClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintFineClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintCoarseBiasClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodBias,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintFineBiasClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodBias,
float lodClamp);
```
## Footprint Types
Footprint queries on 2D and 3D textures return values of type `TextureFootprint2D` and `TextureFootprint3D`, respectively, which are built-in `struct`s defined in the Slang core module:
```
struct TextureFootprint2D
{
typealias Anchor = uint2;
typealias Offset = uint2;
typealias Mask = uint2;
typealias LOD = uint;
typealias Granularity = uint;
property anchor : Anchor { get; }
property offset : Offset { get; }
property mask : Mask { get; }
property lod : LOD { get; }
property granularity : Granularity { get; }
property isSingleLevel : bool { get; }
}
struct TextureFootprint3D
{
typealias Anchor = uint3;
typealias Offset = uint3;
typealias Mask = uint2;
typealias LOD = uint;
typealias Granularity = uint;
property anchor : Anchor { get; }
property offset : Offset { get; }
property mask : Mask { get; }
property lod : LOD { get; }
property granularity : Granularity { get; }
property isSingleLevel : bool { get; }
}
```
A footprint is encoded in terms of *texel groups*, where the `granularity` determines the size of those groups.
When possible, the returned footprint will match the granularity passed into the query operation, but a larger granularity may be selected in cases where the footprint is too large to encode at the requested granularity.
The `anchor` property specifies an anchor point in the texture, in the vicinity of the footprint. Its components are in multiples of 8 texel groups.
The `offset` property specifies how the bits in `mask` map to texel groups in the vicinity of the `anchor` point.
The `mask` property is a 64-bit bitfield (encoded as a `uint2`), where each bit represents footprint coverage of one texel group, within an 8x8 (for 2D textures) or 4x4x4 (for 3D textures) neighborhood of texel groups.
The `lod` property indicates the mipmap level that would be accessed by the sampling operation.
The `isSingleLevel` property indicates if the sampling operation is known to access only a single mip level.
Note that this property will always be `false` when using the D3D/NVAPI path.

View file

@ -1,259 +0,0 @@
Slang Language Guide
====================
This document will try to describe the main characteristics of the Slang language that might make it different from other shading languages you have used.
The Basics
----------
Slang is similar to HLSL, and it is expected that many HLSL programs can be used as Slang code with no modifications.
Big-picture stuff that is supported:
* A C-style preprocessor
* Ordinary function, `struct`, `typedef`, etc. declarations
* The standard vector/matrix types like `float3` and `float4x4`
* The less-used explicit `vector<T,N>` and `matrix<T,R,C>` types
* `cbuffer` declarations for uniform parameters
* Global-scope declarations of texture/sampler parameters, including with `register` annotations
* Entry points with varying `in`/`out` parameters using semantics (including `SV_*` system-value semantics)
* The built-in templated resource types like `Texture2D<T>` with their object-oriented syntax for sampling operations
* Attributes like `[unroll]` are parsed, and passed along for HLSL/DXBC output, but dropped for other targets
* `struct` types that contain textures/samplers as well as ordinary uniform data, both as function parameters and in constant buffers
* The built-in functions up through Shader Model 6.0 (as documented on MSDN) are supported
New Features
------------
### Import Declarations
In order to support better software modularity, and also to deal with the issue of how to integrate shader libraries written in Slang into other languages, Slang introduces an `import` declaration construct.
The basic idea is that if you write a file `foo.slang` like this:
```hlsl
// foo.slang
float4 someFunc(float4 x) { return x; }
```
you can then import this code into another file in Slang, HLSL, or GLSL:
```hlsl
// bar.slang
import foo;
float4 someOtherFunc(float4 y) { return someFunc(y); }
```
The simplest way to think of it is that the `import foo` declaration instructs the compiler to look for `foo.slang` (in the same search paths it uses for `#include` files), and give an error if it isn't found.
If `foo.slang` is found, then the compiler will go ahead and parse and type-check that file, and make any declarations there visible to the original file (`bar.slang` in this example).
When it comes time to generate output code, Slang will output any declarations from `import`ed files that were actually used (it skips those that are never referenced), and it will cross-compile them as needed for the chosen target.
A few other details worth knowing about `import` declarations:
* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory separators (`/`). After these substitutions, `.slang` is added to the end of the name.
* If there are multiple `import` declarations naming the same file, it will only be imported once. This is also true for nested imports.
* Currently importing does not imply any kind of namespacing; all global declarations still occupy a single namespace, and collisions between different imported files (or between a file and the code it imports) are possible. This is a bug.
* If file `A.slang` imports `B.slang`, and then some other file does `import A;`, then only the names from `A.slang` are brought into scope, not those from `B.slang`. This behavior can be controlled by having `A.slang` use `__exported import B;` to also re-export the declarations it imports from `B`.
* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namespace foo;` in C++ (perhaps without the same baggage).
### Explicit Parameter Blocks
One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descriptor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal).
However, HLSL and GLSL don't support explicit syntax for parameter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case).
Slang supports a simple and explicit syntax for exploiting parameter blocks:
```hlsl
struct ViewParams
{
float3 cameraPos;
float4x4 viewProj;
TextureCube envMap;
};
ParameterBlock<ViewParams> gViewParams;
```
In this example, the fields of `gViewParams` will be assigned to registers/bindings in a way that supports allocating them into a single parameter block.
For example, when generating GLSL for Vulkan, the Slang compiler will generate a single `uniform` block (for `cameraPos` and `viewProj`) and a global `textureCube` for `envMap`, both decorated with the same `layout(set = ...)`.
### Interfaces
Slang supports declaring `interface`s that user-defined `struct` types can implement.
For example, here is a simple interface for light sources:
```hlsl
// light.slang
struct LightSample { float3 intensity; float3 direction; };
interface ILight
{
LightSample sample(float3 position);
}
```
We can now define a simple user type that "conforms to" (implements) the `ILight` interface:
```hlsl
// point-light.slang
import light;
struct PointLight : ILight
{
float3 position;
float3 intensity;
LightSample sample(float3 hitPos)
{
float3 delta = hitPos - position;
float distance = length(delta);
LightSample sample;
sample.direction = delta / distance;
sample.intensity = intensity * falloff(distance);
return sample;
}
}
```
### Generics
Slang supports *generic* declarations, using the common angle-bracket (`<>`) syntax from languages like C#, Java, etc.
For example, here is a generic function that works with any type of light:
```hlsl
// diffuse.slang
import light;
float4 computeDiffuse<L : ILight>( float4 albedo, float3 P, float3 N, L light )
{
LightSample sample = light.sample(P);
float nDotL = max(0, dot(N, sample.direction));
return albedo * nDotL;
}
```
The `computeDiffuse` function works with any type `L` that implements the `ILight` interface.
Unlike with C++ templates, the `computeDiffuse` function can be compiled and type-checked once (you won't suddenly get unexpected error messages when plugging in a new type).
#### Global-Scope Generic Parameters
Putting generic parameter directly on functions is helpful, but in many cases existing HLSL shaders declare their parameters at global scope.
For example, we might have a shader that uses a global declaration of material parameters:
```hlsl
Material gMaterial;
```
In order to allow such a shader to be converted to use a generic parameter for the material type (to allow for specialization), Slang supports declaring type parameters at the global scope:
```hlsl
type_param M : IMaterial;
M gMaterial;
```
Conceptually, you can think of this syntax as wrapping your entire shader program in a generic with parameter `<M : IMaterial>`.
This isn't beautiful syntax, but it may help when incrementally porting an existing HLSL codebase to use Slang's features.
### Associated Types
Sometimes it is difficult to define an interface because each type that implements it might need to make its own choice about some intermediate type.
As a concrete example, suppose we want to define an interface `IMaterial` for material surface shaders, where each material might use its own BRDF.
We want to support evaluating the *pattern* of the surface separate from the reflectance function.
```hlsl
// A reflectance function
interface IBRDF
{
float3 eval(float3 wi, float3 wo);
}
struct DisneyBRDF : IBRDF { ... };
struct KajiyaKay : IBRDF { ... };
// a surface pattern
interface IMaterial
{
??? evalPattern(float3 position, float2 uv);
}
```
What is the type `???` that `evalPattern` should return? We know that it needs to be a type that supports `IBRDF`, but *which* type?
One material might want to use `DisneyBRDF` while another wants to use `KajiyaKay`.
The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the dependence of the BRDF type on the material type:
```hlsl
interface IMaterial
{
associatedtype B : IBRDF;
B evalPattern(float3 position, float2 uv);
}
struct MyCoolMaterial : IMaterial
{
typedef DisneyBRDF B;
B evalPattern(float3 position, float2 uv)
{ ... }
}
```
Associated types are an advanced concept, and we only recommend using them when they are needed to define a usable interface.
Future Extensions
-----------------
### Implicit Generics Syntax
The syntax for generics and interfaces in Slang is currently explicit, but verbose:
```hlsl
float4 computeDiffuse<L : ILight>( L light, ... )
{ ... }
```
As a future change, we would like to allow using an interface like `ILight` as an ordinary parameter type:
```hlsl
float4 computeDiffuse( ILight light, ... )
{ ... }
```
This simpler syntax would act like "syntactic sugar" for the existing explicit generics syntax, so it would retain all of the important performance properties.
### Returning a Value of Interface Type
While the above dealt with using an interface as a parameter type, we would eventually like to support using an interface as the *return* type of a function:
```hlsl
ILight getALightSource(Scene scene) { ... }
```
Implementing this case efficiently is more challenging. In most cases, an associated type can be used instead when an interface return type would be desired.
Not Supported
-------------
Some features of the current HLSL language are not supported, but probably will be given enough time/resources:
* Local variables of texture/sampler type (or that contain these)
* Matrix swizzles
* Explicit `packoffset` annotations on members of `cbuffer`s
Some things from HLSL are *not* planned to be supported, unless there is significant outcry from users:
* Pre-D3D10/11 syntax and operations
* The "effect" system, and the related `<>` annotation syntax
* Explicit `register` bindings on textures/samplers nested in `cbuffer`s
* Any further work towards making HLSL a subset of C++ (simply because implementing a full C++ compiler is way out of scope for the Slang project)

View file

@ -1,35 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Introduction
============
Slang is a programming language primarily designed for use in *shader programming*, by which we mean performance oriented GPU programming for real-time graphics.
Overview
--------
This document aims to provide a detailed reference for the Slang language and its supported constructs.
The Slang compiler *implementation* may deviate from the language as documented here, in a few key ways:
* The implementation is necessarily imperfect, and can have bugs
* The implementation may not fully support constructs documented here, or their capabilities may not be as complete as what is documented
* The implementation may support certain constructs that are experimental, deprecated, or are otherwise intentionally undocumented
Where possible, this document will call out known deviations between the language as defined here and the implementation in the compiler.
Terminology
-----------
> Note: This section is not yet complete.
>
> This section should detail how the document uses terms like "may" and "must," if we intend for those to be used in a manner consistent with [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt).
Typographical Conventions
-------------------------
> Note: This section is not yet complete.
>
> This section should clarify how the document displays code fragments, grammar productions, etc.

View file

@ -1,121 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Lexical Structure
=================
Source Units
------------
A _source unit_ comprises a sequence of zero or more _characters_ which for purposes of this document are defined as Unicode scalars (code points).
Encoding
--------
Implementations *may* accept source units stored as files on disk, buffers in memory, or any appropriate implementation-specified means.
When source units are stored as byte sequences, they *should* be encoded using UTF-8.
Implementations *may* support additional implementation-specified encodings.
Whitespace
----------
_Horizontal whitespace_ consists of space (U+0020) and horizontal tab (U+0009).
A _line break_ consists of a line feed (U+000A), carriage return (U+000D) or a carriage return followed by a line feed (U+000D, U+000A).
Line breaks are used as line separators rather than terminators; it is not necessary for a source unit to end with a line break.
Escaped Line Breaks
-------------------
An _escaped line break_ comprises a backslash (`\`, U+005C) followed immediately by a line break.
Comments
--------
A _comment_ is either a line comment or a block comment:
```hlsl
// a line comment
/* a block comment */
```
A _line comment_ comprises two forward slashes (`/`, U+002F) followed by zero or more characters that do not contain a line break.
A line comment extends up to, but does not include, a subsequent line break or the end of the source unit.
A _block comment_ begins with a forward slash (`/`, U+002F) followed by an asterisk (`*`, U+002A).
A block comment is terminated by the next instance of an asterisk followed by a forward slash (`*/`).
A block comment contains all characters between where it begins and where it terminates, including any line breaks.
Block comments do not nest.
It is an error if a block comment that begins in a source unit is not terminated in that source unit.
Phases
------
Compilation of a source unit proceeds _as if_ the following steps are executed in order:
1. Line numbering (for subsequent diagnostic messages) is noted based on the locations of line breaks
2. Escaped line breaks are eliminated. No new characters are inserted to replace them. Any new escaped line breaks introduced by this step are not eliminated.
3. Each comment is replaced with a single space (U+0020)
4. The source unit is _lexed_ into a sequence of tokens according to the lexical grammar in this chapter
5. The lexed sequence of tokens is _preprocessed_ to produce a new sequence of tokens (Chapter 3)
6. Subsequent processing is performed on the preprocessed sequence of tokens
Identifiers
-----------
An _identifier_ begins with an uppercase or lowercase ASCII letter (`A` through `Z`, `a` through `z`), or an underscore (`_`).
After the first character, ASCII digits (`0` through `9`) may also be used in an identifier.
The identifier consisting of a single underscore (`_`) is reserved by the language and must not be used by programs.
Otherwise, there are no fixed keywords or reserved words.
Words that name a built-in language construct can also be used as user-defined identifiers and will shadow the built-in definitions in the scope of their definition.
Literals
--------
### Integer Literals
An _integer literal_ consists of an optional radix specifier followed by digits and an optional suffix.
The _radix specifier_ may be:
* `0x` or `0X` to specify a hexadecimal literal (radix 16)
* `0b` or `0B` to specify a binary literal (radix 2)
When no radix specifier is present a radix of 10 is used.
Octal literals (radix 8) are not supported.
A `0` prefix on an integer literal does *not* specify an octal literal as it does in C.
Implementations *may* warn on integer literals with a `0` prefix in case users expect C behavior.
The _digits_ of an integer literal may include ASCII `0` through `9`.
In the case of a hexadecimal literal, digits may include the letters `A` through `F` (and `a` through `f`) which represent digit values of 10 through 15.
It is an error for an integer literal to include a digit with a value greater than or equal to the radix.
The digits of an integer literal may also include underscore (`_`) characters, which are ignored and have no semantic impact.
The _suffix_ on an integer literal may be used to indicate the desired type of the literal:
* A `u` suffix indicates the `uint` type
* An `l` or `ll` suffix indicates the `int64_t` type
* A `ul` or `ull` suffix indicates the `uint64_t` type
### Floating-Point Literals
> Note: This section is not yet complete.
### String Literals
> Note: This section is not yet complete.
### Character Literals
> Note: This section is not yet complete.
Operators and Punctuation
-------------------------
> Note: This section is not yet complete.

View file

@ -1,19 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Preprocessor
============
Slang supports a C-style preprocessor with the following directives:
* `#include`
* `#define`
* `#undef`
* `#if`, `#ifdef`, `#ifndef`
* `#else`, `#elif`
* `#endif`
* `#error`
* `#warning`
* `#line`
* `#pragma`
> Note: This section is not yet complete.

View file

@ -1,339 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Types
=====
This section defines the kinds of types supported by Slang.
Types in Slang do not necessarily prescribe a single _layout_ in memory.
The discussion of each type will specify any guarantees about layout it provides; any details of layout not specified here may depend on the target platform, compiler options, and context in which a type is used.
Void Type
---------
The type `void` contains no data and has a single, unnamed, value.
A `void` value takes up no space, and thus does not affect the layout of types.
Formally, a `void` value behaves as if it has a size of zero bytes, and one-byte alignment.
Scalar Types
------------
### Boolean Type
The type `bool` is used to represent Boolean truth values: `true` and `false`.
The size of a `bool` varies across target platforms; programs that need to ensure a matching in-memory layout between targets should not use `bool` for in-memory data structures.
On all platforms, the `bool` type must be _naturally aligned_ (its alignment is its size).
### Integer Types
The following integer types are defined:
| Name | Description |
|---------------|-------------|
| `int8_t` | 8-bit signed integer |
| `int16_t` | 16-bit signed integer |
| `int` | 32-bit signed integer |
| `int64_t` | 64-bit signed integer |
| `uint8_t` | 8-bit unsigned integer |
| `uint16_t` | 16-bit unsigned integer |
| `uint` | 32-bit unsigned integer |
| `uint64_t` | 64-bit unsigned integer |
All signed integers use two's complement representation.
All arithmetic operations on integers (both signed and unsigned) wrap on overflow/underflow.
All target platforms must support the `int` and `uint` types.
Specific [target platforms](../target-compatibility.md) may not support the other integer types.
All integer types are stored in memory with their natural size and alignment on all targets that support them.
### Floating-Point Types
The following floating-point types are defined:
| Name | Description |
|---------------|-------------------------------|
| `half` | 16-bit floating-point number (1 sign bit, 5 exponent bits, 10 fraction bits) |
| `float` | 32-bit floating-point number (1 sign bit, 8 exponent bits, 23 fraction bits) |
| `double` | 64-bit floating-point number (1 sign bit, 11 exponent bits, 52 fraction bits) |
All floating-point types are laid out in memory using the matching IEEE 754 standard format (`binary16`, `binary32`, `binary64`).
Target platforms may define their own rules for rounding, precision, denormals, infinities, and not-a-number values.
All target platforms must support the `float` type.
Specific [targets](../target-compatibility.md) may not support the other floating-point types.
All floating-point types are stored in memory with their natural size and alignment on all targets that support them.
Vector Types
------------
A vector type is written as `vector<T, N>` and represents an `N`-element vector with elements of type `T`.
The _element type_ `T` must be one of the built-in scalar types, and the _element count_ `N` must be a specialization-time constant integer.
The element count must be between 2 and 4, inclusive.
A vector type allows subscripting of its elements like an array, but also supports element-wise arithmetic on its elements.
_Element-wise arithmetic_ means mapping unary and binary operators over the elements of a vector to produce a vector of results:
```hlsl
vector<int,4> a = { 1, 2, 30, 40 };
vector<int,4> b = { 10, 20, 3, 4 };
-a; // yields { -1, -2, -30, -40 }
a + b; // yields { 11, 22, 33, 44 }
b / a; // yields { 10, 10, 0, 0 }
a > b; // yields { false, false, true, true }
```
A vector type is laid out in memory as `N` contiguous values of type `T` with no padding.
The alignment of a vector type may vary by target platforms.
The alignment of `vector<T,N>` will be at least the alignment of `T` and may be at most `N` times the alignment of `T`.
As a convenience, Slang defines built-in type aliases for vectors of the built-in scalar types.
E.g., declarations equivalent to the following are provided by the Slang core module:
```hlsl
typealias float4 = vector<float, 4>;
typealias int8_t3 = vector<int8_t, 3>;
```
### Legacy Syntax
For compatibility with older codebases, the generic `vector` type includes default values for `T` and `N`, being declared as:
```hlsl
struct vector<T = float, let N : int = 4> { ... }
```
This means that the bare name `vector` may be used as a type equivalent to `float4`:
```hlsl
// All of these variables have the same type
vector a;
float4 b;
vector<float> c;
vector<float, 4> d;
```
Matrix Types
------------
A matrix type is written as `matrix<T, R, C>` and represents a matrix of `R` rows and `C` columns, with elements of type `T`.
The element type `T` must be one of the built-in scalar types.
The _row count_ `R` and _column count_ `C` must be specialization-time constant integers.
The row count and column count must each be between 2 and 4, inclusive.
A matrix type allows subscripting of its rows, similar to an `R`-element array of `vector<T,C>` elements.
A matrix type also supports element-wise arithmetic.
Matrix types support both _row-major_ and _column-major_ memory layout.
Implementations may support command-line flags or API options to control the default layout to use for matrices.
> Note: Slang currently does *not* support the HLSL `row_major` and `column_major` modifiers to set the layout used for specific declarations.
Under row-major layout, a matrix is laid out in memory equivalently to an `R`-element array of `vector<T,C>` elements.
Under column-major layout, a matrix is laid out in memory equivalent to the row-major layout of its transpose.
This means it will be laid out equivalently to a `C`-element array of `vector<T,R>` elements.
As a convenience, Slang defines built-in type aliases for matrices of the built-in scalar types.
E.g., declarations equivalent to the following are provided by the Slang core module:
```hlsl
typealias float3x4 = matrix<float, 3, 4>;
typealias int64_t4x2 = matrix<int64_t, 4, 2>;
```
> Note: For programmers using OpenGL or Vulkan as their graphics API, and/or who are used to the GLSL language,
> it is important to recognize that the equivalent of a GLSL `mat3x4` is a Slang `float3x4`.
> This is despite the fact that GLSL defines a `mat3x4` as having 3 *columns* and 4 *rows*, while a Slang `float3x4` is defined as having 3 rows and 4 columns.
> This convention means that wherever Slang refers to "rows" or "columns" of a matrix, the equivalent terms in the GLSL, SPIR-V, OpenGL, and Vulkan specifications are "column" and "row" respectively (*including* in the compound terms of "row-major" and "column-major")
> While it may seem that this choice of convention is confusing, it is necessary to ensure that subscripting with `[]` can be efficiently implemented on all target platforms.
> This decision in the Slang language is consistent with the compilation of HLSL to SPIR-V performed by other compilers.
### Legacy Syntax
For compatibility with older codebases, the generic `matrix` type includes default values for `T`, `R`, and `C`, being declared as:
```hlsl
struct matrix<T = float, let R : int = 4, let C : int = 4> { ... }
```
This means that the bare name `matrix` may be used as a type equivalent to `float4x4`:
```hlsl
// All of these variables have the same type
matrix a;
float4x4 b;
matrix<float, 4, 4> c;
```
Structure Types
---------------
Structure types are introduced with `struct` declarations, and consist of an ordered sequence of named and typed fields:
```hlsl
struct S
{
float2 f;
int3 i;
}
```
### Standard Layout
The _standard layout_ for a structure type uses the following algorithm:
* Initialize variables `size` and `alignment` to zero and one, respectively
* For each field `f` of the structure type:
* Update `alignment` to be the maximum of `alignment` and the alignment of `f`
* Set `size` to the smallest multiple of `alignment` not less than `size`
* Set the offset of field `f` to `size`
* Add the size of `f` to `size`
When this algorithm completes, `size` and `alignment` will be the size and alignment of the structure type.
Most target platforms do not use the standard layout directly, but it provides a baseline for defining other layout algorithms.
Any layout for structure types must guarantee an alignment at least as large as the standard layout.
### C-Style Layout
C-style layout for structure types differs from standard layout by adding an additional final step:
* Set `size` to the smallest multiple of `alignment` not less than `size`
This mirrors the layout rules used by typical C/C++ compilers.
### D3D Constant Buffer Layout
D3D constant buffer layout is similar to standard layout with two differences:
* The initial alignment is 16 instead of one
* If a field would have _improper straddle_, where the interval `(fieldOffset, fieldOffset+fieldSize)` (exclusive on both sides) contains any multiple of 16, *and* the field offset is not already a multiple of 16, then the offset of the field is adjusted to the next multiple of 16
Array Types
-----------
An _array type_ is either a statically-sized or dynamically-sized array type.
A known-size array type is written `T[N]` where `T` is a type and `N` is a specialization-time constant integer.
This type represents an array of exactly `N` values of type `T`.
An unknown-size array type is written `T[]` where `T` is a type.
This type represents an array of some fixed, but statically unknown, size.
> Note: Unlike in C and C++, arrays in Slang are always value types, meaning that assignment and parameter passing of arrays copies their elements.
### Declaration Syntax
For variable and parameter declarations using traditional syntax, a variable of array type may be declared by using the element type `T` as a type specifier (before the variable name) and the `[N]` to specify the element count after the variable name:
```hlsl
int a[10];
```
Alternatively, the array type itself may be used as the type specifier:
```hlsl
int[10] a;
```
When using the `var` or `let` keyword to declare a variable, the array type must not be split:
```hlsl
var a : int[10];
```
> Note: when declaring arrays of arrays (often thought of as "multidimensional arrays") a programmer must be careful about the difference between the two declaration syntaxes.
> The following two declarations are equivalent:
>
> ```hlsl
> int[3][5] a;
> int a[5][3];
> ```
>
> In each case, `a` is a five-element array of three-element arrays of `int`s.
> However, one declaration orders the element counts as `[3][5]` and the other as `[5][3]`.
### Element Count Inference
When a variable is declared with an unknown-size array type, and also includes an initial-value expression:
```hlsl
int a[] = { 0xA, 0xB, 0xC, 0xD };
```
The compiler will attempt to infer an element count based on the type and/or structure of the initial-value expression.
In the above case, the compiler will infer an element count of 4 from the structure of the initializer-list expression.
Thus the preceding declaration is equivalent to:
```hlsl
int a[4] = { 0xA, 0xB, 0xC, 0xD };
```
A variable declared in this fashion semantically has a known-size array type and not an unknown-size array type; the use of an unknown-size array type for the declaration is just a convenience feature.
### Standard Layout
The _stride_ of a type is the smallest multiple of its alignment not less than its size.
Using the standard layout for an array type `T[]` or `T[N]`:
* The _element stride_ of the array type is the stride of its element type `T`
* Element `i` of the array starts at an offset that is `i` times the element stride of the array
* The alignment of the array type is the alignment of `T`
* The size of an unknown-size array type is unknown
* The size of a known-size array with zero elements is zero
* The size of a known-size array with a nonzero number `N` of elements is the size of `T` plus `N - 1` times the element stride of the array
### C-Style Layout
The C-style layout of an array type differs from the standard layout in that the size of a known-size array with a nonzero number `N` of elements is `N` times the element stride of the array.
### D3D Constant Buffer Layout
The D3D constant buffer layout of an array differs from the standard layout in that the element stride of the array is set to the smallest multiple of the alignment of `T` that is not less than the stride of `T`
This Type
---------
Within the body of a structure or interface declaration, the keyword `This` may be used to refer to the enclosing type.
Inside of a structure type declaration, `This` refers to the structure type itself.
Inside of an interface declaration, `This` refers to the concrete type that is conforming to the interface (that is, the type of `this`).
Opaque Types
------------
_Opaque_ types are built-in types that (depending on the target platform) may not have a well-defined size or representation in memory.
Similar languages may refer to these as "resource types" or "object types."
The full list of opaque types supported by Slang can be found in the core module reference, but important examples are:
* Texture types such as `Texture2D<T>`, `TextureCubeArray<T>`, and `RWTexture2DMS<T>`
* Sampler state types: `SamplerState` and `SamplerComparisonState`
* Buffer types like `ConstantBuffer<T>` and `StructuredBuffer<T>`
* Parameter blocks: `ParameterBlock<T>`
Layout for opaque types depends on the target platform, and no specific guarantees can be made about layout rules across platforms.
Known and Unknown Size
----------------------
Every type has either known or unknown size.
Types with unknown size arise in a few ways:
* An unknown-size array type has unknown size
* A structure type has unknown size if any field type has unknown size
The use of types with unknown size is restricted as follows:
* A type with unknown size cannot be used as the element type of an array
* A type with unknown size can only be used as the last field of a structure type
* A type with unknown size cannot be used as a generic argument to specialize a user-defined type, function, etc. Specific built-in generic types/functions may support unknown-size types, and this will be documented on the specific type/function.

View file

@ -1,353 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Expressions
===========
Expressions are terms that can be _evaluated_ to produce values.
This section provides a list of the kinds of expressions that may be used in a Slang program.
In general, the order of evaluation of a Slang expression proceeds from left to right.
Where specific expressions do not follow this order of evaluation, it will be noted.
Some expressions can yield _l-values_, which allows them to be used on the left-hand-side of assignment, or as arguments for `out` or `in out` parameters.
Literal Expressions
-------------------
Literal expressions are never l-values.
### Integer Literal Expressions
An integer literal expression consists of a single integer literal token:
```hlsl
123
```
An unsuffixed integer literal expression always has type `int`.
### Floating-Point Literal Expressions
A floating-point literal expression consists of a single floating-point literal token:
```hlsl
1.23
```
An unsuffixed floating-point literal expression always has type `float`.
### Boolean Literal Expressions
Boolean literal expressions use the keywords `true` and `false`.
### String Literal Expressions
A string literal expression consists of one or more string literal tokens in a row:
```hlsl
"This" "is one" "string"
```
Identifier Expression
---------------------
An _identifier expression_ consists of a single identifier:
```hlsl
someName
```
When evaluated, this expression looks up `someName` in the environment of the expression and yields the value of a declaration with a matching name.
An identifier expression is an l-value if the declaration it refers to is mutable.
### Overloading
It is possible for an identifier expression to be _overloaded_, such that it refers to one or more candidate declarations with the same name.
If the expression appears in a context where the correct declaration to use can be disambiguated, then that declaration is used as the result of the name expression; otherwise use of an overloaded name is an error at the use site.
### Implicit Lookup
It is possible for a name expression to refer to nested declarations in two ways:
* In the body of a method, a reference to `someName` may resolve to `this.someName`, using the implicit `this` parameter of the method
* When a global-scope `cbuffer` or `tbuffer` declaration is used, `someName` may refer to a field declared inside the `cbuffer` or `tbuffer`
Member Expression
-----------------
A _member expression_ consists of a base expression followed by a dot (`.`) and an identifier naming a member to be accessed:
```hlsl
base.m
```
When `base` is a structure type, this expression looks up the field or other member named by `m`.
Just as for an identifier expression, the result of a member expression may be overloaded, and might be disambiguated based on how it is used.
A member expression is an l-value if the base expression is an l-value and the member it refers to is mutable.
### Implicit Dereference
If the base expression of a member reference is a _pointer-like type_ such as `ConstantBuffer<T>`, then a member reference expression will implicitly dereference the base expression to refer to the pointed-to value (e.g., in the case of `ConstantBuffer<T>` this is the buffer contents of type `T`).
### Vector Swizzles
When the base expression of a member expression is of a vector type `vector<T,N>` then a member expression is a _vector swizzle expression_.
The member name must conform to these constraints:
* The member name must comprise between one and four ASCII characters
* The characters must come either from the set (`x`, `y`, `z`, `w`) or (`r`, `g`, `b`, `a`), corresponding to element indices of (0, 1, 2, 3)
* The element index corresponding to each character must be less than `N`
If the member name of a swizzle consists of a single character, then the expression has type `T` and is equivalent to a subscript expression with the corresponding element index.
If the member name of a swizzle consists of `M` characters, then the result is a `vector<T,M>` built from the elements of the base vector with the corresponding indices.
A vector swizzle expression is an l-value if the base expression was an l-value and the list of indices corresponding to the characters of the member name contains no duplicates.
### Matrix Swizzles
> Note: The Slang implementation currently doesn't support matrix swizzles.
### Static Member Expressions
When the base expression of a member expression is a type instead of a value, the result is a _static member expression_.
A static member expression can refer to a static field or static method of a structure type.
A static member expression can also refer to a case of an enumeration type.
A static member expression (but not a member expression in general) may use the token `::` instead of `.` to separate the base and member name:
```hlsl
// These are equivalent
Color.Red
Color::Red
```
This Expression
---------------
A _this expression_ consists of the keyword `this` and refers to the implicit instance of the enclosing type that is being operated on in instance methods, subscripts, and initializers.
The type of `this` is `This`.
Parenthesized Expression
----------------------
An expression wrapped in parentheses `()` is a _parenthesized expression_ and evaluates to the same value as the wrapped expression.
Call Expression
---------------
A _call expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `()`:
```hlsl
myFunction( 1.0f, 20 )
```
When the base expression (e.g., `myFunction`) is overloaded, a call expression can disambiguate the overloaded expression based on the number and types of the arguments present.
The base expression of a call may be a member reference expression:
```hlsl
myObject.myFunc( 1.0f )
```
In this case the base expression of the member reference (e.g., `myObject` in this case) is used as the argument for the implicit `this` parameter of the callee.
### Mutability
If a `[mutating]` instance method is being called, the argument for the implicit `this` parameter must be an l-value.
The argument expressions corresponding to any `out` or `in out` parameters of the callee must be l-values.
A call expression is never an l-value.
### Initializer Expressions
When the base expression of a call is a type instead of a value, the expression is an initializer expression:
```hlsl
float2(1.0f, 2.0f)
```
An initializer expression initializes an instance of the specified type using the given arguments.
An initializer expression with only a single argument is treated as a cast expression:
```hlsl
// these are equivalent
int(1.0f)
(int) 1.0f
```
Subscript Expression
--------------------
A _subscript expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `[]`:
```hlsl
myVector[someIndex]
```
A subscript expression invokes one of the subscript declarations in the type of the base expression. Which subscript declaration is invoked is resolved based on the number and types of the arguments.
A subscript expression is an l-value if the base expression is an l-value and if the subscript declaration it refers to has a setter or by-reference accessor.
Subscripts may be formed on the built-in vector, matrix, and array types.
Initializer List Expression
---------------------------
An _initializer list expression_ comprises zero or more expressions, separated by commas, enclosed in `{}`:
```
{ 1, "hello", 2.0f }
```
An initializer list expression may only be used directly as the initial-value expression of a variable or parameter declaration; initializer lists are not allowed as arbitrary sub-expressions.
> Note: This section will need to be updated with the detailed rules for how expressions in the initializer list are used to initialize values of each kind of type.
Cast Expression
---------------
A _cast expression_ attempts to coerce a single value (the base expression) to a desired type (the target type):
```hlsl
(int) 1.0f
```
A cast expression can perform both built-in type conversions and invoke any single-argument initializers of the target type.
### Compatibility Feature
As a compatibility feature for older code, Slang supports using a cast where the base expression is an integer literal zero and the target type is a user-defined structure type:
```hlsl
MyStruct s = (MyStruct) 0;
```
The semantics of such a cast are equivalent to initialization from an empty initializer list:
```hlsl
MyStruct s = {};
```
Assignment Expression
---------------------
An _assignment expression_ consists of a left-hand-side expression, an equals sign (`=`), and a right-hand-side expression:
```hlsl
myVar = someValue
```
The semantics of an assignment expression are to:
* Evaluate the left-hand side to produce an l-value,
* Evaluate the right-hand side to produce a value
* Store the value of the right-hand side to the l-value of the left-hand side
* Yield the l-value of the left-hand-side
Operator Expressions
--------------------
### Prefix Operator Expressions
The following prefix operators are supported:
| Operator | Description |
|-----------|-------------|
| `+` | identity |
| `-` | arithmetic negation |
| `~` | bit-wise Boolean negation |
| `!` | Boolean negation |
| `++` | increment in place |
| `--` | decrement in place |
A prefix operator expression like `+val` is equivalent to a call expression to a function of the matching name `operator+(val)`, except that lookup for the function only considers functions marked with the `__prefix` keyword.
The built-in prefix `++` and `--` operators require that their operand is an l-value, and work as follows:
* Evaluate the operand to produce an l-value
* Read from the l-value to yield an _old value_
* Increment or decrement the value to yield a _new value_
* Write the new value to the l-value
* Yield the new value
### Postfix Operator Expressions
The following postfix operators are supported:
| Operator | Description |
|-----------|-------------|
| `++` | increment in place |
| `--` | decrement in place |
A postfix operator expression like `val++` is equivalent to a call expression to a function of the matching name `operator++(val)`, except that lookup for the function only considers functions marked with the `__postfix` keyword.
The built-in postfix `++` and `--` operators require that their operand is an l-value, and work as follows:
* Evaluate the operand to produce an l-value
* Read from the l-value to yield an _old value_
* Increment or decrement the value to yield a _new value_
* Write the new value to the l-value
* Yield the old value
### Infix Operator Expressions
The following infix binary operators are supported:
| Operator | Kind | Description |
|-----------|-------------|-------------|
| `*` | Multiplicative | multiplication |
| `/` | Multiplicative | division |
| `%` | Multiplicative | remainder of division |
| `+` | Additive | addition |
| `-` | Additive | subtraction |
| `<<` | Shift | left shift |
| `>>` | Shift | right shift |
| `<` | Relational | less than |
| `>` | Relational | greater than |
| `<=` | Relational | less than or equal to |
| `>=` | Relational | greater than or equal to |
| `==` | Equality | equal to |
| `!=` | Equality | not equal to |
| `&` | BitAnd | bitwise and |
| `^` | BitXor | bitwise exclusive or |
| `\|` | BitOr | bitwise or |
| `&&` | And | logical and |
| `\|\|` | Or | logical or |
| `+=` | Assignment | compound add/assign |
| `-=` | Assignment | compound subtract/assign |
| `*=` | Assignment | compound multiply/assign |
| `/=` | Assignment | compound divide/assign |
| `%=` | Assignment | compound remainder/assign |
| `<<=` | Assignment | compound left shift/assign |
| `>>=` | Assignment | compound right shift/assign |
| `&=` | Assignment | compound bitwise and/assign |
| `\|=` | Assignment | compound bitwise or/assign |
| `^=` | Assignment | compound bitwise xor/assign |
| `=` | Assignment | assignment |
| `,` | Sequencing | sequence |
With the exception of the assignment operator (`=`), an infix operator expression like `left + right` is equivalent to a call expression to a function of the matching name `operator+(left, right)`.
### Conditional Expression
The conditional operator, `?:`, is used to select between two expressions based on the value of a condition:
```hlsl
useNegative ? -1.0f : 1.0f
```
The condition may be either a single value of type `bool`, or a vector of `bool`.
When a vector of `bool` is used, the two values being selected between must be vectors, and selection is performed component-wise.
> Note: Unlike C, C++, GLSL, and most other C-family languages, Slang currently follows the precedent of HLSL where `?:` does not short-circuit.
>
> This decision may change (for the scalar case) in a future version of the language.
> Programmers are encouraged to write code that does not depend on whether or not `?:` short-circuits.

View file

@ -1,237 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Statements
==========
Statements are used to define the bodies of functions and determine order of evaluation and control flow for an entire program.
Statements are distinct from expressions in that statements do not yield results and do not have types.
This section lists the kinds of statements supported by Slang.
Expression Statement
--------------------
An expression statement consists of an expression followed by a semicolon:
```hlsl
doSomething();
a[10] = b + 1;
```
An implementation may warn on an expression statement that has no effect on the results of execution.
Declaration Statement
---------------------
A declaration may be used as a statement:
```hlsl
let x = 10;
var y = x + 1;
int z = y - x;
```
> Note: Currently only variable declarations are allowed in statement contexts, but other kinds of declarations may be enabled in the future.
Block Statement
---------------
A block statement consists of zero or more statements wrapped in curly braces `{}`:
```hlsl
{
int x = 10;
doSomething(x);
}
```
A block statement provides local scoping to declarations.
Declarations in a block are visible to later statements in the same block, but not to statements or expressions outside of the block.
Empty Statement
---------------
A single semicolon (`;`) may be used as an empty statement equivalent to an empty block statement `{}`.
Conditional Statements
----------------------
### If Statement
An _if statement_ consists of the `if` keyword and a conditional expression in parentheses, followed by a statement to execute if the condition is true:
```hlsl
if(somethingShouldHappen)
doSomething();
```
An if statement may optionally include an _else clause_ consisting of the keyword `else` followed by a statement to execute if the condition is false:
```hlsl
if(somethingShouldHappen)
doSomething();
else
doNothing();
```
### Switch Statement
A _switch statement_ consists of the `switch` keyword followed by an expression wrapped in parentheses and a _body statement_:
```hlsl
switch(someValue)
{
...
}
```
The body of a switch statement must be a block statement, and its body must consist of switch case clauses.
A _switch case clause_ consists of one or more case labels or default labels, followed by one or more statements:
```hlsl
// this is a switch case clause
case 0:
case 1:
doBasicThing();
break;
// this is another switch case clause
default:
doAnotherThing();
break;
```
A _case label_ consists of the keyword `case` followed by an expressions and a colon (`:`).
The expression must evaluate to a compile-time constant integer.
A _default label_ consists of the keyword `default` followed by a colon (`:`).
It is an error for a case label or default label to appear anywhere other than the body of a `switch` statement.
It is an error for a statement to appear inside the body of a `switch` statement that is not part of a switch case clause.
Each switch case clause must exit the `switch` statement via a `break` or other control transfer statement.
"Fall-through" from one switch case clause to another is not allowed.
Loop Statements
---------------
### For Statement
A _for statement_ uses the following form:
```hlsl
for( <initial statement> ; <condition expression> ; <side effect expression> ) <body statement>
```
The _initial statement_ is optional, but may declare a variable whose scope is limited to the for statement.
The _condition expression_ is optional. If present it must be an expression that can be coerced to type `bool`. If absent, a true value is used as the condition.
The _side effect expression_ is optional. If present, it will be executed for its effects before the condition is tested on every loop iteration after the first.
The _body statement_ is a statement that will be executed for each iteration of the loop.
### While Statement
A _while statement_ uses the following form:
```hlsl
while( <condition expression> ) <body statement>
```
and is equivalent to a `for` loop of the form:
```hlsl
for( ; <condition expression> ; ) <body statement>
```
### Do-While Statement
A _do-while statement_ uses the following form:
```hlsl
do <body statement> while( <condition expression> )
```
and is equivalent to a `for` loop of the form:
```hlsl
for(;;)
{
<body statement>
if(<condition expression>) continue; else break;
}
```
Control Transfer Statements
---------------------------
### Break Statement
A `break` statement transfers control to after the end of the closest lexically enclosing switch statement or loop statement:
```hlsl
break;
```
### Continue Statement
A `continue` statement transfers control to the start of the next iteration of a loop statement.
In a for statement with a side effect expression, the side effect expression is evaluated when `continue` is used:
```hlsl
continue;
```
### Return Statement
A `return` statement transfers control out of the current function.
In the body of a function with a `void` result type, the `return` keyword may be followed immediately by a semicolon:
```hlsl
return;
```
Otherwise, the `return` keyword must be followed by an expression to use as the value to return to the caller:
```hlsl
return someValue;
```
The value returned must be able to coerce to the result type of the lexically enclosing function.
### Discard Statement
A `discard` statement can only be used in the context of a fragment shader, in which case it causes the current invocation to terminate and the graphics system to discard the corresponding fragment so that it does not get combined with the framebuffer pixel at its coordinates.
Operations with side effects that were executed by the invocation before a `discard` will still be performed and their results will become visible according to the rules of the platform.
Compile-Time For Statement
--------------------------
A _compile-time for statement_ is used as an alternative to preprocessor techniques for loop unrolling.
It looks like:
```hlsl
$for( <name> in Range(<initial-value>, <upper-bound>)) <body statement>
```
The _initial value_ and _upper bound_ expressions must be compile-time constant integers.
The semantics of a compile-time for statement are as if it were expanded into:
```hlsl
{
let <name> = <initial-value>;
<body statement>
}
{
let <name> = <initial-value> + 1;
<body statement>
}
...
{
let <name> = <upper-bound> - 1;
<body statement>
}
```

View file

@ -1,770 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Declarations
============
Modules
-------
A module consists of one or more source units that are compiled together.
The global declarations in those source units comprise the body of the module.
In general, the order of declarations within a source unit does not matter; declarations can refer to other declarations (of types, functions, variables, etc.) later in the same source unit.
Declarations (other than `import` declarations) may freely be defined in any source unit in a module; declarations in one source unit of a module may freely refer to declarations in other source units.
Imports
-------
An import declaration is introduced with the keyword `import`:
```hlsl
import Shadowing;
```
An import declaration searches for a module matching the name given in the declaration, and brings the declarations in that module into scope in the current source unit.
> Note: an `import` declaration only applies to the scope of the current source unit, and does *not* import the chosen module so that it is visible to other source units of the current module.
The name of the module being imported may use a compound name:
```hlsl
import MyApp.Shadowing;
```
The mechanism used to search for a module is implementation-specific.
> Note: The current Slang implementation searches for a module by translating the specified module name into a file path by:
>
> * Replacing any dot (`.`) separators in a compound name with path separators (e.g., `/`)
>
> * Replacing any underscores (`_`) in the name with hyphens (`-`)
>
> * Appending the extension `.slang`
>
> The implementation then looks for a file matching this path on any of its configured search paths.
> If such a file is found it is loaded as a module comprising a single source unit.
The declarations of an imported module become visible to the current module, but they are not made visible to code that later imports the current module.
> Note: An experimental feature exists for an "exported" import declaration:
>
> ```hlsl
> // inside A.slang
> __exported import Shadowing;
> ```
>
> This example imports the declarations from `Shadowing` into the current module (module `A`),
> and also sets up information so that if other code declares `import A` then it can see
> both the declarations in `A` and those in `Shadowing`.
> Note: Mixing `import` declarations and traditional preprocessor-based (`#include`) modularity
> in a codebase can lead to surprising results.
>
> Some things to be aware of:
>
> * Preprocessor definitions in your module do *not* affect the code of modules you `import`.
>
> * Preprocessor definitions in a module you `import` do *not* affect your code
>
> * The above caveats also apply to "include guards" and `#pragma once`, since they operate at the granularity of a source unit (not across modules)
>
> * If you `import` two modules, and then both `#include` the same file, then those two modules may end up with duplicate declarations with the same name.
>
> As a general rule, be wary of preprocessor use inside of code meant to be an `import`able module.
Variables
---------
Variables are declared using the keywords `let` and `var`:
```hlsl
let x = 7;
var y = 9.0;
```
A `let` declaration introduces an immutable variable, which may not be assigned to or used as the argument for an `in out` or `out` parameter.
A `var` declaration introduces a mutable variable.
An explicit type may be given for a variable by placing it after the variable name and a colon (`:`):
```hlsl
let x : int = 7;
var y : float = 9.0;
```
If no type is specified for a variable, then a type will be inferred from the initial-value expression.
It is an error to declare a variable that has neither a type specifier nor an initial-value expression.
It is an error to declare a variable with `let` without an initial-value expression.
A variable declared with `var` may be declared without an initial-value expression if it has an explicit type specifier:
```
var y : float;
```
In this case the variable is _uninitialized_ at the point of declaration, and must be explicitly initialized by assigning to it.
Code that uses the value of an uninitialized variable may produce arbitrary results, or even exhibit undefined behavior depending on the type of the variable.
Implementations *may* issue an error or warning for code that might make use of an uninitialized variable.
### Traditional Syntax
Variables may also be declared with traditional C-style syntax:
```hlsl
const int x = 7;
float y = 9.0;
```
For traditional variable declarations a type must be specified.
> Note: Slang does not support an `auto` type specifier like C++.
Traditional variable declarations are immutable if they are declared with the `const` modifier, and are otherwise mutable.
### Variables at Global Scope
Variables declared at global scope may be either a global constant, a static global variable, or a global shader parameter.
#### Global Constants
A variable declared at global scope and marked with `static` and `const` is a _global constant_.
A global constant must have an initial-value expression, and that initial-value expression must be a compile-time constant expression.
#### Static Global Variables
A variable declared at global scope and marked with `static` (but not with `const`) is a _static global variable_.
A static global variable provides storage for each invocation executing an entry point.
Assignments to a static global variable from one invocation do not affect the value seen by other invocations.
> Note: the semantics of static global variable are similar to a "thread-local" variable in other programming models.
A static global variable may include an initial-value expression; if an initial-value expression is included it is guaranteed to be evaluated and assigned to the variable before any other expression that references the variable is evaluated.
There is no guarantee that the initial-value expression for a static global variable is evaluated before entry point execution begins, or even that the initial-value expression is evaluated at all (in cases where the variable might not be referenced at runtime).
> Note: the above rules mean that an implementation may perform dead code elimination on static global variables, and may choose between eager and lazy initialization of those variables at its discretion.
#### Global Shader Parameters
A variable declared at global scope and not marked with `static` (even if marked with `const`) is a _global shader parameter_.
Global shader parameters are used to pass arguments from application code into invocations of an entry point.
The mechanisms for parameter passing are specific to each target platform.
> Note: Currently only global shader parameters of opaque types or arrays of opaque types are supported.
A global shader parameter may include an initial-value expression, but such an expression does not affect the semantics of the compiled program.
> Note: Initial-value expressions on global shader parameters are only useful to set up "default values" that can be read via reflection information and used by application code.
### Variables at Function Scope
Variables declared at _function scope_ (in the body of a function, initializer, subscript accessor, etc.) may be either a function-scope constant, function-scope static variable, or a local variable.
#### Function-Scope Constants
A variable declared at function scope and marked with both `static` and `const` is a _function-scope constant_.
Semantically, a function-scope constant behaves like a global constant except that its name is only visible in the local scope.
#### Function-Scope Static Variables
A variable declared at function scope and marked with `static` (but not `const`) is a _function-scope static variable_.
Semantically, a function-scope static variable behaves like a global static variable except that its name is only visible in the local scope.
The initial-value expression for a function-scope static variable may refer to non-static variables in the body of the function.
In these cases initialization of the variable is guaranteed not to occur until at least the first time the function body is evaluated for a given invocation.
#### Local Variables
A variable declared at function scope and not marked with `static` (even if marked with `const`) is a _local variable_.
A local variable has unique storage for each _activation_ of a function by an invocation.
When a function is called recursively, each call produces a distinct activation with its own copies of local variables.
Functions
---------
Functions are declared using the `func` keyword:
```hlsl
func add(x: int, y: float) -> float { return float(x) + y; }
```
Parameters
----------
The parameters of the function are declared as `name: type` pairs.
Parameters may be given a _default value_ by including an initial-value-expression clause:
```hlsl
func add(x: int, y: float = 1.0f) { ... }
```
Parameters may be marked with a _direction_ which affects how data is passed between caller and callee:
```hlsl
func add(x: in out int, y : float) { x += ... }
```
The available directions are:
* `in` (the default) indicates typical pass-by-value (copy-in) semantics. The callee receives a *copy* of the argument passed by the caller.
* `out` indicates copy-out semantics. The callee writes to the parameter and then a copy of that value is assigned to the argument of the caller after the call returns.
* `in out` or `inout` indicates pass-by-value-result (copy-in and copy-out) semantics. The callee receives a copy of the argument passed by the caller, it may manipulate the copy, and then when the call returns the final value is copied back to the argument of the caller.
An implementation may assume that at every call site the arguments for `out` or `in out` parameters never alias.
Under those assumptions, the `out` and `inout` cases may be optimized to use pass-by-reference instead of copy-in and copy-out.
> Note: Applications that rely on the precise order in which write-back for `out` and `in out` parameters is performed are already on shaky semantic ground.
Body
----
The _body_ of a function declaration consists of statements enclosed in curly braces `{}`.
In some cases a function declaration does not include a body, and in these cases the declaration must be terminated with a semicolon (`;`):
```hlsl
func getCount() -> int;
```
> Note: Slang does not require "forward declaration" of functions, although
> forward declarations are supported as a compatibility feature.
>
> The only place where a function declaration without a definition should be
> required is in the body of an `interface` declaration.
The result type of a function may be specified after the parameter list using a _result type clause_ consisting of an arrow (`->`) followed by a type.
If the function result type is `void`, the result type clause may be elided:
```hlsl
func modify(x: in out int) { x++; }
```
### Traditional Syntax
Functions can also be declared with traditional C-style syntax:
```hlsl
float add(int x, float y) { return float(x) + y; }
void modify(in out int x) { x ++; }
```
> Note: Currently traditional syntax must be used for shader entry point functions,
> because only the traditional syntax currently supports attaching semantics to
> parameters.
### Entry Points
An _entry point_ is a function that will be used as the starting point of execution for one or more invocations of a shader.
Structure Types
---------------
Structure types are declared using the `struct` keyword:
```hlsl
struct Person
{
var age : int;
float height;
int getAge() { return age; }
func getHeight() -> float { return this.height; }
static func getPopulation() -> int { ... }
}
```
The body of a structure type declaration may include variable, type, function, and initializer declarations.
### Fields
Variable declarations in the body of a structure type declaration are also referred to as _fields_.
A field that is marked `static` is shared between all instances of the type, and is semantically like a global variable marked `static`.
A non-`static` field is also called an _instance field_.
### Methods
Function declarations in the body of a structure type declaration are also referred to as _methods_.
A method declaration may be marked `static`.
A `static` method must be invoked on the type itself (e.g., `Person.getPopulation()`).
A non-`static` method is also referred to as an _instance method_.
Instance methods must be invoked on an instance of the type (e.g., `somePerson.getAge()`).
The body of an instance method has access to an implicit `this` parameter which refers to the instance on which the method was invoked.
By default the `this` parameter of an instance method acts as an immutable variable.
An instance method with the `[mutating]` attribute receives a mutable `this` parameter, and can only be invoked on a mutable value of the structure type.
### Inheritance
A structure type declaration may include an _inheritance clause_ that consists of a colon (`:`) followed by a comma-separated list of types that the structure type inherits from:
```
struct Person : IHasAge, IHasName
{ .... }
```
When a structure type declares that it inherits from an interface, the programmer asserts that the structure type implements the required members of the interface.
### Syntax Details
A structure declaration does *not* need to be terminated with a semicolon:
```hlsl
// A terminating semicolon is allowed
struct Stuff { ... };
// The semicolon is not required
struct Things { ... }
```
When a structure declarations ends without a semicolon, the closing curly brace (`}`) must be the last non-comment, non-whitespace token on its line.
For compatibility with C-style code, a structure type declaration may be used as the type specifier in a traditional-style variable declaration:
```hlsl
struct Association
{
int from;
int to;
} associations[] =
{
{ 1, 1 },
{ 2, 4 },
{ 3, 9 },
};
```
If a structure type declaration will be used as part of a variable declaration, then the next token of the variable declaration must appear on the same line as the closing curly brace (`}`) of the structure type declaration.
The whole variable declaration must be terminated with a semicolon (`;`) as normal.
Enumeration Types
-----------------
Enumeration type declarations are introduced with the `enum` keyword:
```hlsl
enum Color
{
Red,
Green = 3,
Blue,
}
```
### Cases
The body of an enumeration type declaration consists of a comma-separated list of case declarations.
An optional trailing comma may terminate the list of cases.
A _case declaration_ consists of the name of the case, along with an optional initial-value expression that specifies the _tag value_ for that case.
If the first case declaration in the body elides an initial-value expression, the value `0` is used for the tag value.
If any other case declaration elides an initial-value expression, its tag value is one greater than the tag value of the immediately preceding case declaration.
An enumeration case is referred to as if it were a `static` member of the enumeration type (e.g., `Color.Red`).
### Inheritance
An enumeration type declaration may include an inheritance clause:
```hlsl
enum Color : uint
{ ... }
```
The inheritance clause of an enumeration declaration may currently only be used to specify a single type to be used as the _tag type_ of the enumeration type.
The tag type of an enumeration must be a built-in scalar integer type.
The tag value of each enumeration case will be a value of the tag type.
If no explicit tag type is specified, the type `int` is used instead.
> Note: The current Slang implementation has bugs that prevent explicit tag types from working correctly.
### Conversions
A value of an enumeration type can be implicitly converted to a value of its tag type:
```hlsl
int r = Color.Red;
```
Values of the tag type can be explicitly converted to the enumeration type:
```hlsl
Color red = Color(r);
```
Type Aliases
------------
A type alias is declared using the `typealias` keyword:
```hlsl
typealias Height = int;
```
A type alias defines a name that will be equivalent to the type to the right of `=`.
### Traditional Syntax
Type aliases can also be declared with traditional C-style syntax:
```hlsl
typedef int Height;
```
Constant Buffers and Texture Buffers
------------------------------------
As a compatibility feature, the `cbuffer` and `tbuffer` keywords can be used to introduce variable declarations.
A declaration of the form:
```hlsl
cbuffer Name
{
F field;
// ...
}
```
is equivalent to a declaration of the form:
```hlsl
struct AnonType
{
F field;
// ...
}
__transparent ConstantBuffer<AnonType> anonVar;
```
In this expansion, `AnonType` and `anonVar` are fresh names generated for the expansion that cannot collide with any name in user code, and the modifier `__transparent` makes it so that an unqualified reference to `field` can implicitly resolve to `anonVar.field`.
The keyword `tbuffer` uses an equivalent expansion, but with `TextureBuffer<T>` used instead of `ConstantBuffer<T>`.
Interfaces
----------
An interface is declared using the `interface` keyword:
```hlsl
interface IRandom
{
uint next();
}
```
The body of an interface declaration may contain function, initializer, subscript, and associated type declarations.
Each declaration in the body of an interface introduces a _requirement_ of the interface.
Types that declare conformance to the interface must provide matching implementations of the requirements.
Functions, initializers, and subscripts declared inside an interface must not have bodies; default implementations of interface requirements are not currently supported.
An interface declaration may have an inheritance clause:
```hlsl
interface IBase
{
int getBase();
}
interface IDerived : IBase
{
int getDerived();
}
```
The inheritance clause for an interface must only list other interfaces.
If an interface `I` lists another interface `J` in its inheritance clause, then `J` is a _base interface_ of `I`.
In order to conform to `I`, a type must also conform to `J`.
Associated Types
----------------
An associated type declaration is introduced with `associatedtype`:
```hlsl
associatedtype Iterator;
```
An associated type declaration introduces a type into the signature of an interface, without specifying the exact concrete type to use.
An associated type is an interface requirement, and different implementations of an interface may provide different types that satisfy the same associated type interface requirement:
```
interface IContainer
{
associatedtype Iterator;
...
}
struct MyArray : IContainer
{
typealias Iterator = Int;
...
}
struct MyLinkedList : IContainer
{
struct Iterator { ... }
...
}
```
It is an error to declare an associated type anywhere other than the body of an interface declaration.
An associated type declaration may have an inheritance clause.
The inheritance clause of an associated type may only list interfaces; these are the _required interfaces_ for the associated type.
A concrete type that is used to satisfy an associated type requirement must conform to all of the required interfaces of the associated type.
Initializers
------------
An initializer declaration is introduced with the `__init` keyword:
```hlsl
struct MyVector
{
float x, float y;
__init(float s)
{
x = s;
y = s;
}
}
```
> Note: Initializer declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
> Arbitrary changes to the syntax and semantics of initializers may be introduced in future versions of Slang.
An initializer declaration may only appear in the body of an interface or a structure type.
An initializer defines a method for initializing an instance of the enclosing type.
> Note: A C++ programmer might think of an initializer declaration as similar to a C++ _constructor_.
An initializer has a parameter list and body just like a function declaration.
An initializer must not include a result type clause; the result type of an initializer is always the enclosing type.
An initializer is invoked by calling the enclosing type as if it were a function.
E.g., in the example above, the initializer in `MyVector` can be invoked as `MyVector(1.0f)`.
An initializer has access to an implicit `this` variable that is the instance being initialized; an initializer must not be marked `static`.
The `this` variable of an initializer is always mutable; an initializer need not, and must not, be marked `[mutating]`.
> Note: Slang currently does not enforce that a type with an initializer can only be initialized using its initializers.
> It is possible for user code to declare a variable of type `MyVector` above, and explicitly write to the `x` and `y` fields to initialize it.
> A future version of the language may close up this loophole.
> Note: Slang does not provide any equivalent to C++ _destructors_ which run automatically when an instance goes out of scope.
Subscripts
----------
A subscript declaration is introduced with the `__subscript` keyword:
```hlsl
struct MyVector
{
...
__subscript(int index) -> float
{
get { return index == 0 ? x : y; }
}
}
```
> Note: subscript declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
> Arbitrary changes to the syntax and semantics of subscript declarations may be introduced in future versions of Slang.
A subscript declaration introduces a way for a user-defined type to support subscripting with the `[]` braces:
```hlsl
MyVector v = ...;
float f = v[0];
```
A subscript declaration lists one or more parameters inside parentheses, followed by a result type clause starting with `->`.
The result type clause of a subscript declaration cannot be elided.
The body of a subscript declaration consists of _accessor declarations_.
Currently only `get` accessor declarations are supported for user code.
A `get` accessor declaration introduces a _getter_ for the subscript.
The body of a getter is a code block like a function body, and must return the appropriate value for a subscript operation.
The body of a getter can access the parameters of the enclosing subscript, as well as an implicit `this` parameter of the type that encloses the accessor.
The `this` parameter of a getter is immutable; `[mutating]` getters are not currently supported.
Extensions
----------
An extension declaration is introduced with the `extension` keyword:
```hlsl
extension MyVector
{
float getLength() { return sqrt(x*x + y*y); }
static int getDimensionality() { return 2; }
}
```
An extension declaration adds behavior to an existing type.
In the example above, the `MyVector` type is extended with an instance method `getLength()`, and a static method `getDimensionality()`.
An extension declaration names the type being extended after the `extension` keyword.
The body of an extension declaration may include type declarations, functions, initializers, and subscripts.
> Note: The body of an extension may *not* include variable declarations.
> An extension cannot introduce members that would change the in-memory layout of the type being extended.
The members of an extension are accessed through the type that is being extended.
For example, for the above extension of `MyVector`, the introduced methods are accessed as follows:
```hlsl
MyVector v = ...;
float f = v.getLength();
int n = MyVector.getDimensionality();
```
An extension declaration need not be placed in the same module as the type being extended; it is possible to extend a type from third-party or standard module code.
The members of an extension are only visible inside of modules that `import` the module declaring the extension;
extension members are *not* automatically visible wherever the type being extended is visible.
An extension declaration may include an inheritance clause:
```hlsl
extension MyVector : IPrintable
{
...
}
```
The inheritance clause of an extension declaration may only include interfaces.
When an extension declaration lists an interface in its inheritance clause, it asserts that the extension introduces a new conformance, such that the type being extended now conforms to the given interface.
The extension must ensure that the type being extended satisfies all the requirements of the interface.
Interface requirements may be satisfied by the members of the extension, members of the original type, or members introduced through other extensions visible at the point where the conformance was declared.
It is an error for overlapping conformances (that is, of the same type to the same interface) to be visible at the same point.
This includes cases where two extensions declare the same conformance, as well as those where the original type and an extension both declare the same conformance.
The conflicting conformances may come from the same module or different modules.
In order to avoid problems with conflicting conformances, when a module `M` introduces a conformance of type `T` to interface `I`, one of the following should be true:
* the type `T` is declared in module `M`, or
* the type `I` is declared in module `M`
Any conformance that does not follow these rules (that is, where both `T` and `I` are imported into module `M`) is called a _retroactive_ conformance, and there is no way to guarantee that another module `N` will not introduce the same conformance.
The runtime behavior of programs that include overlapping retroactive conformances is currently undefined.
Currently, extension declarations can only apply to structure types; extensions cannot apply to enumeration types or interfaces.
Generics
--------
Many kinds of declarations can be made _generic_: structure types, interfaces, extensions, functions, initializers, and subscripts.
A generic declaration introduces a _generic parameter list_ enclosed in angle brackets `<>`:
```hlsl
T myFunction<T>(T left, T right, bool condition)
{
return condition ? left : right;
}
```
### Generic Parameters
A generic parameter list can include one or more parameters separated by commas.
The allowed forms for generic parameters are:
* A single identifier like `T` is used to declare a _generic type parameter_ with no constraints.
* A clause like `T : IFoo` is used to introduce a generic type parameter `T` where the parameter is _constrained_ so that it must conform to the `IFoo` interface.
* A clause like `let N : int` is used to introduce a generic value parameter `N`, which takes on values of type `int`.
> Note: The syntax for generic value parameters is provisional and subject to possible change in the future.
Generic parameters may declare a default value with `=`:
```hlsl
T anotherFunction<T = float, let N : int = 4>(vector<T,N> v);
```
For generic type parameters, the default value is a type to use if no argument is specified.
For generic value parameters, the default value is a value of the same type to use if no argument is specified.
### Explicit Specialization
A generic is _specialized_ by applying it to _generic arguments_ listed inside angle brackets `<>`:
```hlsl
anotherFunction<int, 3>
```
Specialization produces a reference to the declaration with all generic parameters bound to concrete arguments.
When specializing a generic, generic type parameters must be matched with type arguments that conform to the constraints on the parameter, if any.
Generic value parameters must be matched with value arguments of the appropriate type, and that are specialization-time constants.
An explicitly specialized function, type, etc. may be used wherever a non-generic function, type, etc. is expected:
```hlsl
int i = anotherFunction<int,3>( int3(99) );
```
### Implicit Specialization
If a generic function/type/etc. is used where a non-generic function/type/etc. is expected, the compiler attempts _implicit specialization_.
Implicit specialization infers generic arguments from the context at the use site, as well as any default values specified for generic parameters.
For example, if a programmer writes:
```hlsl
int i = anotherFunction( int3(99) );
```
The compiler will infer the generic arguments `<int, 3>` from the way that `anotherFunction` was applied to a value of type `int3`.
> Note: Inference for generic arguments currently only takes the types of value arguments into account.
> The expected result type does not currently affect inference.
### Syntax Details
The following examples show how generic declarations of different kinds are written:
```
T genericFunction<T>(T value);
func genericFunction<T>(value: T) -> T;
__init<T>(T value);
__subscript<T>(T value) -> X { ... }
struct GenericType<T>
{
T field;
}
interface IGenericInterface<T> : IBase<T>
{
}
```
> Note: Currently there is no user-exposed syntax for writing a generic extension.

View file

@ -1,32 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Attributes
==========
> Note: This section is not yet complete.
## [[vk::spirv_instruction]]
** SPIR-V only **
This attribute is only available for Vulkan SPIR-V output.
The attribute allows access to SPIR-V intrinsics, by supplying a function declaration with the appropriate signature for the SPIR-V op and no body. The intrinsic takes a single parameter which is the integer value for the SPIR-V op.
In the example below the add function, uses the mechanism to directly use the SPIR-V integer add 'op' which is 128 in this case.
```HLSL
// 128 is OpIAdd in SPIR-V
[[vk::spirv_instruction(128)]]
uint add(uint a, uint b);
RWStructuredBuffer<uint> resultBuffer;
[numthreads(4,1,1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
uint threadId = dispatchThreadID.x;
resultBuffer[threadId] = add(threadId, threadId);
}
```

View file

@ -1,16 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Slang Language Reference
========================
Contents
--------
* [1 - Introduction](01-introduction.md)
* [2 - Lexical Structure](02-lexical-structure.md)
* [3 - Preprocessor](03-preprocessor.md)
* [4 - Types](04-types.md)
* [5 - Expressions](05-expressions.md)
* [6 - Statements](06-statements.md)
* [7 - Declarations](07-declarations.md)
* [8 - Attributes](08-attributes.md)

View file

@ -1,228 +0,0 @@
Parameter Layout Rules
======================
An important goal of the Slang project is that the rules for how shader parameters get assigned to `register`s/`binding`s is completely deterministic, so that users can rely on the compiler's behavior.
This document will attempt to explain the rules that Slang employs at a high level.
Eventually it might evolve into a formal specification of the expected behavior.
Guarantees
----------
The whole point of having a deterministic layout approach is the guarantees that it gives to users, so we will start by explicitly stating the guarantees that users can rely upon:
* A single top-level shader parameter will always occupy a contiguous range of bindings/registers for each resource type it consumes (e.g., a contiguous range of `t` registers, a contiguous range of bytes in a `cbuffer`, etc.).
* The amount of resources a parameter consumes depends only on its type, and top-level context in which it appears (e.g., is it in a `cbuffer`? an entry-point varying parameter? etc.).
* A shader parameter that is declared the same way in two different programs will get the same *amount* of resources (registers/bytes) allocated for it in both programs, but it might get a different starting offset/register.
* Changing the bodies of functions in shader code cannot change the layout of shader parameters. In particular, just because a shader parameter is "dead" does not mean it gets eliminated.
* If the user doesn't use explicit `register`/`layout` modifiers to bind parameters, then each module will get a contiguous range of bindings, and the overall program will always use a contiguous range starting from zero for each resource type.
Overview of the Layout Algorithm
--------------------------------
Layout is applied to a Slang *compile request* which comprises one or more *translation units* of user code, and zero or more `import`ed modules.
The compile request also specifies zero or more *entry points* to be compiled, where each entry point identifies a function and a profile to use.
Layout is always done with respect to a chosen *target*, and different targets might compute the resource usage for types differently, or apply different alignment.
Within a single target there may also be different layout rules (e.g., the difference between GLSL `std140` and `std430`).
Layout proceeds in four main phases:
1. Establish a global ordering on shader parameters
2. Compute the resource requirements of each shader parameter
3. Process shader parameters with fixed binding modifiers
4. Allocate bindings to parameters without fixed binding modifiers
Ordering (and Collapsing) Shader Parameters
-------------------------------------------
Shader parameters from the user's code always precede shader parameters from imported modules.
The order of parameters in the user's code is derived by "walking" through the code as follows:
* Walk through each translation unit in the order they were added via API (or the order they were listed on the command line)
* Walk through each source file of a translation unit in the order they were added/listed
* Walk through global-scope shader parameter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file.
* After all global parameters for a translation unit have been walked, walk through any entry points in the translation unit.
* When walking through an entry point, walk through all of its function parameters (both uniforms and varyings) in order, and then walk the function result as a varying output parameter.
When dealing with global-scope parameters in the user's code, it is possible for the "same" parameter to appear in multiple translation units.
Any two global shader parameters in user code with the same name are assumed to represent the same parameter, and will only be included in the global order at the first location where they are seen.
It is an error for the different declarations to have a mismatch in type, or conflicting explicit bindings.
Parameters from `import`ed modules are enumerated after the user code, using the order in which modules were first `import`ed.
The order of parameters within each module is the same as when the module was compiled, which matches the ordering given above.
Computing Resource Requirements
-------------------------------
Each shader parameter computes its resource requirements based on its type, and how it is declared.
* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` declarations all use the "default" layout rules
* Entry point non-`uniform` parameters use "varying" layout rules, either input or output
* A few other special case rules exist (e.g., for laying out the elements of a `StructuredBuffer`), but most users will not need to worry about these
Note that the "default" rules are different for D3D and GL/Vulkan targets, because they have slightly different packing behavior.
### Plain Old Data
Under the default rules simple scalar types (`bool`, `int`, `float`, etc.) are laid out as "uniform" data (that is, bytes of ordinary memory).
In most cases, the size matches the expected data type size (although be aware that most targets treat `bool` as a synonym for `int`) and the alignment is the same as the size.
### Vectors
Vectors are laid out as N sequential scalars.
Under HLSL rules, a vector has the same alignment as its scalar type.
Under GLSL `std140` rules, a vector has an alignment that is its size rounded up to the next power of two (so a `float3` has `float4` alignment).
### Opaque Types
"Opaque" types include resource/sampler types like `Texture2D` and `SamplerState`.
These consume a single "slot" of the appropriate category for the chosen API.
Note that when compiling for D3D, a `Texture2D` and a `SamplerState` will consume different resources (`t` and `s` registers, respectively), but when compiling for Vulkan, they both consume the same resource ("descriptor table slot").
Opaque types currently all have an alignment of one.
### Structures
A structure is laid out by initializing a counter for each resource type, and then processing fields sequentially (in declaration order):
* Compute resource usage for the field's type
* Adjust counters based on the alignment of the field for each resource type where it has non-zero usage
* Assign an offset to the field for each resource type where it has non-zero usage
* Add the resource usage of the field to the counters
An important wrinkle is that when doing layout for HLSL, we must ensure that if a field with uniform data that is smaller than 16 bytes would straddle a 16-byte boundary, we advance to the next 16-byte aligned offset.
The overall alignment of a `struct` is the maximum alignment of its fields or the default alignment (if it is larger).
The default alignment is 16 for both D3D and Vulkan targets.
The final resource usage of a `struct` is rounded up to a multiple of the alignment for each resource type. Note that we allow a `struct` to consume zero bytes of uniform storage.
It is important to note that a `struct` type can use resources of many different kinds, so in general we cannot talk about the "size" of a type, but only its size for a particular kind of resource (uniform bytes, texture registers, etc.).
### Sized Arrays
For uniform data, the size of the element type is rounded up to the target-specific minimum (e.g., 16 for D3D and Vulkan constant buffers) to arrive at the *stride* of the array. The total size of the array is then the stride times the element count.
For opaque resource types, the D3D case simply takes the stride to be the number of registers consumed by each element, and multiplies this by the element count.
For Vulkan, an array of resources uses only a single `binding`, so that the stride is always zero for these resource kinds, and the resource usage of an array is the same as its element type.
### Unsized Arrays
The uniform part of an unsized array has the same stride as for the sized case, but an effectively infinite size.
For register/binding resource usage, a Vulkan unsized array is just like a sized one, while a D3D array will consume a full register *space* instead of individual registers.
### Constant Buffers
To determine the resource usage of a constant buffer (either a `cbuffer { ... }` declaration or a `ConstantBuffer<T>`) we look at the resource usage of its element type.
If the element uses any uniform data, the constant buffer will use at least one constant-buffer register (or whatever the target-specific resource is).
If the element uses any non-uniform data, that usage will be added to that of the constant buffer.
### Parameter Blocks
A parameter block is similar to a constant buffer.
If the element type uses any uniform data, we compute resource usage for a constant buffer.
We then add in any non-uniform resource usage for the element types.
If the target requires use of register spaces (e.g., for Vulkan), then a parameter block uses a single register space; otherwise it exposes the resource usage of its element type directly.
Processing Explicit Binding Modifiers
-------------------------------------
If the user put an explicit binding modifier on a parameter, and that modifier applies to the current target, then we use it and "reserve" space in the overall binding range.
Traditional HLSL `register` modifiers only apply for D3D targets.
Slang currently allows GLSL-style `layout(binding =...)` modifiers to be attached to shader parameters, and will use those modifiers for GL/Vulkan targets.
If two parameters reserve overlapping ranges, we currently issue an error.
This may be downgraded to a warning for targets that support overlapping ranges.
Allocating Bindings to Parameters
---------------------------------
Once ranges have been reserved for parameters with explicit bindings, the compiler goes through all parameters again, in the global order and assigns them bindings based on their resource requirements.
For each resource type used by a parameter, it is allocated the first contiguous range of resources of that type that have not been reserved.
Splitting of Arrays
-------------------
In order to support `struct` types that mix uniform and non-uniform data, the Slang compiler always "splits" these types.
For example, given:
```hlsl
struct LightInfo { float3 pos; Texture2D shadowMap; };
LightInfo gLight;
```
Slang will generate code like:
```hlsl
float3 gLight_pos;
Texture2D gLight_shadowMap;
```
In a simple case like the above, this doesn't affect layout at all, but once arrays get involved, the layout can be more complicated. Consider this case:
```hlsl
struct Pair { Texture2D a; Texture2D b; };
Pair gPairs[8];
```
The output from the splitting step is equivalent to:
```hlsl
Texture2D gPairs_a[8];
Texture2D gPairs_b[8];
```
While this transformation is critical for having a type layout algorithm that applies across all APIs (and also it is pretty much required to work around various bugs in downstream compilers), it has the important down-side that the value `gPairs[0]` does not occupy a contiguous range of registers (although the top-level shader parameter `gPairs` *does*).
The Slang reflection API will correctly report the information about this situation:
* The "stride" of the `gPairs` array will be reported as one, because `gPairs[n+1].a` is always one register after `gPairs[n].a`.
* The offset of the `gPairs.b` field will be reported as 8, because `gPairs[0].b` will be 8 registers after the starting register for `gPairs`.
The Slang API tries to provide the best information it can in this case, but it is still important for users who mix arrays and complex `struct` types to know how the compiler will lay them out.
Generics
--------
Generic type parameters complicate these layout rules.
For example, we cannot compute the exact resource requirements for a `vector<T,3>` without knowing what the type `T` is.
When computing layouts for fully specialized types or programs, no special considerations are needed: the rules as described in this document still apply.
One important consequence to understand is that given a type like:
```hlsl
struct MyStuff<T>
{
int a;
T b;
int c;
}
```
the offset computed for the `c` field depends on the concrete type that gets plugged in for `T`.
We think this is the least surprising behavior for programmers who might be familiar with things like C++ template specialization.
In cases where confusion about a field like `c` getting different offsets in different specializations is a concern, users are encouraged to declare types so that all non-generic-dependent fields come before generic-dependent ones.

View file

@ -1,89 +0,0 @@
NVAPI Support
=============
Slang provides support for [NVAPI](https://developer.nvidia.com/nvapi) in several ways
* Slang allows the use of NVAPI directly, by the inclusion of the `#include "nvHLSLExtns.h"` header in your Slang code. Doing so will make all the NVAPI functions directly available and usable within your Slang source code.
* NVAPI is used to provide features implicitly for certain targets. For example support for [RWByteAddressBuffer atomics](target-compatibility.md) on HLSL based targets is supported currently via NVAPI.
* Direct and implicit NVAPI usage can be freely mixed.
Direct usage of NVAPI
=====================
Direct usage of NVAPI just requires the inclusion of the appropriate NVAPI header, typically with `#include "nvHLSLExtns.h"` within your Slang source. As is required by NVAPI, before the `#include` it is necessary to specify the slot and perhaps space usage. For example a typical direct NVAPI usage inside a Slang source file might contain something like...
```
#define NV_SHADER_EXTN_SLOT u0
#include "nvHLSLExtns.h"
```
In order for the include to work, it is necessary for the include path to include the folder that contains the nvHLSLExtns.h and associated headers.
Implicit usage of NVAPI
=======================
It is convenient and powerful to be able to directly use NVAPI calls, but this will only work on targets that support the mechanism, even if there is a way to support the functionality some other way.
Slang provides some cross platform features on HLSL based targets that are implemented via NVAPI. For example RWByteAddressBuffer atomics are supported on Vulkan, DX12 and CUDA. On DX12 they are made available via NVAPI, whilst CUDA and Vulkan have direct support. When compiling Slang code that uses RWByteAddressBuffer atomics, Slang will emit HLSL code that uses NVAPI. In order for the downstream compiler to be able to compile this HLSL it must be able to include the NVAPI header `nvHLSLExtns.h`.
It is worth discussing briefly how this mechanism works. Slang has a 'prelude' mechanism for different source targets. The prelude is a piece of text that is inserted before the source that is output from compiling the input Slang source code. There is a default prelude for HLSL that is something like
```
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
```
If there are any calls to NVAPI implicitly from Slang source, then the following is emitted before the prelude
```
#define SLANG_HLSL_ENABLE_NVAPI 1
#define NV_SHADER_EXTN_SLOT u0
#define NV_SHADER_EXTN_REGISTER_SPACE space0
```
Thus causing the prelude to include nvHLSLExtns.h, and specifying the slot and potentially the space as is required for inclusion of nvHLSLExtns.h.
The actual values for the slot and optionally the space are found by Slang examining the values of those macros at the end of preprocessing the input Slang source files.
This means that if you compile Slang source that makes implicit use of NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, through the API, or through having suitable `#define`s in the Slang source code.
It is worth noting if you *replace* the default HLSL prelude, and use NVAPI then it will be necessary to have something like the default HLSL prelude part of your custom prelude.
Downstream Compiler Include
---------------------------
There is a subtle detail that is perhaps worth noting here around the downstream compiler and `#include`s. When Slang outputs HLSL it typically does not contain any `#include`, because all of the `#include` in the original source code have been handled by Slang. Slang then outputs everything required to compile to the downstream compiler *without* any `#include`. When NVAPI is used explicitly this is still the case - the NVAPI headers are consumed by Slang, and then Slang will output HLSL that does not contain any `#include`.
The astute reader may have noticed that the default Slang HLSL prelude *does* contain an include, which is enabled via SLANG_HLSL_ENABLE_NVAPI macro which Slang will set with implicit NVAPI use.
```
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
```
This means that the *downstream* compiler (such as DXC and FXC) must be able to handle this include. Include paths can be specified for downstream compilers via the [-X mechanism](user-guide/08-compiling.md#downstream-arguments). So for example...
```
-Xfxc -IpathTo/nvapi -Xdxc -IpathTo/nvapi
```
In the explicit scenario where `nvHLSLExtns.h` is included in Slang source, the include path must be specified in Slang through the regular mechanisms.
In a scenario with both implicit and explicit use, both Slang *and* the downstream compiler need to have a suitable path specified. Things can be more complicated if there is mixed implicit/explicit NVAPI usage and in the Slang source the include path is set up such that NVAPI is included with
```
#include "nvapi/nvHLSLExtns.h"
```
Since Slang and the downstream compilers can specify different include paths, the downstream compiler include path can be such that `#include "nvHLSLExtns.h"` works with the default prelude.
Another way of working around this issue is to alter the prelude for downstream compilers such that it contains an absolute path for the `#include`. This is the mechanism that is currently used with the Slang test infrastructure.
Links
-----
More details on how this works can be found in the following PR
* [Simplify workflow when using NVAPI #1556](https://github.com/shader-slang/slang/pull/1556)

View file

@ -1,51 +0,0 @@
SP #000: Proposal Template
=================
This document provides a starting point for a larger feature proposal.
The sections in it are suggested, but can be removed if they don't make sense for a chosen feature.
The first section should provide a concise description of **what** the feature is and, if possible, **why** it is important.
A proposal for a Slang language/compiler feature or system should start with a concise description of what the feature is and why it could be important.
Status
------
Status: Design Review/Planned/Implementation In-Progress/Implemented/Partially Implemented. Note here whether the proposal is unimplemented, in-progress, has landed, etc.
Implementation: [PR 000] [PR 001] ... (list links to PRs)
Author: authors of the design doc and the implementation.
Reviewer: Reviewers of the proposal and implementation.
Background
----------
The background section should explain where things stand in the language/compiler today, along with any relevant concepts or terms of art from the wider industry.
If the proposal is about solving a problem, this section should clearly illustrate the problem.
If the proposal is about improving a design, it should explain where the current design falls short.
Related Work
------------
The related work section should show examples of how other languages, compilers, etc. have solved the same or related problems. Even if there are no direct precedents for what is being proposed, there should ideally be some points of comparison for where ideas sprang from.
Proposed Approach
-----------------
Explain the idea in enough detail that a reader can concretely know what you are proposing to do. Anybody who is just going to *use* the resulting feature/system should be able to read this and get an accurate idea of what that experience will be like.
Detailed Explanation
--------------------
Here's where you go into the messy details related to language semantics, implementation, corner cases and gotchas, etc.
Ideally this section provides enough detail that a contributor who wasn't involved in the proposal process could implement the feature in a way that is faithful to the original.
Alternatives Considered
-----------------------
Any important alternative designs should be listed here.
If somebody comes along and says "that proposal is neat, but you should just do X" you want to be able to show that X was considered, and give enough context on why we made the decision we did.
This section doesn't need to be defensive, or focus on which of various options is "best".
Ideally we can acknowledge that different designs are suited for different circumstances/constraints.

View file

@ -1,348 +0,0 @@
SP #001: `where` Clauses
===============
We propose to allow generic declarations in Slang to move the constraints on generic type parameters outside of the `<>` and onto distinct `where` clauses.
Status
------
Status: Partially implemented. The only unimplemented case is the canonicalization of generic constraints.
Implementation: [PR 4986](https://github.com/shader-slang/slang/pull/4986)
Reviewed by: Theresa Foley, Yong He
Background
----------
Slang supports generic type parameters with *constraints* on them.
Currently constraints can only be written as part of the declaration of the type parameter itself, e.g.:
void resolve<T: IResolvable, U: IResolver<T>, V: IResolveDestination<T>>(
ResolutionContext<U> context, List<T> stuffToResolve, out V destination)
{ ... }
The above example illustrates how intermixing the declaration of the type parameters with their constraints can make for long declarations that can be difficult for programmers to read and understand.
Introducing `where` clauses allows a programmer to state the constraints *after* the rest of the declaration header, e.g.:
void resolve<T, U, V>(ResolutionContext<U> context, List<T> stuffToResolve, out V destination)
where T : IResolvable,
where U : IResolver<T>,
where V : IResolveDestination<T>
{ ... }
This latter form makes it easier to quickly glean the overall shape of the function signature.
A second important benefit of `where` clauses is that they open the door to expressing more complicated constraints on and between type parameters, such as allowing constraints on *associated types*, e.g.:
void writePackedData<T, U>(T src, out U dst)
where T : IPackable,
where T.Packed : IWritable<U>
{ .. }
Related Work
------------
Many other languages with support for generics have introduced `where` clauses, and most follow a broadly similar shape. To present our `resolve` example in various other languages:
### Rust
Rust supports `where` clauses with a comma-separated list of constraints:
fn resolve<T, U, V>(context: ResolutionContext<U>, stuffToResolve: List<T>, destination: mut& V)
where T : IResolvable,
U : IResolver<T>,
V : IResolveDestination<T>,
{ ... }
### Swift
Swift's `where` clauses are nearly identical to Rust's:
fn resolve<T, U, V>(context: ResolutionContext<U>, stuffToResolve: List<T>, destination: out V)
where T : IResolvable,
U : IResolver<T>,
V : IResolveDestination<T>,
{ ... }
### C#
C# is broadly similar, but uses multiple `where` clauses, one per constraint:
void resolve<T, U, V>(ResolutionContext<U> context, List<T> stuffToResolve, out V destination)
where T : IResolvable
where U : IResolver<T>
where V : IResolveDestination<T>
{ ... }
### Haskell
While Haskell is a quite different language from the others mentioned here, Haskell typeclasses have undeniably influenced the concept of traits/protocols in Rust/Swift.
In Haskell a typeclass is not something a type "inherits" from; instead a type parameter is used even for the `This` type.
Type parameters in Haskell are also introduced implicitly rather than explicitly.
The `resolve` example above would become something like:
resolve :: (Resolvable t, Resolver u t, ResolveDestination v t) =>
ResolutionContext u -> List t -> v
We see here that the constraints are all grouped together in the `(...) =>` clause before the actual type signature of the function.
That clause serves a similar semantic role to `where` clauses in these other languages.
Proposed Approach
-----------------
For any kind of declaration that Slang allows to have generic parameters, we will allow a `where` clause to appear after the *header* of that declaration.
A `where` clause consists of the (contextual) keyword `where`, followed by a comma-separated list of *constraints*:
```csharp
struct MyStuff<T, U> : IFoo
where T : IFoo, IBar
where T : IBaz
where U : IArray<T>
{ ... }
```
A `where` clause is only allowed after the header of a declaration that has one or more generic parameters.
Each constraint must take the form of one of the type parameters from the immediately enclosing generic parameter list, followed by a colon (`:`), and then followed by a type expression that names an interface or a conjunction of interfaces.
Multiple constraints can be defined for the same parameter.
We haven't previously defined what the header of a declaration is, so we briefly illustrate what we mean by showing where the split between the header and the *body* of a declaration is for each of the major kinds of declarations that are supported. In each case a comment `/****/` is placed between the header and body:
```csharp
// variables:
let v : Int /****/ = 99;
var v : Int /****/ = 99;
Int v /****/ = 99;
// simple type declarations:
typealias X : IFoo /****/ = Y;
associatedtype X : IFoo /****/;
// functions and other callables:
Int f(Float y) /****/ { ... }
func f(Float y) -> Int /****/ { ... }
init(Float y) /****/ { ... }
subscript(Int idx) -> Float /****/ { ... }
// properties
property p : Int /****/ { ... }
// aggregates
extension Int : IFoo /****/ { ... }
struct Thing : Base /****/ { ... }
class Thing : Base /****/ { ... }
interface IThing : IBase /****/ { ... }
enum Stuff : Int /****/ { ... }
```
In practice, the body of a declaration starts at the `=` for declarations with an initial-value expression, at the opening `{` for declarations with a `{}`-enclosed body, or at the closing `;` for any other declarations.
With introduction of `where` clauses, we can extend type system to allow more kinds of type constraints. In this proposal,
we allow type constraints followed by `where` to be one of:
- Type conformance constraint, in the form of `T : IBase`
- Type equality constraint, in the form of `T == X`
In both cases, the left hand side of a constraint can be a simple generic type parameter, or any types that are dependent on some
generic type parameter. For example, the following is allowed:
```csharp
interface IFoo { associatedtype A; }
struct S<T, U>
where T : IFoo
where T.A == U
{}
```
Detailed Explanation
--------------------
### Implementation
The compiler implementation already represents generics in a form where the type parameters are encoded separately from the constraints that depend on them.
The constraints act somewhat like additional unnamed parameters of a generic.
At the Slang IR level these constraint parameters are made into explicit parameters used to pass around *witness tables*.
During parsing, a `where` clause can simply add the constraints to the outer generic (and error out if there isn't one).
The actual representation of constraints will be no different than before, so many downstream compilation steps should be unaffected.
Some parts of the codebase have historically assumed that a given generic type parameter can have at most *one* constraint;
these cases will need to be identified and fixed to allow for zero or more constraints per parameter.
Semantic checking of generics will need to validate that the left-hand side of each constraint is a direct reference to one of the type parameters of the immediately enclosing generic;
previously, the semantic checking logic could *assume* that this was the case, since the parser would only create constraints in that form.
### Interaction With Overloading and Redeclaration
Probably the most important semantic issue that arises from `where` clauses is deciding whether two different function declarations count as distinct overloads, or as redeclarations (or redefinitions) of the same function signature.
The existing form for declaring constraints:
void f<T : IFoo>( ... )
{ ... }
should be treated as sugar for the equivalent `where`-based form:
void f<T>( ... )
where T : IFoo
{ ... }
The two declarations of `f` there should not only be counted as redeclarations/redefinitions, but they should also be *indistinguishable* to all clients of the module where they appear.
A module that `import`s the module defining `f` should not be able to tell which form it was declared with.
Both forms of the definition should result in the *same* signature and mangled name in Slang IR.
Furthermore, with `where` clauses it becomes possible to write equivalent constraints in more than one way.
A `where` clause can be used instead of a conjunction of interfaces:
void f<T : IFoo & IBar>( ... )
{ ... }
void f<T>( ... )
where T : IFoo,
T : IBar
{ ... }
It is also possible to use `where` clauses to introduce constraints that are *redundant*, either by repeating the same constraint:
void f<T>( ... )
where T : IFoo,
T : IFoo
{ ... }
or by constraining a type to two interfaces, where one inherits from the other:
interface IBase {}
interface IDerived : IBase {}
void f<T>( ... )
where T : IBase,
T : IDerived
{ ... }
Technically it was already possible to have redundancy in a constraint by using a conjunction of two interfaces where one inherits from the other:
void f<T : IBase & IDerived>( ... )
{ ... }
One question that is raised by the possibility of redundant constraints is whether the compiler should produce a diagnostic for them and, if so, whether it should be a warning or an error.
While it may seem obvious that redundant constraints are to be avoided, it is possible that refactoring of `interface` hierarchies could change whether existing constraints are redundant or not, potentially forcing widespread edits to code that is semantically unambiguous (and just a little more verbose than necessary).
We propose that redundant constraints should probably produce a warning, with a way to silence that warning easily.
### Canonicalization
The long and short of the above section is that there can be multiple ways to write semantically equivalent generic declarations, by changing the form, order, etc. of constraints.
We want the signature of a function (and its mangled name, etc.) to be identical for semantically equivalent declaration syntax.
In order to ensure that a declaration's mangled name is independent of the form of its constraints, we must have a way to *canonicalize* those constraints.
The Swift compiler codebase includes a document that details the rules used for canonicalization of constraints for that compiler, and we can take inspiration from it.
Our constraints are currently much more restricted, so canonicalization can follow a much simpler process, such as:
* Start with the list of user-written constraints, in declaration order
* Iterate the following to convergence:
* For each constraint of the form `T : ILeft & IRight`, replace that constraint with constraints `T : ILeft` and `T : IRight`
* Remove each constraint that is implied by another constraint
* For now, that means removing `T : IBase` if there is already a constraint `T : IDerived` where `IDerived` inherits from `IBase`
* Sort the constraints
* For constraints `T : IFoo` and `U : IBar` on different type parameters, order them based on the order of the type parameters `T` and `U`
* For constraints `T : IFoo` and `T : IBar` on the *same* type parameter, order them based on a canonicalized ordering on the interfaces `IFoo` and `IBar`
The above ordering assumes that we can produce a canonical ordering of `interface`s.
More generally, we will eventually want a canonical ordering on all types and *values* that might appear in constraints.
For now, we will limit ourselves to an ordering on nominal types, and other declaration references:
* A generic parameter is always ordered before anything other than generic parameters
* Parameters from outer generics are ordered before those from inner generics
* Parameters from the same generic are ordered based on their order in the parameter list
* Two declaration references to distinct declarations are ordered based on a lexicographic order for their qualified names, meaning:
* If one qualified name is a prefix of the other (e.g., `A.B` and `A.B.C`), then the prefix is ordered first
* Otherwise, compare the first name component (from left to right) where the names differ, and order them based on a lexicographic string comparison of the name at that component.
Alternatives Considered
-----------------------
There really aren't any compelling alternatives to `where` clauses among the languages that Slang takes design influence from.
We could try to design something to solve the same problems from first principles, but the hypothetical benefits of doing so are unclear.
When it comes to the syntactic details, we could consider disallowing type lists on the right-hand side of a conformance constraint, and instead allowing multiple constraints to be separated with commas, sharing a single `where` keyword:
struct MyStuff<T> : Base, IFoo
where T : IFoo,
T : IBar
{ ... }
This alternative form may result in more compact code without needing duplicated `where` clause, but may be harder to achieve tidy diffs when editing the constraints on declarations.
Future Directions
-----------------
### Allow more general types on the right-hand side of `:`
Currently, the only constraints allowed using `:` have a concrete (non-`interface`) type on the left-hand side, and an `interface` (or conjunction of interfaces) on the right-hand side.
In the context of `class`-based hierarchies, we can also consider having constraints that limit a type parameter to subtypes of a specific concrete type:
class Base { ... }
class Derived : Base { ... }
void f<T>( ... )
where T : Base
{ ... }
### Allow `where` clauses on non-generic declarations
We could consider allowing `where` clauses to appear on any declaration nested under a generic, such that those declarations are only usable when certain additional constraints are met.
E.g.,:
struct MyDictionary<K,V>
{
...
K minimumKeyUsed()
where K : IComparable
{ ... }
}
In this example, the user's dictionary type can be queried for the minimum key that is used for any entry, but *only* if the keys are comparable.
Most of what can be done with this more flexible placement of `where` clauses can *also* be accomplished using extensions.
E.g., the above example could instead be written:
struct MyDictionary<K,V>
{ ... }
extension<K,V> MyDictionary<K,V>
where K : IComparable
{
K minimumKeyUsed()
{ ... }
}
### Implied Constraints
In many cases a generic function signature will use the type parameters as explicit arguments to generic types that impose their own requirements.
To be concrete, consider:
struct Dictionary<K, V>
where K : IHashable
{ ... }
V myLookupFunc<K,V>(
Dictionary<K,V> dictionary, K key, V default)
{ ... }
In this case, the current Slang language rules will reject `myLookupFunc`. The type of the `dictionary` parameter is passing `K` as an argument to `Dictionary<...>` but does not have an in-scope constraint that ensures that `K : IHashable`.
The current compiler requires the function to be rewritten as:
V myLookupFunc<K,V>(
Dictionary<K,V> dictionary, K key, V default)
where K : IHashable
{ ... }
But this additional constraint ends up being pointless; in order to invoke `myLookupFunc` the programmer must have a `Dictionary<K,V>` to pass as argument for the `dictionary` parameter, which means that the `Dictionary<K,V>` type must already be well-formed based on the information the caller function has.
The compiler can eliminate the need for such constraints by adding additional rules for expanding the set of constraints on a generic during canonicalization.
For any generic type `X<A, B, C, ...>` appearing in:
* the signature of a function declaration
* the bases of a type declaration
* the existing generic constraints
The expansion step would add whatever constraints are required by `X`, with the arguments `A, B, C, ...` substituted in for the parameters of `X`.

View file

@ -1,191 +0,0 @@
Allow Type Equality Constraints on Generics
===========================================
We propose to allow *type equality* constraints in `where` clauses.
Status
------
In progress.
Background
----------
As of proposal [001](001-where-clauses.md), Slang allows for generic declarations to include a *`where` clause* which enumerates constraints on the generic parameters that must be satisfied by any arguments provided to that generic:
V findOrDefault<K, V>( HashTable<K,V> table, K key )
where K : IHashable,
V : IDefaultInitializable
{ ... }
Currently, the language only accepts *conformance* constraints of the form `T : IFoo`, where `T` is one of the parameters of the generic, and `IFoo` is either an `interface` or a conjunction of interfaces, which indicate that the type `T` must conform to `IFoo`.
This proposal is motivated by the observation that when an interface has associated types, there is currently no way for a programmer to introduce a generic that is only applicable when an associated type satisfies certain constraints.
As an example, consider an interface for types that can be "packed" into a smaller representation for in-memory storage (instead of a default representation optimized for access from registers):
interface IPackable
{
associatedtype Packed;
init(Packed packed);
Packed pack();
}
Next, consider an hypothetical interface for types that can be deserialized from a stream:
interface IDeserializable
{
init( InputStream stream );
}
Given these definitions, we might want to define a function that takes a packable type, and deserializes it from a stream:
T deserializePackable<T>( InputStream stream )
where T : IPackable
{
return T( T.Packed(stream) );
}
As written, this function will fail to compile because the compiler cannot assume that `T.Packed` conforms to `IDeserializable`, in order to support initialization from a stream.
A brute-force solution would be to add the `IDeserializable` constraint to the `IPackable.Packed` associated type, but doing so may not be consistent with the vision the designer of `IPackable` had in mind. Indeed, there is no reason to assume that `IPackable` and `IDeserializable` even have the same author, or are things that the programmer trying to write `deserializePackable` can change.
It might seem that we could improve the situation by introducing another generic type parameter, so that we can explicitly constrain it to be deserializable:
T deserializePackable<T, P>( InputStream stream )
where T : IPackable,
P : IDeserializable
{
return T( P(stream) );
}
This second attempt *also* fails to compile.
In this case, there is no way for the compiler to know that `T` can be initialized from a `P`, because it cannot intuit that `P` is meant to be `T.Packed`.
Our two failed attempts can each be fixed by introducing two new kinds of constraints:
* Conformance constraints on associated types: `T.A : IFoo`
* Equality constraints on associated types: `T.A == X`
Related Work
------------
Both Rust and Swift support additional kinds of constraints on generics, including the cases proposed here.
The syntax in those languages matches what we propose.
Proposed Approach
-----------------
In addition to conformance constraints on generic type parameters (`T : IFoo`), the compiler will also support constraints on associated types of those parameters (`T.A : IFoo`), and associated types of those associated types (`T.A.B : IFoo`), etc.
In addition, the compiler will accept constraints that restrict an associated type (`T.A`, `T.A.B`, etc.) to be equal to some other type.
The other type may be a concrete type, another generic parameter, or another associated type.
Detailed Explanation
--------------------
### Parser
The parser already supports nearly arbitrary type expressions on both sides of a conformance constraint, and then validates that the types used are allowed during semantic checking.
The only change needed at that level is to split `GenericTypeConstraintDecl` into two cases: one for conformance constraints, and another for equality constraints, and then to support constraints with `==` instead of `:`.
### Semantic Checking
During semantic checking, instead of checking that the left-hand type in a constraint is always one of the generic type parameters, we could instead check that the left-hand type expression is either a generic type parameter or `X.AssociatedType` where `X` would be a valid left-hand type.
The right-hand type for conformance constraints should be checked the same as before.
The right-hand type for an equality constraint should be allowed to be an arbitrary type expression that names a proper (and non-`interface`) type.
One subtlety is that in a type expression like `T.A.B` where both `A` and `B` are associated types, it may be that the `B` member of `T.A` can only be looked up because of another constraint like `T.A : IFoo`.
When performing semantic checking of a constraint in a `where` clause, we need to decide which of the constraints may inform lookup when resolving a type expression like `X.A`.
Some options are:
* We could consider only constraints that appear before the constraint that includes that type expression. In this case, a programmer must always introduce a constraint `X : IFoo` before a constraint that names `X.A`, if `A` is an associated type introduced by `IFoo`.
* We could consider *all* of the constraints simultaneously (except, perhaps, the constraint that we are in the middle of checking).
The latter option is more flexible, but may be (much) harder to implement in practice.
We propose that for now we use the first option, but remain open to implementing the more general case in the future.
Given an equality constraint like `T.A.B == X`, semantic checking needs to detect cases where an `X` is used and a `T.A.B` is expected, or vice versa.
These cases should introduce some kind of cast-like expression, which references the type equality witness as evidence that the cast is valid (and should, in theory, be a no-op).
Semantic checking of equality constraints should identify contradictory sets of constraints.
Such contradictions can be simple to spot:
interface IThing { associatedtype A; }
void f<T>()
where T : IThing,
T.A == String,
T.A == Float,
{ ... }
but they can also be more complicated:
void f<T,U>()
where T : IThing,
U : IThing,
T.A == String,
U.A == Float,
T.A == U.A
{ ... }
In each case, an associated type is being constrained to be equal to two *different* concrete types.
There is no possible set of generic arguments that could satisfy these constraints, so declarations like these should be rejected.
We propose that the simplest way to identify and diagnose contradictory constraints like this is during canonicalization, as described below.
### IR
At the IR level, a conformance constraint on an associated type is no different than any other conformance constraint: it lowers to an explicit generic parameter that will accept a witness table as an argument.
The choice of how to represent equality constraints is more subtle.
One option is to lower an equality constraint to *nothing* at the IR level, under the assumption that the casts that reference these constraints should lower to nothing.
Doing so would introduce yet another case where the IR we generate doesn't "type-check."
The other option is to lower a type equality constraint to an explicit generic parameter which is then applied via an explicit op to convert between the associated type and its known concrete equivalent.
The representation of the witnesses required to provide *arguments* for such parameters is something that hasn't been fully explored, so for now we propose to take the first (easier) option.
### Canonicalization
Adding new kinds of constraints affects *canonicalization*, which was discussed in proposal 0001.
Conformance constraints involving associated types should already be orderable according to the rules in that proposal, so we primarily need to concern ourselves with equality constraints.
We propose the following approach:
* Take all of the equality constraints that arise after any expansion steps
* Divide the types named on either side of any equality constraint into *equivalence classes*, where if `X == Y` is a constraint, then `X` and `Y` must be in the same equivalence class
* Each type in an equivalence class will either be an associated type of the form `T.A.B...Z`, derived from a generic type parameter, or an *independent* type, which here means anything other than those associated types.
* Because of the rules enforced during semantic checking, each equivalence class must have at least one associated type in it.
* Each equivalence class may have zero or more independent types in it.
* For each equivalence class with more than one independent type in it, diagnose an error; the application is attempting to constrain one or more associated types to be equal to multiple distinct types at once
* For each equivalence class with exactly one independent type in it, produce new constraints of the form `T.A.B...Z == C`, one for each associated type in the equivalence class, where `C` is the independent type
* For each equivalence class with zero independent types in it, pick the *minimal* associated type (according to the type ordering), and produce new constraints of the form `T.A... == U.B...` for each *other* associated type in the equivalence class, where `U.B...` is the minimal associated type.
* Sort the new constraints by the associated type on their left-hand side.
Alternatives Considered
-----------------------
The main alternative here would be to simply not have these kinds of constraints, and push programmers to use type parameters instead of associated types in cases where they want to be able to enforce constraints on those types.
E.g., the `IPackable` interface from earlier could be rewritten into this form:
interface IPackable<Packed>
{
init(Packed packed);
Packed pack();
}
With this form for `IPackable`, it becomes possible to use additional type parameters to constrain the `Packed` type:
T deserializePackable<T, U>( InputStream stream )
where T : IPackable<U>,
U : IDeserializable
{
return T( U(stream) );
}
While this workaround may seem reasonable in an isolated example like this, there is a strong reason why languages like Slang choose to have both generic type parameters (which act as *inputs* to an abstraction) and associated types (which act as *outputs*).
We believe that associated types are an important feature, and that they justify the complexity of these new kinds of constraints.

View file

@ -1,153 +0,0 @@
SP #003 - `Atomic<T>` type
==============
Status
------
Author: Yong He
Status: Implemented.
Implementation: [PR 5125](https://github.com/shader-slang/slang/pull/5125)
Reviewed by: Theresa Foley, Jay Kwak
Background
----------
HLSL defines atomic intrinsics to work on free references to ordinary values such as `int` and `float`. However, this doesn't translate well to Metal and WebGPU,
which define an `atomic<T>` type and only allow atomic operations to be applied to values of `atomic<T>` types.
Slang's Metal backend follows the same technique as SPIRV-Cross and the DXIL->Metal converter, which relies on C++ undefined behavior that casts an ordinary `int*` pointer to an `atomic<int>*` pointer
and then calls atomic intrinsics on the reinterpreted pointer. This is fragile and not guaranteed to work in the future.
To make the situation worse, WebGPU bans all possible ways to cast a normal pointer into an `atomic` pointer. In order to provide a truly portable way to define
atomic operations and allow them to be translatable to all targets, we will also need an `atomic<T>` type in Slang that maps to `atomic<T>` in WGSL and Metal, and maps to
`T` for HLSL/SPIRV.
Proposed Approach
-----------------
We define an `Atomic<T>` type that functions as a wrapper of `T` and provides atomic operations:
```csharp
enum MemoryOrder
{
Relaxed = 0,
Acquire = 1,
Release = 2,
AcquireRelease = 3,
SeqCst = 4,
}
[sealed] interface IAtomicable {}
[sealed] interface IArithmeticAtomicable : IAtomicable, IArithmetic {}
[sealed] interface IBitAtomicable : IArithmeticAtomicable, IInteger {}
[require(cuda_glsl_hlsl_metal_spirv_wgsl)]
struct Atomic<T : IAtomicable>
{
T load(MemoryOrder order = MemoryOrder.Relaxed);
[__ref] void store(T newValue, MemoryOrder order = MemoryOrder.Relaxed);
[__ref] T exchange(T newValue, MemoryOrder order = MemoryOrder.Relaxed); // returns old value
[__ref] T compareExchange(
T compareValue,
T newValue,
MemoryOrder successOrder = MemoryOrder.Relaxed,
MemoryOrder failOrder = MemoryOrder.Relaxed);
}
extension<T : IArithmeticAtomicable> Atomic<T>
{
[__ref] T add(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T sub(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T max(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T min(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
}
extension<T : IBitAtomicable> Atomic<T>
{
[__ref] T and(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T or(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T xor(T value, MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T increment(MemoryOrder order = MemoryOrder.Relaxed); // returns original value
[__ref] T decrement(MemoryOrder order = MemoryOrder.Relaxed); // returns original value
}
extension int : IArithmeticAtomicable {}
extension uint : IArithmeticAtomicable {}
extension int64_t : IBitAtomicable {}
extension uint64_t : IBitAtomicable {}
extension double : IArithmeticAtomicable {}
extension float : IArithmeticAtomicable {}
extension half : IArithmeticAtomicable {}
// Operator overloads:
// All operator overloads are using MemoryOrder.Relaxed semantics.
__prefix T operator++<T>(__ref Atomic<T> v); // returns new value.
__postfix T operator++<T>(__ref Atomic<T> v); // returns original value.
__prefix T operator--<T>(__ref Atomic<T> v); // returns new value.
__postfix T operator--<T>(__ref Atomic<T> v); // returns original value.
T operator+=(__ref Atomic<T> v, T operand); // returns new value.
T operator-=(__ref Atomic<T> v, T operand); // returns new value.
T operator|=(__ref Atomic<T> v, T operand); // returns new value.
T operator&=(__ref Atomic<T> v, T operand); // returns new value.
T operator^=(__ref Atomic<T> v, T operand); // returns new value.
```
We allow `Atomic<T>` to be defined in struct fields, as array elements, as elements of `RWStructuredBuffer` types,
or as groupshared variable types or `__ref` function parameter types. For example:
```hlsl
struct MyType
{
int ordinaryValue;
Atomic<int> atomicValue;
}
RWStructuredBuffer<MyType> atomicBuffer;
void main()
{
atomicBuffer[0].atomicValue.add(1);
printf("%d", atomicBuffer[0].atomicValue.load());
}
```
In groupshared memory:
```hlsl
void main()
{
groupshared Atomic<int> c;
c.add(1);
}
```
Note that on many targets, it is invalid to use the `atomic<T>` type to define a local variable or a function parameter, or in any way
to cause an `atomic<T>` to reside in the local/function/private address space. Slang should be able to lower the type
into its underlying type. The use of atomic type in these positions will simply have no meaning. However, we are going to leave
this legalization as future work and leave such situations as undefined behavior for now.
This should be handled by a legalization pass similar to `lowerBufferElementTypeToStorageType` but operates
in the opposite direction: the "loaded" value from a buffer is converted into an atomic-free type, and storing a value leads to an
atomic store at the corresponding locations.
For non-WGSL/Metal targets, we can simply lower the type out of existence into its underlying type.
# Related Work
`Atomic<T>` type exists in almost all CPU programming languages and is the proven way to express atomic operations over different
architectures that have different memory models. WGSL and Metal follow this trend by requiring atomic operations to be expressed
this way. This proposal is to make Slang follow this trend and make `Atomic<T>` the recommended way to express atomic operation
going forward.
# Future Work
As discussed in previous sections, we should consider adding a legalization pass to allow `Atomic<T>` type to be used anywhere in
any memory space, and legalize them out to just normal types if they are used in memory spaces where atomic semantic has no/trivial
meaning.

View file

@ -1,408 +0,0 @@
SP #004: Initialization
=================
This proposal documents the desired behavior of initialization related language semantics, including default constructor, initialization list and variable initialization.
Status
------
Status: Design Approved, implementation in-progress.
Implementation: N/A
Author: Yong He
Reviewer: Theresa Foley, Kai Zhang
Background
----------
Slang has introduced several different syntaxes around initialization to provide syntactic compatibility with HLSL/C++. As the language evolves, there are many corners where
the semantics around initialization are not well-defined, causing confusion or leading to surprising behaviors.
This proposal attempts to provide a design on where we want the language to be in terms of how initialization is handled in all different places.
Related Work
------------
C++ has many different ways and syntax to initialize an object: through explicit constructor calls, initialization list, or implicitly in a member/variable declaration.
A variable in C++ can also be in an uninitialized state after its declaration. HLSL inherits most of this behavior from C++ by allowing variables to be uninitialized.
On the other hand, languages like C# and Swift have a set of well-defined rules to ensure every variable is initialized after its declaration.
C++ allows using the initialization list syntax to initialize an object. The semantics of initialization lists depends on whether or not explicit constructors
are defined on the type.
Proposed Approach
-----------------
In this section, we document all concepts and rules related to initialization, constructors and initialization lists.
### Default Initializable type
A type is considered "default-initializable" if it provides a constructor that can take 0 arguments, so that it can be constructed with `T()`.
### Variable Initialization
Generally, a variable is considered uninitialized at its declaration site without an explicit value expression.
For example,
```csharp
struct MyType { int x ; }
void foo()
{
MyType t; // t is uninitialized.
var t1 : MyType; // same in modern syntax, t1 is uninitialized.
}
```
However, the Slang language has been allowing implicit initialization of variables whose types are default initializable types.
For example,
```csharp
struct MyType1 {
int x;
__init() { x = 0; }
}
void foo() {
MyType t1; // `t1` is initialized with a call to `__init`.
}
```
We would like to move away from this legacy behavior towards a consistent semantics of never implicitly initializing a variable.
To maintain backward compatibility, we will keep the legacy behavior, but remove the implicit initialization when the variable is defined
in modern syntax:
```csharp
void foo() {
var t1: MyType; // `t1` will no longer be initialized.
}
```
We will also remove the default initialization semantics for traditional syntax in modern Slang modules that come with an explicit `module` declaration.
Trying to use a variable without initializing it first is an error.
For backward compatibility, we will introduce a compiler option to turn this error into a warning, but we may deprecate this option in the future.
### Generic Type Parameter
A generic type parameter is not considered default-initializable by-default. As a result, the following code should leave `t` in an uninitialized state:
```csharp
void foo<T>()
{
T t; // `t` is uninitialized at declaration.
}
```
### Synthesis of constructors for member initialization
If a type already defines any explicit constructors, do not synthesize any constructors for initializer list call. An initializer list expression
for the type must exactly match one of the explicitly defined constructors.
If the type doesn't provide any explicit constructors, the compiler needs to synthesize the constructors for the calls that the initializer
lists translate into, so that an initializer list expression can be used to initialize a variable of the type.
For each type, we will synthesize one constructor at the same visibility of the type itself:
The signature for the synthesized initializer for type `V struct T` is:
```csharp
V T.__init(member0: typeof(member0) = default(member0), member1 : typeof(member1) = default(member1), ...)
```
where `V` is a visibility modifier, `(member0, member1, ... memberN)` is the set of members that has visibility `V`, and `default(member0)`
is the value defined by the initialization expression in `member0` if it exist, or the default value of `member0`'s type.
If `member0`'s type is not default initializable and the member doesn't provide an initial value, then the parameter will not have a default value.
The synthesized constructor will be marked as `[Synthesized]` by the compiler, so the call site can inject additional compatibility logic when calling a synthesized constructor.
The body of the constructor will initialize each member with the value coming from the corresponding constructor argument if such argument exists,
otherwise the member will be initialized to its default value either defined by the init expr of the member, or the default value of the type if the
type is default-initializable. If the member type is not default-initializable and a default value isn't provided on the member, then the constructor
synthesis will fail and the constructor will not be added to the type. Failure to synthesize a constructor is not an error, and an error will appear
if the user is trying to initialize a value of the type in question assuming such a constructor exists.
Note that if every member of a struct contains a default expression, the synthesized `__init` method can be called with 0 arguments; however, this will not cause a variable declaration to be implicitly initialized. Implicit initialization is a backward compatibility feature that only works for user-defined `__init()` methods.
### Single argument constructor call
Call to a constructor with a single argument is always treated as a syntactic sugar of type cast:
```csharp
int x = int(1.0f); // is treated as (int) 1.0f;
MyType y = MyType(arg); // is treated as (MyType)arg;
MyType x = MyType(y); // equivalent to `x = y`.
```
The compiler will attempt to resolve all type casts using type coercion rules, if that failed, will fall back to resolve it as a constructor call.
### Initialization List
Slang allows initialization of a variable by assigning it with an initialization list.
Generally, Slang will always try to resolve initialization list coercion as if it is an explicit constructor invocation.
For example, given:
```csharp
S obj = {1,2};
```
Slang will try to convert the code into:
```csharp
S obj = S(1,2);
```
Following the same logic, an empty initializer list will translate into a default-initialization:
```csharp
S obj = {};
// equivalent to:
S obj = S();
```
Note that initializer list of a single argument does not translate into a type cast, unlike the constructor call syntax. Initializing with a single element in the initializer list always translates directly into a constructor call. For example:
```csharp
void test()
{
MyType t = {1};
// translates to direct constructor call:
// MyType t = MyType.__init(1);
// which is NOT the same as:
// MyType t = MyType(t)
// or:
// MyType t = (MyType)t;
}
```
If the above code passes type check, then it will be used as the way to initialize `obj`.
If the above code does not pass type check, and if there is only one constructor for `MyType` that is synthesized as described in the previous section (and therefore marked as `[Synthesized]`), Slang continues to check if `S` meets the standard of a "legacy C-style struct" type.
A type is a "legacy C-Style struct" if all of the following conditions are met:
- It is a user-defined struct type or a basic scalar, vector or matrix type, e.g. `int`, `float4x4`.
- It does not contain any explicit constructors defined by the user.
- All its members have the same visibility as the type itself.
- All its members are legacy C-Style structs or arrays of legacy C-style structs.
Note that C-Style structs are allowed to have member default values.
In such case, we perform a legacy "read data" style consumption of the initializer list to synthesize the arguments to call the constructor, so that the following behavior is valid:
```csharp
struct Inner { int x; int y; };
struct Outer { Inner i; Inner j; }
// Initializes `o` into `{ Inner{1,2}, Inner{3,0} }`, by synthesizing the
// arguments to call `Outer.__init(Inner(1,2), Inner(3, 0))`.
Outer o = {1, 2, 3};
```
If the type is not a legacy C-Style struct, Slang should produce an error.
### Legacy HLSL syntax to cast from 0
HLSL allows a legacy syntax to cast from literal `0` to a struct type, for example:
```hlsl
MyStruct s { int x; }
void test()
{
MyStruct s = (MyStruct)0;
}
```
Slang treats this as equivalent to an empty-initialization:
```csharp
MyStruct s = (MyStruct)0;
// is equivalent to
MyStruct s = {};
```
Examples
-------------------
```csharp
// Assume everything below is public unless explicitly declared.
struct Empty
{
// compiler synthesizes:
// __init();
}
void test()
{
Empty s0 = {}; // Works, `s` is considered initialized via ctor call.
Empty s1; // `s1` is considered uninitialized.
}
struct CLike
{
int x; int y;
// compiler synthesizes:
// __init(int x, int y);
}
void test1()
{
CLike c0; // `c0` is uninitialized.
// case 1: initialized with synthesized ctor call using legacy logic to form arguments,
// and `c1` is now `{0,0}`.
// (we will refer to this scenario as "initialized with legacy logic" for
// the rest of the examples):
CLike c1 = {};
// case 2: initialized with legacy initializer list logic, `c2` is now `{1,0}`:
CLike c2 = {1};
// case 3: initialized with ctor call `CLike(1,2)`, `c3` is now `{1,2}`:
CLike c3 = {1, 2};
}
struct ExplicitCtor
{
int x;
int y;
__init(int x) {...}
// compiler does not synthesize any ctors.
}
void test2()
{
ExplicitCtor e0; // `e0` is uninitialized.
ExplicitCtor e1 = {1}; // calls `__init`.
ExplicitCtor e2 = {1, 2}; // error, no ctor matches initializer list.
}
struct DefaultMember {
int x = 0;
int y = 1;
// compiler synthesizes:
// __init(int x = 0, int y = 1);
}
void test3()
{
DefaultMember m; // `m` is uninitialized.
DefaultMember m1 = {}; // calls `__init()`, initialized to `{0,1}`.
DefaultMember m2 = {1}; // calls `__init(1)`, initialized to `{1,1}`.
DefaultMember m3 = {1,2}; // calls `__init(1,2)`, initialized to `{1,2}`.
}
struct PartialInit {
// warning: not all members are initialized.
// members should either be all-uninitialized or all-initialized with
// default expr.
int x;
int y = 1;
// compiler synthesizes:
// __init(int x, int y = 1);
}
void test4()
{
PartialInit i; // `i` is not initialized.
PartialInit i1 = {2}; // calls `__init`, result is `{2,1}`.
PartialInit i2 = {2, 3}; // calls `__init`, result is {2, 3}
}
struct PartialInit2 {
int x = 1;
int y; // warning: not all members are initialized.
// compiler synthesizes:
// __init(int x, int y);
}
void test5()
{
PartialInit2 j; // `j` is not initialized.
PartialInit2 j1 = {2}; // error, no ctor match.
PartialInit2 j2 = {2, 3}; // calls `__init`, result is {2, 3}
}
public struct Visibility1
{
internal int x;
public int y = 0;
// the compiler does not synthesize any ctor.
// the compiler will try to synthesize:
// public __init(int y);
// but then it will find that `x` cannot be initialized.
// so this synthesis will fail and no ctor will be added
// to the type.
}
void test6()
{
Visibility1 t = {0, 0}; // error, no matching ctor
Visibility1 t1 = {}; // error, no matching ctor
Visibility1 t2 = {1}; // error, no matching ctor
}
public struct Visibility2
{
// Visibility2 type contains members of different visibility,
// which disqualifies it from being considered as C-style struct.
// Therefore we will not attempt the legacy fallback logic for
// initializer-list syntax.
internal int x = 1;
public int y = 0;
// compiler synthesizes:
// public __init(int y = 0);
}
void test7()
{
Visibility2 t = {0, 0}; // error, no matching ctor.
Visibility2 t1 = {}; // OK, initialized to {1,0} via ctor call.
Visibility2 t2 = {1}; // OK, initialized to {1,1} via ctor call.
}
internal struct Visibility3
{
// Visibility3 type is considered as C-style struct.
// Because all members have the same visibility as the type.
// Therefore we will attempt the legacy fallback logic for
// initializer-list syntax.
// Note that c-style structs can still have init exprs on members.
internal int x;
internal int y = 2;
// compiler synthesizes:
// internal __init(int x, int y = 2);
}
internal void test8()
{
Visibility3 t = {0, 0}; // OK, initialized to {0,0} via ctor call.
Visibility3 t1 = {1}; // OK, initialized to {1,2} via ctor call.
Visibility3 t2 = {}; // OK, initialized to {0, 2} via legacy logic.
}
internal struct Visibility4
{
// Visibility4 type is considered as C-style struct.
// And we still synthesize a ctor for member initialization.
// Because Visibility4 has no public members, the synthesized
// ctor will take 0 arguments.
internal int x = 1;
internal int y = 2;
// compiler synthesizes:
// internal __init(int x = 1, int y = 2);
}
internal void test9()
{
Visibility4 t = {0, 0}; // OK, initialized to {0,0} via ctor call.
Visibility4 t1 = {3}; // OK, initialized to {3,2} via ctor call.
Visibility4 t2 = {}; // OK, initialized to {1,2} via ctor call.
}
```
### Zero Initialization
The Slang compiler supported an option to force zero-initialization of all local variables.
This is currently implemented by adding `IDefaultInitializable` conformance to all user
defined types. With the direction we are heading, we should remove this option in the future.
For now we can continue to provide this functionality but through an IR rewrite pass instead
of changing the frontend semantics.
When the user specifies `-zero-initialize`, we should still use the same front-end logic for
all the checking. After lowering to IR, we should insert a `store` after all `IRVar : T` to
initialize them to `defaultConstruct(T)`.
Q&A
-----------
### Should global static and groupshared variables be default initialized?
Similar to local variables, all declarations are not default initialized at its declaration site.
In particular, it is difficult to efficiently initialize global variables safely and correctly in a general way on platforms such as Vulkan,
so implicit initialization for these variables can come with serious performance consequences.
### Should `out` parameters be default initialized?
Following the same philosophy of not initializing any declarations, `out` parameters are also not default-initialized.
Alternatives Considered
-----------------------
One important decision point is whether or not Slang should allow variables to be left in an uninitialized state after their declaration, as is allowed in C++. In contrast, C# forces everything to be default initialized at its declaration site, which comes at the cost of placing the burden on developers to come up with a way to define the default value for each type.
Our opinion is we want to allow things as uninitialized, and to have the compiler validation checks to inform
the developer something is wrong if they try to use a variable in uninitialized state. We believe it is desirable to tell the developer what's wrong instead of using a heavyweight mechanism to ensure everything is initialized at declaration sites, which can have non-trivial performance consequences for GPU programs, especially when the variable is declared in groupshared memory.

View file

@ -1,61 +0,0 @@
SP #005: Write-Only Textures
=================
Add Write-Only texture types to Slang's core module.
Status
------
Status: Design Review.
Implementation: N/A
Author: Yong He
Reviewer:
Background
----------
Slang inherits HLSL's RWTexture types to represent UAV/storage texture resources, this works well for HLSL, GLSL, CUDA and SPIRV targets.
However Metal has the notion of write only textures, and WebGPU has limited support of read-write textures. In WebGPU, a read-write texture can only have
uncompressed single-channel 32bit texel format, which means a `RWTexture2D` cannot be used to write to a `rgba8unorm` texture.
To provide better mapping to write-only textures on Metal and WebGPU, we propose to add write-only textures to Slang to allow writing portable code
without relying on backend workarounds.
Proposed Approach
-----------------
Slang's core module already defines all texture types as a single generic `_Texture<T, ..., access, ...>` type, where `access` is a value parameter
representing the allowed access of the texture. The valid values of access are:
```
kCoreModule_ResourceAccessReadOnly
kCoreModule_ResourceAccessReadWrite
kCoreModule_ResourceAccessRasterizerOrdered
kCoreModule_ResourceAccessFeedback
```
We propose to add another case:
```
kCoreModule_ResourceAccessWriteOnly
```
to represent write-only textures.
Also add the typealiases prefixed with "W" for all write only textures:
```
WTexture1D, WTexture2D, ...
```
These types will be reported in the reflection API with `access=SLANG_RESOURCE_ACCESS_WRITE`.
Write-only textures support `GetDimension` and `Store(coord, value)` methods. `Load` or `subscript` is not defined for write-only texture types,
so the user cannot write code that reads from a write-only texture.
Write only textures are supported on all targets. For traditional HLSL, GLSL, SPIRV and CUDA targets, they are translated
exactly the same as `RW` textures. For Metal, they map to `access::write`, and for WGSL, they map to `texture_storage_X<format, write>`.

View file

@ -1,679 +0,0 @@
SP #007: Variadic Generics
=================
Variadic generics is the ability to define and use generic types and functions that has arbitrary number of generic type parameters.
For example, a tuple type can be represented as a generic type that has zero or any number of type parameters, i.e. a variadic generic.
Variadic types and functions are key building blocks to allow tuple types in the language, and will also enable us to define a
`IFunc<TResult, TParam...>` interface that represents a callable value. `IFunc` interface can allow users to start writing code that
takes "callback" functions as parameters and start using functors or adopting more functional programming idioms.
Supporting variadic generics is a big step up in Slang type system's expressive power, and will allow more meta programming logic to be
written in native Slang code rather than on top of it with macros or custom code generation tools.
Status
------
Status: Implemented.
Author: Yong He.
Implementation:
[PR 4833](https://github.com/shader-slang/slang/pull/4833),
[PR 4849](https://github.com/shader-slang/slang/pull/4849),
[PR 4850](https://github.com/shader-slang/slang/pull/4850),
[PR 4856](https://github.com/shader-slang/slang/pull/4856)
Reviewed by: Kai Zhang, Jay Kwak, Ariel Glasroth.
Background
----------
We have several cases that will benefit from variadic generics. The simplest example is the `printf` function, which is currently
defined to have different overloads for each number of arguments. The downside of duplicating overloads is the bloating of the core
module size and a predefined upper limit on argument count. If users are to build their own functions that wrap the `printf`
function, they will have to define a set of overloads for each number of arguments too, further bloating code size.
Some of our users would like to implement the functor idiom in their shader code with interfaces. This is almost possible
with existing support of generics and interfaces. For example:
```
// Define an interface for the callback function
interface IProcessor
{
void process(int data);
}
// The callback function `p` is represented as a functor conforming to the `IProcessor` interface.
void process<TProcessor:IProcessor>(TProcessor p, int data[N])
{
for (int i = 0; i < N; i++)
p.process(data[i]);
}
// Define the functor as a type that conforms to `IProcessor`.
struct MyProcessorFunc : IProcessor
{
void process(int data) { ... }
}
void user(int myData[N])
{
// Define an instance of the functor, and pass it to `process`.
MyProcessorFunc functor = ...;
process(functor, myData);
}
```
While this can work, it requires a lot of boilerplate from the user. For each shape of callback, the user must define
a separate interface. We can reduce this boilerplate if the system has builtin support for `IFunc`:
```
// The callback function `p` is represented as a functor conforming to the `IProcessor` interface.
void process<TProcessor:IFunc<void, int>>(TProcessor p, int data[N])
{
for (int i = 0; i < N; i++)
p.process(data[i]);
}
// Define the functor as a type that conforms to `IProcessor`.
struct MyProcessorFunc : IFunc<void, int>
{
void process(int data) { ... }
}
void user(int myData[N])
{
// Define an instance of the functor, and pass it to `process`.
MyProcessorFunc functor = ...;
process(functor, myData);
}
```
The above code eliminates the user defined interface by using the builtin `IFunc` interface. By making `IFunc` builtin,
we can open the path for the compiler to synthesize conformances to `IFunc` for ordinary functions and in the future
add support for lambda expressions that automatically conform to `IFunc`, further simplifying the user code into something like:
```
// The callback function `p` is represented as a functor conforming to the `IProcessor` interface.
void process<TProcessor:IFunc<void, int>>(TProcessor p, int data[N])
{
for (int i = 0; i < N; i++)
p.process(data[i]);
}
void user(int myData[N])
{
process((int x)=>{...}, myData);
}
```
Related Work
------------
Variadic generics is an advanced type system feature that is missing in many modern languages including C# and Rust.
Swift added support for variadic generics in late 2022/2023, and this proposal largely follows Swift's design.
C++ has variadic templates that achieve similar results within the template system.
Rust supports variadics in a macro system layered above its core type system. While this can solve many user issues,
we decided to not go through this path because macros and templates must be expanded before core type checking, which means that
they can't integrate nicely in modules and compiled into IR independently of their use sites.
Proposed Approach
-----------------
Slang can follow Swift's solution for variadic generics. A user can define a variadic generic with the syntax:
```
void myFunc<each T>(expand each T v) {...}
```
The code above defines a generic function that has a __generic type pack parameter__ `T` with the `each` keyword before `T`.
The function's parameter list is defined as `expand each T v`, which should be interpreted as a parameter `v` whose type is
`expand each T`. `expand each T` is a type that represents a pack of types. A parameter whose type is a pack of types can
accept zero or more arguments during function call resolution.
`myFunc` can be called with arbitrary number of arguments:
```
myFunc(); // OK, zero arguments
myFunc(1, 2.0f, 3.0h); // OK, three arguments with different types.
```
A function can forward its variadic parameter to another function that accepts variadic parameter with the `expand` expression:
```
void caller<each T>(expand each T v)
{
myFunc(expand each v);
}
```
Generic type pack parameters can be nested, and there can be more than one variadic generic parameter in a single generic decl:
```
struct Parent<each T>
{
void f<each U>(...) {...} // OK, nested generics with type pack parameters
}
void g<each T, each U>(...) { ... } // OK, more than one type pack parameter in a single generic.
```
However, when more than one generic type pack parameter is referenced in a single `expand` expression, there is an implicit
requirement that these type packs will have the same number of elements. For example:
```
// implicitly requiring T and U to have same number of elements.
void g<each T, each U>(expand Pair<each T, each U> pairs) {...}
void user()
{
// We will match (int, float) to `T`, and (uint16_t, half) to `U`:
g<int, float, uint16_t, half>(
Pair<int, uint16_t>(1, 2),
Pair<float, half>(1.0f, 2.0h) );
}
```
In the example above, the type `expand Pair<each T, each U>` defines a pack of types where each element in the pack is formed by
replacing `each T` and `each U` in the __pattern type__ `Pair<each T, each U>` with the corresponding elements in type pack `T` and `U`.
Because the pattern type `Pair<each T, each U>` references two different type pack parameters `T` and `U`, we require that `T` and `U`
have the same number of types; this allows us to resolve `g<int, float, uint16_t, half>` by evenly dividing the argument list
into two parts, such that `T = (int, float)` and `U = (uint16_t, half)`. With that, `expand Pair<each T, each U>` is then substituted
into a type pack `(Pair<int, uint16_t>, Pair<float, half>)`.
Generic type pack parameters can have type constraints:
```
void f<each T : IFloat>(expand each T v) {}
```
This means that every type in the type pack `T` must conform to the interface `IFloat`.
You can use any expression inside `expand` when it is used on values:
```
interface IGetValue
{
int getValue();
}
void print<each T>(expand each T) {...}
void f<each T : IFloat>(expand each T v)
{
print(expand (each v).getValue());
}
```
Here, `expand (each v).getValue()` will expand the pattern expression `(each v).getValue()` into a pack of values. The result of this `expand` expression
is a pack of values where each element of the pack is computed by substituting `each v` in the pattern expression with each element in `v`. The resulting
pack of `int` values is then passed to `print` function that also takes a pack of values.
For now, we require that all variadic generic type packs appear at the end of a parameter list, after any ordinary parameters. This means that the following
definitions are invalid:
```
void f<each T, U>() {} // Error, ordinary parameter `U` after type pack.
void g<each T, U = int>() {} // Error, ordinary parameter after type pack.
void k<each T, let i : int>() {} // Error.
void h<U = int, each T>() {} // OK.
```
Additionally, we establish these restrictions on how `expand` and `each` may be used:
- The pattern type of an `expand` type expression must capture at least one generic type pack parameter in an `each` expression.
- The type expression after `each` must refer to a generic type pack parameter, and the `each` expression can only appear inside an `expand` expression.
These rules mean that expressions like `expand int`, or `each T` on its own, are invalid expressions.
Similarly, when using `expand` and `each` on values, we require that:
- The pattern expression of an `expand` expression must capture at least one value whose type is a generic type pack parameter.
- The expression after `each` must refer to a value whose type is a generic type pack parameter, and the `each` expression can only appear inside an `expand` expression.
Combined with type equality constraints, a variadic generic type pack can be used to define a homogeneously typed parameter pack:
```
void calcInts<each T>(expand each T values) where T == int
{
...
}
```
Detailed Explanation
--------------------
To implement variadic generics, we need to introduce several semantic constructs in our type system.
### `GenericTypePackParameterDecl`
When a generic parameter is defined with the `each` keyword, such as in `void f<each T>`, the parser should create a new type of AST node inside the generic, and we name this
AST node a `GenericTypePackParameterDecl`. With this addition, a generic parameter can be a `GenericTypeParameterDecl`, `GenericValueParameterDecl`, `GenericTypeConstraintDecl`
and `GenericTypePackParameterDecl`. When the user defines type constraints on a generic type pack parameter, we will form a `GenericTypeConstraintDecl` whose `subType` is a
`DeclRefType` referencing the `GenericTypePackParameterDecl`.
### Type Pack
A type pack represents a pack of types. The simplest form of a type pack is a `ConcreteTypePack`, which is a list of concrete types, such as `(int, float, float3)`.
In a generic decl such as `void f<each T>(T v)`, `T` refers to an abstract type pack represented by the generic type pack parameter `T`. The type of parameter `v` in this case
is a `DeclRefType(GenericTypePackParameterDecl "T")`.
The most general case of a type pack is defined by the `expand PatternExpr` type expression. In this case, the expression will be translated into an `ExpandType`, representing an
abstract type pack that can be evaluated by substituting all `each X` expressions in the `PatternExpr` with a corresponding element in `X`, and joining all the resulting element types
into a type pack.
Note that a `ConcreteTypePack` is very similar in semantic meaning to a `Tuple`, with the exception that `ConcreteTypePack` also bears the automatic flattening semantic, such that
`ConcreteTypePack(ConcreteTypePack(a,b), c)` is equivalent and can be simplified to `ConcreteTypePack(a,b,c)`.
In summary, a type pack can be represented by one of:
- `ConcreteTypePack`, a simple concrete list of element types.
- `DeclRefType(GenericTypePackParameterDecl)`, a simple reference to a generic type pack parameter.
- `ExpandType(PatternType)`, an abstract type pack resulting from expanding and evaluating `PatternType`.
### `ExpandType` and `EachType`
The type expression `expand each T` should be translated into `ExpandType(EachType(T), T)`. Here the first argument in `ExpandType` is the `PatternType`, which is what we will
use to expand into a concrete type pack. The second argument `T` represents all the generic type pack parameters that are being captured by `PatternType`. The reason to explicitly
keep track of captured generic type pack parameters is to make it easy to determine the size of the type pack without having to look into `PatternType`, and to ensure we never lose
the size info even when the pattern type itself is substituted into something that is independent of any generic type pack parameters.
For example, consider the substitution process on following case:
```
typealias F<T> = int; // result of F<T> is not dependent on T.
typealias MyPack<each T : IFoo> = expand F<each T>;
typealias Pack3 = MyPack<float, double, void>;
```
We can know from this definition that `Pack3` should evaluate to `(int, int, int)`. But let's see step-by-step how this is done in the type system.
First, `Pack3` is evaluated to `MyPack<ConcreteTypePack(float, double, void)>`. To further resolve this, we will plugin the argument `ConcreteTypePack(float, double, void)` into the
definition of `MyPack`. The definition `expand F<each T>` is represented as:
```
ExpandType(
pattern = DeclRefType(
GenericAppDeclRef(F,
args = [EachType(DeclRefType(GenericTypePackParamDecl "T"))])
),
capture = DeclRefType(GenericTypePackParamDecl "T")
)
```
But this type is simplifiable because `F` refers to a type alias whose definition is:
```
DeclRefType(StructDecl "int")
```
So the `expand F<each T>` type can be further simplified down to:
```
ExpandType(
pattern = DeclRefType(StructDecl "int"),
capture = DeclRefType(GenericTypePackParamDecl "T")
)
```
Note that in this definition, the pattern type no longer contains any references to any `GenericTypePackParamDecl` so there is no way for us
to know how many elements the `ExpandType` should expand into just from the pattern type itself. Fortunately, we still kept a reference to
the generic type param decl through the `capture` argument in the `ExpandType`. This will allow us to evaluate it into `(int, int, int)` when
we apply substitution `T=ConcreteTypePack(float, double, void)` to it.
Let's take a look at another more contrived example to understand the substitution process. Assume we have:
```
interface IFoo
{
associatedtype Assoc;
};
struct Foo : IFoo
{
typealias Assoc = int;
};
struct Foo2 : IFoo
{
typealias Assoc = float;
};
typealias MyPack<each T : IFoo> = expand (each T).Assoc;
typealias Pack2 = MyPack<Foo, Foo2>;
```
When evaluating `Pack2`, we will first form a `ConcreteTypePack(Foo, Foo2)` and use it to substitute the `T` parameter in `MyPack`. This will result in `MyPack<ConcreteTypePack(Foo, Foo2)>`.
Then we continue to resolve this type alias by substituting `expand (each T).Assoc` with `T = ConcreteTypePack(Foo, Foo2)`.
The expression `expand (each T).Assoc` is translated into
```
ExpandType(
pattern =
DeclRefType(
LookupDeclRef(
EachType(DeclRefType(GenericTypePackParamDecl "T")),
IFoo::assoc
)
),
capture = DeclRefType(GenericTypePackParamDecl "T")
)
```
Substituting this with `DeclRefType(T) = ConcreteTypePack(Foo, Foo2)` we will get:
```
ExpandType(
pattern =
DeclRefType(
LookupDeclRef(
EachType(ConcreteTypePack(Foo, Foo2)),
IFoo::assoc
)
),
capture = ConcreteTypePack(Foo, Foo2)
)
```
Since the captured type pack in the `ExpandType` is already a concrete type pack, we should be able to turn this `ExpandType` into a
`ConcreteTypePack`, by substituting `pattern` twice, with `EachType(...)` replaced with a corresponding element in the input `ConcreteTypePack` to form:
```
ConcreteTypePack(
DeclRefType(
LookupDeclRef(
Foo,
IFoo::assoc
)
),
DeclRefType(
LookupDeclRef(
Foo2,
IFoo::assoc
)
)
)
```
And by resolving the `LookupDeclRef`, we will get:
```
ConcreteTypePack(
DeclRefType(StructDecl "int"),
DeclRefType(StructDecl "float")
)
```
Which is the correct representation for type pack `(int, float)`.
#### Simplification Rules of `Expand` and `Each` Types
By the definition of `expand` and `each`, we have these simplification rules:
- `expand each T` => `T`
- `each expand T` => `T`
### Type Constraints for Subtype Relationships
We define the sub-type relationship for type packs so that: given type pack `TPack`, we say
`TPack` is a subtype of `IFoo` (noted as `TPack:IFoo`) if every type in `TPack` is a subtype of `IFoo`.
In a generic definition `__generic<each T : IFoo>`, we will say the type pack `T` is a subtype of
`IFoo`. In the generic definition, we will have a `GenericTypeConstraintDecl` where
`subType = DeclRefType(GenericTypePackParamDecl "T")` and `supType = IFoo`. The fact that `T:IFoo` is
represented by a `DeclaredSubtypeWitness` whose `declRef` will point to this
`GenericTypeConstraintDecl`.
The subtype witness for a `ConcreteTypePack(T0, T1, ... Tn) : IBase` is represented by
`TypePackSubtypeWitness(SubtypeWitness(T0:IBase), SubtypeWitness(T1:IBase), ..., SubtypeWitness(Tn:IBase))`.
If a type pack `T` is a subtype of `IBase`, then `each T` is also a subtype of `IBase`.
The subtype witness for a `EachType(typePack) : IBase` is represented by
`EachTypeWitness(SubtypeWitness(typePack : IBase))`.
If a pattern type `P` is a subtype of `IBase`, then `expand P` is also a subtype of `IBase`.
The subtype witness for a `ExpandType(patternType, capture) : IBase` is represented by
`ExpandSubtypeWitness(SubtypeWitness(pattern : IBase))`.
Similar to `ExpandType` and `EachType`, we will have simplification rules such that:
- `ExpandSubtypeWitness(EachSubtypeWitness(x))` => `x`
- `EachSubtypeWitness(ExpandSubtypeWitness(x))` => `x`.
#### Canonical Representation of `TransitiveSubtypeWitness` for Type Packs
Given:
```
interface IBase
{
}
interface IDerived : IBase
{
}
__generic<each T : IDerived> ...
```
The witness of `DeclRefType("T")` conforming to `IDerived` will be represented by
```
DeclaredSubtypeWitness(
sub = DeclRefType(GenericTypePackParamDecl "T")
sup = `IBase`.
)
```
To represent the witness of `DeclRefType("T")` conforming to `IBase`, we will need to make use
of the `TransitiveSubtypeWitness`. For simplicity of IR generation, we would like to have `TransitiveSubtypeWitness`
not to deal with the case that `sub` is a type pack.
Therefore, instead of representing `DeclRefType("T") : IBase` as something like:
```
TransitiveSubtypeWitness(
subIsMid = DeclaredSubtypeWitness(
sub = DeclRefType(GenericTypePackParamDecl "T")
sup = `IBase`),
midIsSup = DeclaredSubtypeWitness(DeclRef(IDerived:IBase))
)
```
In the above definition, the `subType` of the witness is a type pack, which isn't very convenient to work with.
Instead, we will represent the same witness as:
```
ExpandSubtypeWitness(
TransitiveSubtypeWitness(
subIsMid = EachWitness(DeclaredSubtypeWitness(
sub = DeclRefType(GenericTypePackParamDecl "T")
sup = `IBase`)),
midIsSup = DeclaredSubtypeWitness(DeclRef(IDerived:IBase))
)
)
```
Note that this second representation is effectively representing `expand ((each T) : IBase)`, where the `subType` of the `TransitiveSubtypeWitness`
is now an `EachType` and no longer a type pack. Doing this transformation will allow us to avoid the situation where transitive witness lookup
is done on a pack of witnesses, therefore simplifying the IR.
### Matching Arguments to Packs
When resolving overload to form a `DeclRef` to a generic decl or resolving overload in a function call, we need to match arguments to generic/function
parameters. Before introducing variadic type packs, this matching is trivial: an argument at index `i` will match to a parameter at index `i`.
With type packs, we need to generalize this logic. Because we have required that all type pack parameters appear at the end of the generic or function parameter
list, we can still match argument 1:1 to parameters for all the non type pack parameters first. Once we have matched arguments to non type pack parameters and there
are additional arguments remaining, they must be for type pack parameters. If an argument is itself a concrete or abstract type pack, then we can continue to match
that argument 1:1 to the parameter. If not, then we require all the remaining arguments are individual types and not type packs. Because we require all type pack
parameters to have equal size, we can divide the remaining arguments evenly by the number of type pack parameters, and form a `TypePack`/`ValuePack` from that number
of arguments and supply it to each type pack parameter.
For example, assume we have:
```
struct S<T, each U, each V>
```
When resolving the overload for `S<int, int, void, float, bool>`, we have three parameters: `T`, `U`, `V` and five arguments: `int`, `int`, `void`, `float`, `bool`.
We will first perform argument match and match `T=int`. Now we have four arguments remaining and two type pack parameters. We can then divide 4 by 2 to get the
number of elements for each type pack argument, and form a `TypePack(int, void)` and use it as the matched argument for `U`, and form a `TypePack(float, bool)`
and use it as the matched argument for `V`.
After matching and the remaining overload resolution logic, `S<int, int, void, float, bool>` will be represented as:
```
GenericAppDeclRef
genericDecl = "S"
args = [
DeclRefType("int"), // For `T`
TypePack(DeclRefType("int"), DeclRefType("void")), // For `U`
TypePack(DeclRefType("float"), DeclRefType("bool")) // For `V`
]
```
Similarly, when resolving a function call with variadic parameters, we will perform argument matching and create `PackExpr` to use as argument to a packed parameter. Given:
```
void f<each T, each U>(int x, expand each T t, expand each U u) {...}
```
A call in the form of `f(3, Foo(), Bar(), 1.0f, false)` will be converted to:
```
f(3, Pack(Foo(), Bar()), Pack(1.0f, false))
```
After resolving the call. The `Pack(...)` represents the `PackExpr` synthesized by the compiler to create a `ValuePack` whose type is a `TypePack`, so
it can be used as argument to a `TypePack` parameter.
### IR Representation
#### Expressing Types
A concrete type pack is represented as `IRTypePack(T0, T1, ..., Tn)` in the IR, and an abstract type pack such as an `expand` type will eventually be specialized into an `IRTypePack`. This means that a function parameter whose type is a type pack is translated into a single parameter of `IRTypePack` type. Again, `IRTypePack` is in many ways similar to `IRTupleType`, except that `IRTypePack` are automatically flattened into enclosing type packs during specialization.
We will represent `expand` and `each` types in the IR almost 1:1 as they are represented in the AST. Note that types are hoistable insts in Slang IR and is globally deduplicated based on their operands, representing it in the natural way will allow these types to take advantage from Slang IR's global deduplication service.
This means that `each T` is represented as `IREachType(T)`, and `expand patternType` is represented as `IRExpandType(PatternType, capturedTypePacks)`
in the IR.
For example, the type `expand vector<each T, each U>`, where `T` and `U` are generic type pack parameters, is represented in the IR as:
```
%T = IRParam : IRTypePackParameterKind;
%U = IRParam : IRTypePackParameterKind;
%et = IREach %T;
%eu = IREach %U;
%v = IRVectorType(%et, %eu)
%expandType = IRExpandType(%v, %T, %U) // v is pattern; T,U are captured type packs.
```
Note that this kind of type hierarchy representation is only used during IR lowering in order to benefit from IR global deduplication of type definitions. The representation in this form isn't convenient for specialization.
Once the lowering-to-IR step is complete, we will convert all type representations to the same form as the value representation described in the following section.
#### Expressing Values
A value whose type is a type pack is called a value pack. A value pack is represented in the IR as a `IRMakeValuePack` inst.
For example, the value pack `(1,2,3)` will be represented in the IR as:
```
IRMakeValuePack(1,2,3) : IRTypePack(int, int, int)
```
An `expand(PatternExpr)` expression should be represented in the IR as:
```
%e = IRExpand : IRExpandType(...)
{
IRBlock
{
%index = IRParam : int;
yield PatternExpr; // may use `index` here.
}
}
```
The `IRExpand` is treated like a compile-time for loop where the loop body is expressed as basic blocks as the children of the `IRExpand` inst.
The body starts with a `%index` parameter that represents the loop index within the value pack, and the CFG inside `IRExpand` should end with a single
`yield` that is a terminal instruction "returning" the mapped value for element at `%index` in the input value pack.
For example, given `v` as value pack whose type is a type pack, `let x = expand (each v) + 1` will be represented in the IR as:
```
%v = /*some value pack whose type is a TypePack*/
%x = IRExpand : IRTypePack(...)
{
IRBlock
{
%index = IRParam : int;
%e = IRGetTupleElement(%v, %index);
%r = IRAdd %e 1;
IRYield %r;
}
}
```
In this simple example, the `IRExpand` contains only one basic block. It is possible for `IRExpand` to have more than one basic blocks if the pattern expression
contains a `?:` operator, in which case there will be a branching CFG structure inside the `IRExpand`.
Also note that `each v` is translated into `IRGetTupleElement(%v, %index)` that extracts the element at `%index` from the tuple value represented by `%v`.
#### IR Specialization
Specializing the IR for an `IRExpand` inst with a concrete value pack is very similar to loop unrolling. Given the example in the previous section
on expression `expand (each v) + 1`, we can specialize the `IRExpand` inst with `v` being a known value pack such as `IRMakeTuple(1,2,3)` in two steps.
Step 1 is to copy the children of the `IRExpand` inst three times into where the `IRExpand` inst itself is located, and during each copy, we replace
all references to `IRParam` with the concrete index for the copy. Therefore, specializing the above IR code with `IRMakeTuple(1,2,3)` will lead to:
```
%block0 = IRBlock
{
%e0 = IRGetTupleElement(%v, 0);
%r0 = IRAdd %e0 1;
yield %r0;
}
%block1 = IRBlock
{
%e1 = IRGetTupleElement(%v, 1);
%r1 = IRAdd %e1 1;
yield %r1;
}
%block2 = IRBlock
{
%e2 = IRGetTupleElement(%v, 2);
%r2 = IRAdd %e2 1;
yield %r2;
}
%mergeBlock = IRBlock
{
...
}
```
Step 2 is to hook up each copied block by replacing all the `yield` instructions with `branch` instructions, and form the final result of the value pack
by packing up all the values computed at each "loop iteration" in an `IRMakeValuePack` inst:
```
%block0 = IRBlock
{
%e0 = IRGetTupleElement(%v, 0);
%r0 = IRAdd %e0 1;
branch %block1;
}
%block1 = IRBlock
{
%e1 = IRGetTupleElement(%v, 1);
%r1 = IRAdd %e1 1;
branch %block2;
}
%block2 = IRBlock
{
%e2 = IRGetTupleElement(%v, 2);
%r2 = IRAdd %e2 1;
branch %mergeBlock;
}
%mergeBlock = IRBlock
{
%expand = IRMakeValuePack(%r0, %r1, %r2);
}
```
With this, we can replace the original `IRExpand` inst with `%expand` and specialization is done. The specialized instructions like `IRGetTupleElement(%v, 0)` will be picked up
in the follow-up step during specialization and replaced with the actual value at the specified index since `%v` is a known value pack represented by `IRMakeValuePack`. So after
folding and other simplifications, we should result in
```
%expand = IRMakeValuePack(2,3,4)
```
When specializing the original expression with `IRMakeValuePack(1,2,3)` in the IR.
Specialization of types and witness follows the same idea of value specialization, but since types and witnesses are represented directly as ordinary insts and operands instead of the
nested children of an `IRExpand`, we will use a recursive process on the type structure to perform the specialization. Most of the recursion logic should be trivial, and the only
interesting case is when specializing `IRExpandType` and `IREachType`. During the recursion process, we should maintain a state called `indexInPack` to represent the current expansion
index when specializing the pattern type of an `IRExpandType`, and then when we get to specialize an `IREachType(TPack)`, we should know which index in the pack we are currently
expanding by looking at the `indexInPack` context variable, and replace `IREachType(TypePack(T0, T1, ... Tn))` with the `T` at `indexInPack`.
After the specialization pass, there should be no more `IRExpand` and `IRExpandType` instructions in the IR. And we can lower the remaining `IRTypePack`s the same way as `IRTupleType`s.
Alternatives Considered
-----------------------
We considered the C++ `...` operator syntax and Swift's `repeat each` syntax and ended up picking Swift's design because it is easier to parse and is less ambiguous. Swift is strict about requiring `each` to precede a generic type pack parameter, so `void f<each T>(T v)` is not a valid syntax, to prevent confusion on what `T` is in this context. In Slang we don't require this because `expand each T` is always simplified down to `T`, and refers to the type pack.
We also considered not adding variadic generics support to the language at all, and just implement `Tuple` and `IFunc` as special system builtin types, like how it is done in C#. However we
believe that this approach is too limited when it comes to what the user can do with tuples and `IFunc`. Given Slang's position as a high performance GPU-first language, it is more important for Slang than other CPU languages to have a powerful type system that can provide zero-cost abstraction for meta-programming tasks. That led us to believe that the language and the users can benefit from proper support of variadic generics.

View file

@ -1,140 +0,0 @@
SP #008 - Tuples
==============
Now that we have variadic generics in the language following [SP #007], we should now be able to support `Tuple` type as a core language feature.
`Tuple` types are useful in many places to reduce boilerplate in user code, such as in function return types to eliminate the need of defining
`struct`s that are used only for invoking the function. Adding `Tuple` types to Slang will also simplify interop with other languages such as Python
and C++ that have tuple types.
Status
------
Author: Yong He
Status: Implemented.
Implementation: [PR 4856](https://github.com/shader-slang/slang/pull/4856).
Reviewed by: Jay Kwak, Kai Zhang, Ariel Glasroth.
Background
----------
Tuple type is widely supported in almost all of the modern programming languages including C++, C#, Swift, Rust, Python. Supporting tuple types
in Slang will bring the language to parity with other languages and allow users to practice the same coding idioms in Slang, and allow Slang code
to interop more directly with other parts of the user application written in other languages.
Proposed Approach
-----------------
With variadic generics support, we can now easily define a Tuple type in the core module as:
```
__generic<each T>
__magic_type(TupleType)
struct Tuple
{
__intrinsic_op($(kIROp_MakeTuple))
__init(expand each T);
}
```
This will allow users to instantiate tuple types from their code with `Tuple<T0, T1, T2>(v0, v1, v2)`.
### Constructing Tuple Values
To make it easy to construct tuples, we will define a `makeTuple` function in the core module as:
```
__intrinsic_op($(kIROp_MakeTuple))
Tuple<expand each T> makeTuple(expand each T values);
```
With generic argument inferencing, this will enable user to write:
```
makeTuple(1, 2.0f) // returns Tuple<int, float>(1, 2.0f)
```
### Accessing Tuple Elements
We can extend the logic of vector element accessing to access tuple elements. Given `t` as a tuple, these expressions are valid:
```
t._0 // Access the first element
t._1 // Access the second element
```
### Swizzling
We can easily support tuple swizzles:
```
let t = Tuple<int, float>(1, 2.0f);
let v = t._1_0;
// v == Tuple<float, int>(2.0f, 1)
```
### Concatenation
We can define tuple concatenation operation in the core module as:
```
Tuple<expand each T, expand each U> concat<each T, each U>(Tuple<expand each T> first, Tuple<expand each U> second)
{
return makeTuple<expand each T, expand each U>(expand each first, expand each second);
}
```
### Counting
The `countof` expression can be used on type packs or tuple values to obtain the number of elements in a type pack or tuple.
And this result should be usable as a compile-time constant such as in a generic argument.
```
int bar<let n : int>()
{
}
int foo<each T>()
{
bar<countof T>(); // OK, countof T is a compile time constant.
Tuple<expand each T> t;
let c = countof t; // OK, countof can be used on tuple values.
}
```
### Operator Overloads
We should have builtin operator overloads for all comparison operators if every element type of a tuple conforms to `IComparable`.
This can be supported by defining an overload for these operators in the core module in the form of:
```
bool assign(inout bool r, bool v) { r = v; return v; }
__generic<each T : IComparable>
bool operator < (Tuple<T> t0, Tuple<T> t1)
{
bool greater = false;
bool equals = true;
expand greater || assign(equals, equals && (each t0) == (each t1)) && assign(greater, (each t0) > (each t1));
return !greater && !equals;
}
```
Alternatives Considered
----------------
Should we allow other operator overloads for tuples? This seems useful to have, but right now this is a bit tricky
because we haven't really settled on builtin interfaces. We need to finalize things like `IFloat`, `IInteger`,
`IArithmetic`, `ILogic` etc. first.
Should we automatically treat `Tuple` type to conform to any interface `IFoo` if every element in the tuple conforms to
`IFoo`? We can't because this is not well-defined. For example, if `IFoo` has a method that returns `int`,
should the tuple type's equivalent method return `Tuple<expand(int, T)>` or just `int`? In some cases you want one but
other times you want the other. And if the method returns a tuple, it is no longer consistent with the base interface
definition so this is all ill-formed.
We also considered having an overload of `concat` that appends individual elements to the end of a tuple, such as:
```
Tuple<T, U> concat<each T, each U>(Tuple<T> t, each U values);
```
However, this could lead to surprising behavior when the user writes `concat(t0, t1, t2)` where t1 and t2 are also tuples.
Having this overload means the result would be `(t0_0, t0_1, ... t0_n, t1, t2)` where the user could be expecting `t1` and `t2`
to be flattened into the resulting tuple. To avoid this surprising behavior, we decide to not include this overload in the core module.

View file

@ -1,142 +0,0 @@
SP #009 - IFunc interface
==============
Now that we have variadic generics in the language following [SP #007], we should now be able to define a builtin `IFunc` interface that represent
things that can be called with the `()` operator. This will allow users to write generic functions that takes a callback object and adopt more
functional programming idioms.
Status
------
Author: Yong He
Status: Implemented.
Implementation: [PR 4905](https://github.com/shader-slang/slang/pull/4905) [PR 4926](https://github.com/shader-slang/slang/pull/4926)
Reviewed by: Kai Zhang, Jay Kwak
Background
----------
Callback is an idiom that frequently shows up in complex codebases. Currently, Slang users can implement this idiom with
interfaces:
```
interface ICondition
{
bool test(int x);
}
int countElement(int data[100], ICondition condition)
{
int count = 0;
for (int i = 0; i < data.getCount(); i++)
if (condition.test(data[i]))
count++;
return count;
}
bool myCondition(int x) { return x%2 == 0; } // select all even numbers.
struct MyConditionWrapper : ICondition
{
bool test(int x) { return myCondition(x); }
};
void test()
{
int data[100] = ...;
int count = countElement(data, MyConditionWrapper());
}
```
As can be seen, this is a lot of boilerplate. With a builtin `IFunc` interface, we can
allow the compiler to automatically make ordinary functions conform to the interface,
eliminating the need for defining interfaces and wrapper types.
Proposed Approach
-----------------
We should support overloading of `operator()`, and use the function call syntax to call the `operator()` member, similar to C++:
```
struct Functor
{
int operator()(float p) {}
}
void test()
{
Functor f;
f(1.0f);
}
```
We propose `IFunc`, `IMutatingFunc`, `IDifferentiableFunc` and `IDifferentiableMutatingFunc` that are defined as follows:
```
// Function objects with a mutating state.
interface IMutatingFunc<TR, each TP>
{
[mutating]
TR operator()(expand each TP p);
}
// Function objects that do not have a mutating state.
interface IFunc<TR, each TP> : IMutatingFunc<TR, expand each TP>
{
TR operator()(expand each TP p);
}
// Differentiable functions
interface IDifferentiableMutatingFunc<TR : IDifferentiable, each TP : IDifferentiable> : IMutatingFunc<TR, expand each TP>
{
[Differentiable]
[mutating]
TR operator()(expand each TP p);
}
interface IDifferentiableFunc<TR : IDifferentiable, each TP : IDifferentiable> : IFunc<TR, expand each TP>, IDifferentiableMutatingFunc<TR, expand each TP>
{
[Differentiable]
TR operator()(expand each TP p);
}
```
The `IMutatingFunc` interface is for defining functors that have a mutable state. The following example demonstrates its use:
```
void forEach(int data[100], inout IMutatingFunc<void, int> f)
{
for (int i = 0; i < data.getCount(); i++)
f(data[i]);
}
struct CounterFunc : IMutatingFunc<void, int>
{
int count;
[mutating]
void operator()(int data)
{
if (data % 2 == 0)
count++;
}
};
void test()
{
int data[100] = ...;
CounterFunc f;
f.count = 0;
forEach(data, f);
printf("%d", f.count);
}
```
# Coercion of ordinary functions
Eventually, we should allow ordinary functions to be automatically coercible to `IFunc` interfaces. But this is scoped out
for the initial `IFunc` work, because we believe the implementation can be simpler if we support lambda functions first, then
implement ordinary function coercion as a special case of lambda expressions.

View file

@ -1,285 +0,0 @@
# SP #010: New Differentiable Type System
## Problem
Our current `IDifferentiable` system has some flaws. It works fine for value types, since we can assume that every input gets a corresponding output or 'return' value. It works poorly for buffer/pointer types, since we don't 'return' a buffer, but simply want the getters/setters to be differentiable, and the resulting type to have a second buffer/pointer for the differential data.
Here's a demonstrative example with our current codebase when we use value types (like `float`)
```csharp
[Differentiable]
float add(float a, float b)
{
return a + b;
}
// Synthesized derivative:
[Differentiable]
void s_bwd_add(DifferentialPair<float> dpa, DifferentialPair<float> dpb, float.Differential d_out)
{
// A backward derivative method is currently responsible for 'setting' the differential values.
dpa = DifferentialPair<float>(dpa.p, d_out);
dpb = DifferentialPair<float>(dpb.p, d_out);
}
```
Unfortunately, this makes little sense if we decide to use buffer or pointer types:
```csharp
struct DiffPtr<T> : IDifferentiable
{
StructuredBuffer<T> bufferRef;
uint64 offset;
[Differentiable] T get() { ... }
[Differentiable] void set(T t) { ... }
/*
Problem 1:
We use custom derivatives for get() and set() to backprop and
read gradients. If DiffPtr<T> is differentiable, then get() and
set() need to operate on the *pair* type and not this struct type.
There is no proper way to do this currently.
*/
};
[Differentiable]
void add(DiffPtr<float> a, DiffPtr<float> b, DiffPtr<float> output)
{
output.set(a.get() + b.get());
}
// Synthesized derivative:
[Differentiable]
void s_bwd_add(
inout DifferentialPair<DiffPtr<float>> a,
inout DifferentialPair<DiffPtr<float>> b,
inout DifferentialPair<DiffPtr<float>> output)
{
/*
Problem 2:
Current backward mode semantics require that the method assume that the differentials
a.d and b.d are empty/zero, and it is the backward method's job to populate the result.
It doesn't make sense to 'set' the differential part since it is a buffer ref.
Rather, we want the user to provide the differential pointer, and use custom derivatives of
the getters/setters to propagate derivatives.
This also means methods like dzero(), dadd() and dmul() make no sense
in the context of pointer types. They cannot be initialized within a derivative method.
*/
}
```
## Workarounds
At the moment the primary workaround is to use a **non-differentiable buffer type** with differentiable methods, and always initialize the object with two pointers for both the primal and differential buffers. This is how our `DiffTensorView<T>` object works.
Unfortunately, this is a rather hacky workaround with several drawbacks:
1. `DiffTensorView<T>` does not conform to `IDifferentiable`, but is used for derivatives. This makes our type system less useful as checks for `is_subtype` from applications using reflection need workarounds to account for corner cases like these.
2. `DiffTensorView<T>` always has two buffer pointers even when used in non-differentiable methods. This is extra data in the struct, and potentially extra tensor allocations (we explicitly handle this case in `slangtorch` by leaving the diff part uninitialized if a primal method is invoked)
3. Higher-order derivatives don't work well with this workaround. Differentiating a method twice needs a set of 4 pointers, but we need to account for this ahead of time by using new types like `DiffDiffTensorView` that worsens the problem of carrying around extra data where it's not required.
## Solution
We'll need to make the following 4 additions/changes:
### 1. `[deriv_method]` function decorator.
Intended for easy definition of custom derivatives for struct methods. It has the following properties:
1. Accesses to `this` within `[deriv_method]` are differential pairs.
2. Methods decorated with `[deriv_method]` cannot be called as regular methods (they can still be explicitly invoked with `bwd_diff(obj.method)`), and do not show up in the auto-complete list.
See the next section for example uses of `[deriv_method]`.
### 2. Split `IDifferentiable` interface: `IDifferentiableValueType` and `IDifferentiablePtrType`
This approach moves away from "type-driven" derivative semantics and towards more "function-driven" derivative semantics.
We no longer have a `dadd` , `dzero`, `dmul` etc.. we use default initialization instead of `dzero` and the backward derivative of the `use` method for `dadd`
Further, `IDifferentiablePtrType` types don't have any of these properties. They do not need a way to 'add', and it is especially important that there is no default initializer. We never want the compiler to be able to create a new object of `IDifferentiablePtrType` since we want to get the user-provided pointers.
Additionally, we can use `IDifferentiableValueType` as the current `IDifferentiable` for backwards compatibility (it should just work in 95% of cases, since no one really defines dadd/dzero/dmul explicitly anyway)
Here's the new set of base interfaces:
```csharp
interface __IDifferentiableBase { } // Helper type for our implementation.
interface IDifferentiableValueType : __IDifferentiableBase
{
associatedtype Differential : IDifferentiableValueType & IDefaultInitializable;
[Differentiable] This use(); // auto-synthesized
}
interface IDifferentiablePtrType : __IDifferentiableBase
{
associatedtype Differential : IDifferentiablePtrType;
}
```
Some extras in the core module allow us to constrain the diffpair type for things like `IArithmetic`
```csharp
// --- CORE MODULE EXTRAS ---
interface ISelfDifferentiableValueType : IDifferentiableValueType
{
// Force arithmetic types to be a differential pair of the same two types.
// Make it simple to define derivatives of arithmetic operations.
//
associatedtype Differential : This;
}
extension IFloat : ISelfDifferentiableValueType
{ }
extension float
{
// trivial auto-synthesis (maybe we even prevent the user from overriding this)
float use() { return this; }
// trivial auto-synthesis (maybe we even prevent the user from overriding this).
[ForwardDerivativeOf(use)]
[deriv_method] void use_fwd() { return this; }
// auto-synthesized if necessary by invoking the use_bwd for all fields.
// we need to provide implementation for 'leaf' types.
[BackwardDerivativeOf(use)]
[deriv_method] [mutating] void use_bwd(float d) { this.d += d; }
}
// The new system lets us define differentiable pointers easily.
// IDifferentiablePtrType'd values are simply treated as references, so they can be freely
// duplicated without requiring a `use()` for correctness.
//
struct DPtr<T : IDifferentiableValueType> : IDifferentiablePtrType
{
typealias Differential = DPtr<T.Differential>;
Buffer<T> buffer;
uint64 offset;
[BackwardDerivative(get_bwd)]
    [ForwardDerivative(get_fwd)]
T get() { return this.buffer[offset]; }
[deriv_method] DifferentialPair<T> get_fwd()
{
return diffPair(this.p.buffer[offset], this.d().buffer[offset]);
}
[deriv_method] void get_bwd(Differential d)
{
return this.d.InterlockedAdd(offset, d);
}
DPtr<T> operator+(uint o) { return DPtr<T>{buffer, offset + o}; }
}
// Or we can define a fancier differentiable pointer that does a hashgrid
struct DHashGridPtr<T : IDifferentiableValueType, let N: int> : IDifferentiablePtrType
{
typealias Differential = DPtr<T.Differential>;
Buffer<T> buffer;
uint64 offset;
[BackwardDerivative(get_bwd)]
    [ForwardDerivative(get_fwd)]
T get() { return this.buffer[offset]; }
[deriv_method] DifferentialPair<T> get_fwd()
{
return diffPair(this.p().buffer[offset], this.d().buffer[offset]);
}
[deriv_method] void get_bwd(Differential d)
{
return this.d().InterlockedAdd(offset * N + hash(get_thread_id()), d);
}
}
```
### 3. Every time we 'reuse' an object that conforms to `IDifferentiableValueType`, we split it with `use()` , and we use `__init__()` where necessary to initialize an accumulator.
Example:
```csharp
float f(float a)
{
add(a, a);
}
float add(float a, float b)
{
return a + b;
}
// Synthesized derivatives
void add_bwd(inout DiffPair<float> dpa, inout DiffPair<float> dpb, float d_out)
{
dpa = diffPair(dpa.p, d_out);
dpb = diffPair(dpb.p, d_out);
}
// Preprocessed-f (before derivative generation)
float f_with_use_expansion(float a)
{
DiffPair<float> a_extra = a.use();
return add(a, a_extra);
}
// After fwd-mode:
DiffPair<float> f_fwd(DiffPair<float> dpa)
{
DiffPair<float> dpa_extra = dpa.use_fwd();
    return add_fwd(dpa, dpa_extra);
}
// bwd-mode:
void f_bwd(inout DiffPair<float> dpa, float d_out)
{
// fwd-pass
// split
DiffPair<float> dpa_extra = dpa.use_fwd();
// -------
// bwd-pass
dpa_extra_bwd = DiffPair<float>(dpa_extra.p, float.Differential::__init__());
add_bwd(dpa, dpa_extra, d_out);
// merge
dpa.use_bwd(dpa_extra);
}
```
### 4. Objects that conform to `IDifferentiablePtrType` are used without splitting. They are simply not 'transposed' at all, because there is nothing to transpose. The fwd-mode pair is used as is.
Here's the same example above, but with the `DPtr` type defined above.
```csharp
void f(DPtr<float> a, DPtr<float> output)
{
add(a, a, output);
}
void add(DPtr<float> a, DPtr<float> b, DPtr<float> output)
{
output.set(a.get() + b.get());
}
// Synthesized derivatives
// (note: no inout req'd for IDifferentiablePtrType)
// important difference is that `ptr` types don't get transposed, only
// methods on the objects are.
// they DO NOT have a default initializer (the user must supply the differential part)
void add_bwd(
DifferentialPair<DPtr<float>> dpa,
DifferentialPair<DPtr<float>> dpb,
DifferentialPair<DPtr<float>> output)
{
// forward pass.
var a_p = dpa.p.get();
var b_p = dpb.p.get();
// ----
// backward pass.
float.Differential d_val = DPtr<float>::set_bwd(output); // set_bwd works on the entire pair.
DifferentialPair<float> a_get_bwd = diffPair(a_p, float.Differential::__init__());
DifferentialPair<float> b_get_bwd = diffPair(b_p, float.Differential::__init__());
    operator_float_add_bwd(a_get_bwd, b_get_bwd, d_val);
DPtr<float>::get_bwd(dpa);
DPtr<float>::get_bwd(dpb);
}
```

View file

@ -1,47 +0,0 @@
SP #011: Structured Binding
=================
Tuple types can reduce boilerplate code of defining auxiliary structs, but they can introduce readability issues because the elements are not named.
To mitigate this issue, we should support structured binding as a convenient way to access tuple elements with meaningful names.
# Status
Status: Proposal in review.
Implementation: N/A
# Proposed Approach
Users should be able to use `let` syntax to assign a composite type to a binding structure:
```
let tuple = makeTuple(1.0f, 2, 3);
let [a, b, c] = tuple;
```
Where the `let [...]` statement is a syntactic sugar of:
```
let a = tuple._0;
let b = tuple._1;
let c = tuple._2;
```
The right hand side of a structured binding can be a tuple, an array, or a struct type.
It is not an error if the composite value has more elements than the binding structure.
Mutable bindings are not allowed.
# Alternatives Considered
We could have allowed mutable bindings in the syntax of:
```
var [a,b,c] = ...
```
That defines mutable variables a,b,c whose values are copied from the structure.
However, mutable bindings can lead to confusion when modifying `a` doesn't change the value
in the source composite object from the binding. To avoid this confusion, we simply disallow
it.
Supporting mutation on the original composite object can be tricky as it involves reference types
that are not existent in the language. For simplicity we consider that to be out of scope of this
proposal.

Some files were not shown because too many files have changed in this diff Show more