Compare commits

...

5 commits

Author SHA1 Message Date
janis 8238f5bff0
almost done! 2026-04-04 00:28:43 +02:00
janis 465c6294e6
remote slang copy 2026-04-03 18:44:46 +02:00
janis c0d807b163
warnings 2026-04-03 03:15:06 +02:00
janis db27c98927
fix egui ? 2026-04-03 03:13:51 +02:00
janis 1c55a05215
debug name from bevy 2026-04-03 00:17:33 +02:00
176 changed files with 675 additions and 118322 deletions

View file

@ -1,9 +0,0 @@
[target.x86_64-unknown-linux-gnu]
linker = "clang"
rustflags = [
"-Clink-arg=-fuse-ld=mold",
# Nightly
"-Zshare-generics=y",
"-Zthreads=0",
]

View file

@ -42,6 +42,7 @@ ash = "0.38.0"
ash-window = "0.13.0" ash-window = "0.13.0"
vk-mem = "0.5.0" vk-mem = "0.5.0"
gpu-allocator = { git = "https://github.com/janis-bhm/gpu-allocator", branch = "main" } gpu-allocator = { git = "https://github.com/janis-bhm/gpu-allocator", branch = "main" }
rectangle-pack = "0.4.2"
vk-sync = "0.1.6" vk-sync = "0.1.6"
arrayvec = "0.7.6" arrayvec = "0.7.6"

View file

@ -1,7 +1,10 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use renderer::{Renderer2, render_graph, swapchain::WindowSurface}; use renderer::{
Renderer2, render_graph,
swapchain::{Surface, SwapchainConfiguration},
};
use tracing::info; use tracing::info;
use tracing_subscriber::EnvFilter; use tracing_subscriber::EnvFilter;
use winit::{ use winit::{
@ -18,7 +21,7 @@ struct WindowState {
egui_platform: egui_winit_platform::Platform, egui_platform: egui_winit_platform::Platform,
demo_app: egui_demo_lib::DemoWindows, demo_app: egui_demo_lib::DemoWindows,
scale_factor: f64, scale_factor: f64,
surface: WindowSurface, surface: Surface,
} }
struct WinitState { struct WinitState {
@ -53,13 +56,24 @@ impl WinitState {
fn handle_final_resize(&mut self, window_id: WindowId, new_size: PhysicalSize<u32>) { fn handle_final_resize(&mut self, window_id: WindowId, new_size: PhysicalSize<u32>) {
_ = (window_id, new_size); _ = (window_id, new_size);
info!("TODO: implement resize events"); info!("TODO: implement resize events");
if let Some(window) = self.windows.get(&window_id) { if let Some(WindowState { surface, .. }) = self.windows.get(&window_id) {
window let config = surface
.surface .swapchain()
.recreate_with(Some(renderer::Extent2D { .as_ref()
width: new_size.width, .map(|swapchain| swapchain.config().clone())
height: new_size.height, .unwrap_or(SwapchainConfiguration::default());
}))
surface
.configure(
self.renderer.device(),
SwapchainConfiguration {
extent: renderer::Extent2D {
width: new_size.width,
height: new_size.height,
},
..config
},
)
.expect("swapchain recreation"); .expect("swapchain recreation");
} }
} }
@ -91,10 +105,8 @@ impl WinitState {
let dev = renderer.device().clone(); let dev = renderer.device().clone();
use renderer::ash::vk::Handle; use renderer::ash::vk::Handle;
use renderer::device::DeviceOwned;
let [r, g, b]: [f32; 3] = let [r, g, b]: [f32; 3] =
rand::prelude::StdRng::seed_from_u64(window.surface.surface.handle().as_raw()) rand::prelude::StdRng::seed_from_u64(window.surface.raw().as_raw()).random();
.random();
render_graph::clear_pass(rg, renderer::util::Rgba([r, g, b, 1.0]), framebuffer); render_graph::clear_pass(rg, renderer::util::Rgba([r, g, b, 1.0]), framebuffer);
egui_pre_pass( egui_pre_pass(
&dev, &dev,
@ -290,7 +302,7 @@ impl ApplicationHandler for WinitState {
} }
fn main() { fn main() {
let _ = tracing_subscriber::fmt() _ = tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env()) .with_env_filter(EnvFilter::from_default_env())
.init(); .init();
let ev = EventLoop::new().unwrap(); let ev = EventLoop::new().unwrap();

View file

@ -24,6 +24,7 @@ ash = { workspace = true }
ash-window = { workspace = true } ash-window = { workspace = true }
vk-mem = { workspace = true } vk-mem = { workspace = true }
gpu-allocator = { workspace = true } gpu-allocator = { workspace = true }
rectangle-pack = { workspace = true }
raw-window-handle = { workspace = true } raw-window-handle = { workspace = true }
egui = { workspace = true , features = ["bytemuck"]} egui = { workspace = true , features = ["bytemuck"]}

View file

@ -1,12 +1,13 @@
#!/bin/bash #! /usr/bin/env nix-shell
#! nix-shell -i bash -p shader-slang
set -e set -e
SLANGC="/opt/shader-slang-bin/bin/slangc" SLANGC="slangc"
$SLANGC egui.slang -profile glsl_450 -target spirv -o egui_vert.spv -entry vertex $SLANGC egui.slang -profile glsl_450 -target spirv -o egui_vert.spv -entry vertex
$SLANGC egui.slang -profile glsl_450 -target spirv -o egui_frag.spv -entry fragment $SLANGC egui.slang -profile glsl_450 -target spirv -o egui_frag.spv -entry fragment
$SLANGC egui.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o egui.spv $SLANGC egui.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o egui.spv
$SLANGC wireframe.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o wireframe.spv $SLANGC wireframe.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o wireframe.spv
$SLANGC font.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o font.spv $SLANGC font.slang -profile glsl_450 -target spirv -entry vertex -entry fragment -o font.spv
$SLANGC font.slang -profile glsl_450 -target spirv -entry mesh -entry task -entry fragment_barycentric -o font_mesh.spv # $SLANGC font.slang -profile glsl_450 -target spirv -entry mesh -entry task -entry fragment_barycentric -o font_mesh.spv

View file

@ -3,14 +3,14 @@ struct Fragment {
} }
struct VertexIn { struct VertexIn {
[[vk::layout(0)]] float2 pos; [[vk::location(0)]] float2 pos;
[[vk::layout(1)]] float2 uv; [[vk::location(1)]] float2 uv;
[[vk::layout(2)]] float4 color; [[vk::location(2)]] float4 color;
} }
struct VertexOut { struct VertexOut {
[[vk::layout(0)]] float4 color; [[vk::location(0)]] float4 color;
[[vk::layout(1)]] float2 uv; [[vk::location(1)]] float2 uv;
[[vk::layout(2), flat]] uint draw_id; nointerpolation [[vk::location(2)]] uint draw_id;
float4 position : SV_Position; float4 position : SV_Position;
} }

Binary file not shown.

View file

@ -1,9 +1,9 @@
struct VertexIn { struct VertexIn {
[[vk::layout(0)]] float2 pos; [[vk::location(0)]] float2 pos;
} }
struct VertexOut { struct VertexOut {
[[vk::layout(0)]] float4 color; [[vk::location(0)]] float4 color;
float4 position : SV_Position; float4 position : SV_Position;
} }

Binary file not shown.

View file

@ -1,115 +0,0 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
/// Declaratively registers the build graph for the `shader_builder` package:
/// a static library (src/root.zig), an executable (src/main.zig) that imports
/// it, plus `run` and `test` steps. This is the standard `zig init` scaffold.
/// NOTE(review): shown here as part of a file-deletion diff — kept verbatim.
pub fn build(b: *std.Build) void {
// Standard target options allows the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
// This creates a "module", which represents a collection of source files alongside
// some compilation options, such as optimization mode and linked system libraries.
// Every executable or library we compile will be based on one or more modules.
const lib_mod = b.createModule(.{
// `root_source_file` is the Zig "entry point" of the module. If a module
// only contains e.g. external object files, you can make this `null`.
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = b.path("src/root.zig"),
.target = target,
.optimize = optimize,
});
// We will also create a module for our other entry point, 'main.zig'.
const exe_mod = b.createModule(.{
// `root_source_file` is the Zig "entry point" of the module. If a module
// only contains e.g. external object files, you can make this `null`.
// In this case the main source file is merely a path, however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
// Modules can depend on one another using the `std.Build.Module.addImport` function.
// This is what allows Zig source code to use `@import("foo")` where 'foo' is not a
// file path. In this case, we set up `exe_mod` to import `lib_mod`.
exe_mod.addImport("shader_builder_lib", lib_mod);
// Now, we will create a static library based on the module we created above.
// This creates a `std.Build.Step.Compile`, which is the build step responsible
// for actually invoking the compiler.
const lib = b.addStaticLibrary(.{
.name = "shader_builder",
.root_module = lib_mod,
});
// This declares intent for the library to be installed into the standard
// location when the user invokes the "install" step (the default step when
// running `zig build`).
b.installArtifact(lib);
// This creates another `std.Build.Step.Compile`, but this one builds an executable
// rather than a static library.
const exe = b.addExecutable(.{
.name = "shader_builder",
.root_module = exe_mod,
});
// This declares intent for the executable to be installed into the
// standard location when the user invokes the "install" step (the default
// step when running `zig build`).
b.installArtifact(exe);
// This *creates* a Run step in the build graph, to be executed when another
// step is evaluated that depends on it. The next line below will establish
// such a dependency.
const run_cmd = b.addRunArtifact(exe);
// By making the run step depend on the install step, it will be run from the
// installation directory rather than directly from within the cache directory.
// This is not necessary, however, if the application depends on other installed
// files, this ensures they will be present and in the expected location.
run_cmd.step.dependOn(b.getInstallStep());
// This allows the user to pass arguments to the application in the build
// command itself, like this: `zig build run -- arg1 arg2 etc`
if (b.args) |args| {
run_cmd.addArgs(args);
}
// This creates a build step. It will be visible in the `zig build --help` menu,
// and can be selected like this: `zig build run`
// This will evaluate the `run` step rather than the default, which is "install".
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const lib_unit_tests = b.addTest(.{
.root_module = lib_mod,
});
const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
const exe_unit_tests = b.addTest(.{
.root_module = exe_mod,
});
const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
// Similar to creating the run step earlier, this exposes a `test` step to
// the `zig build --help` menu, providing a way for the user to request
// running the unit tests.
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_lib_unit_tests.step);
test_step.dependOn(&run_exe_unit_tests.step);
}

View file

@ -1,73 +0,0 @@
.{
// This is the default name used by packages depending on this one. For
// example, when a user runs `zig fetch --save <url>`, this field is used
// as the key in the `dependencies` table. Although the user can choose a
// different name, most users will stick with this provided value.
//
// It is redundant to include "zig" in this name because it is already
// within the Zig package namespace.
.name = "shader_builder",
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
//.minimum_zig_version = "0.11.0",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL. If the contents of a URL change this will result in a hash mismatch
// // which will prevent zig from using it.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
//
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
// For example...
//"LICENSE",
//"README.md",
},
}

View file

@ -1,45 +0,0 @@
//! By convention, main.zig is where your main function lives in the case that
//! you are building an executable. If you are making a library, the convention
//! is to delete this file and start with root.zig instead.
// Entry point of the `shader_builder` executable; prints a greeting to stderr
// and a hint to stdout through a buffered writer. Standard `zig init` scaffold,
// shown here as part of a file-deletion diff — code kept verbatim.
pub fn main() !void {
// Prints to stderr (it's a shortcut based on `std.io.getStdErr()`)
std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
// stdout is for the actual output of your application, for example if you
// are implementing gzip, then only the compressed bytes should be sent to
// stdout, not any debugging messages.
const stdout_file = std.io.getStdOut().writer();
var bw = std.io.bufferedWriter(stdout_file);
const stdout = bw.writer();
try stdout.print("Run `zig build test` to run the tests.\n", .{});
try bw.flush(); // Don't forget to flush!
}
// Sanity-checks ArrayList append/pop with the leak-detecting test allocator.
test "simple test" {
var list = std.ArrayList(i32).init(std.testing.allocator);
defer list.deinit(); // Try commenting this out and see if zig detects the memory leak!
try list.append(42);
try std.testing.expectEqual(@as(i32, 42), list.pop());
}
// Exercises the `lib` module (root.zig) imported at the bottom of this file.
test "use other module" {
try std.testing.expectEqual(@as(i32, 150), lib.add(100, 50));
}
// Fuzz-test scaffold; only meaningful when run with `zig build test --fuzz`.
test "fuzz example" {
const global = struct {
fn testOne(input: []const u8) anyerror!void {
// Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case!
try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input));
}
};
try std.testing.fuzz(global.testOne, .{});
}
const std = @import("std");
/// This imports the separate module containing `root.zig`. Take a look in `build.zig` for details.
const lib = @import("shader_builder_lib");

View file

@ -1,13 +0,0 @@
//! By convention, root.zig is the root source file when making a library. If
//! you are making an executable, the convention is to delete this file and
//! start with main.zig instead.
const std = @import("std");
const testing = std.testing;
/// Adds two 32-bit signed integers. Declared `export`, so it is emitted with
/// C ABI linkage and is callable from the static library's consumers.
pub export fn add(a: i32, b: i32) i32 {
return a + b;
}
// Minimal unit test for `add`; run via `zig build test`.
test "basic add functionality" {
try testing.expect(add(3, 7) == 10);
}

View file

@ -1,29 +0,0 @@
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
LLVM Exceptions to the Apache 2.0 License
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.

View file

@ -1,156 +0,0 @@
Slang
=====
![CI Status](https://github.com/shader-slang/slang/actions/workflows/ci.yml/badge.svg?branch=master)
![CTS Status](https://github.com/shader-slang/slang/actions/workflows/vk-gl-cts-nightly.yml/badge.svg)
Slang is a shading language that makes it easier to build and maintain large shader codebases in a modular and extensible fashion, while also maintaining the highest possible performance on modern GPUs and graphics APIs.
Slang is based on years of collaboration between researchers at NVIDIA, Carnegie Mellon University, Stanford, MIT, UCSD and the University of Washington.
Why Slang?
---------------
The Slang shading language is designed to enable real-time graphics developers to work with large-scale, high-performance shader code.
### Write Shaders Once, Run Anywhere
The Slang compiler can generate code for a wide variety of targets: D3D12, Vulkan, Metal, D3D11, OpenGL, CUDA, and even generate code to run on a CPU. For textual targets, such as Metal Shading Language (MSL) and CUDA, Slang produces readable code that preserves original identifier names, as well as the type and call structure, making it easier to debug.
### Access the Latest GPU Features
Slang code is highly portable, but can still leverage unique platform capabilities, including the latest features in Direct3D and Vulkan. For example, developers can make full use of [pointers](https://shader-slang.com/slang/user-guide/convenience-features.html#pointers-limited) when generating SPIR-V.
Slang's [capability system](https://shader-slang.com/slang/user-guide/capabilities.html) helps applications manage feature set differences across target platforms by ensuring code only uses available features during the type-checking step, before generating final code. Additionally, Slang provides [flexible interop](https://shader-slang.com/slang/user-guide/a1-04-interop.html) features to enable directly embedding target code or SPIR-V into generated shaders.
### Leverage Neural Graphics with Automatic Differentiation
Slang can [automatically generate both forward and backward derivative propagation code](https://shader-slang.com/slang/user-guide/autodiff.html) for complex functions that involve arbitrary control flow and dynamic dispatch. This allows existing rendering codebases to easily become differentiable, or for Slang to serve as the kernel language in a PyTorch-driven machine learning framework via [`slangtorch`](https://shader-slang.com/slang/user-guide/a1-02-slangpy.html).
### Scalable Software Development with Modules
Slang provides a [module system](https://shader-slang.com/slang/user-guide/modules.html) that enables logical organization of code for separate compilation. Slang modules can be independently compiled offline to a custom IR (with optional obfuscation) and then linked at runtime to generate code in formats such as DXIL or SPIR-V.
### Code Specialization that Works with Modules
Slang supports [generics and interfaces](https://shader-slang.com/slang/user-guide/interfaces-generics.html) (a.k.a. type traits/protocols), allowing for clear expression of shader specialization without the need for preprocessor techniques or string-pasting. Unlike C++ templates, Slang's generics are pre-checked and don't produce cascading error messages that are difficult to diagnose. The same generic shader can be specialized for a variety of different types to produce specialized code ahead of time, or on the fly, entirely under application control.
### Easy On-ramp for HLSL and GLSL Codebases
Slang's syntax is similar to HLSL, and most existing HLSL code can be compiled with the Slang compiler out-of-the-box, or with just minor modifications. This allows existing shader codebases to immediately benefit from Slang without requiring a complete rewrite or port.
Slang provides a compatibility module that enables the use of most GLSL intrinsic functions and GLSL's parameter binding syntax.
### Comprehensive Tooling Support
Slang comes with full support of IntelliSense editing features in Visual Studio Code and Visual Studio through the Language Server Protocol.
Full debugging capabilities are also available through RenderDoc and SPIR-V based tools.
Getting Started
---------------
The fastest way to get started using Slang in your own development is to use a pre-built binary package, available through GitHub [releases](https://github.com/shader-slang/slang/releases).
Slang binaries are also included in the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home) since version 1.3.296.0.
There are packages built for x86_64 and aarch64 Windows, Linux and macOS.
Each binary release includes the command-line `slangc` compiler, a shared library for the compiler, and the `slang.h` header.
See the user-guide for info on using the `slangc` command-line tool: [Slang Command Line Usage](
https://shader-slang.com/slang/user-guide/compiling.html#command-line-compilation-with-slangc).
If you want to try out the Slang language without installing anything, a fast and simple way is to use the [Slang Playground](https://shader-slang.com/slang-playground). The playground allows you to compile Slang code to a variety of targets, and even run some simple shaders directly within the browser. The playground loads Slang compiler to your browser and runs all compilation locally. No data will be sent to any servers.
If you would like to build Slang from source, please consult the [build instructions](docs/building.md).
Documentation
-------------
The Slang project provides a variety of different [documentation](docs/), but most users would be well served starting with the [User's Guide](https://shader-slang.github.io/slang/user-guide/).
For developers writing Slang code, the [Slang Core Module Reference](https://shader-slang.com/stdlib-reference/) provides detailed documentation on Slang's built-in types and functions.
We also provide a few [examples](examples/) of how to integrate Slang into a rendering application.
These examples use a graphics layer that we include with Slang called "GFX" which is an abstraction library of various graphics APIs (D3D11, D3D12, OpenGL, Vulkan, CUDA, and the CPU) to support cross-platform applications using GPU graphics and compute capabilities.
If you'd like to learn more about GFX, see the [GFX User Guide](https://shader-slang.com/slang/gfx-user-guide/index.html).
Additionally, we recommend checking out [Vulkan Mini Examples](https://github.com/nvpro-samples/vk_mini_samples/) for more examples of using Slang's language features available on Vulkan, such as pointers and the ray tracing intrinsics.
Contributing
------------
If you'd like to contribute to the project, we are excited to have your input.
The following guidelines should be observed by contributors:
* Please follow the contributor [Code of Conduct](CODE_OF_CONDUCT.md).
* Bugs reports and feature requests should go through the GitHub issue tracker
* Changes should ideally come in as small pull requests on top of `master`, coming from your own personal fork of the project
* Large features that will involve multiple contributors or a long development time should be discussed in issues, and broken down into smaller pieces that can be implemented and checked in in stages
[Contribution guide](CONTRIBUTING.md) describes the workflow for contributors at more detail.
Limitations and Support
-----------------------
### Platform support
The Slang compiler and libraries can be built on the following platforms:
| Windows | Linux | MacOS | WebAssembly |
|:---------:|:---------:|:---------:|:------------:|
| supported | supported | supported | experimental |
Both `x86_64` and `aarch64` architectures are supported on Windows, Linux and MacOS platforms.
### Target support
Slang can compile shader code to the following targets:
| Target | Status | Output Formats |
|:-----------:|:-------------------------------------------------------------------------------------:|:----------------------------------------------------------------:|
| Direct3D 11 | [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-11) | HLSL |
| Direct3D 12 | [supported](https://shader-slang.com/slang/user-guide/targets.html#direct3d-12) | HLSL |
| Vulkan | [supported](https://shader-slang.com/slang/user-guide/targets.html#vulkan) | SPIRV, GLSL |
| Metal | [experimental*](https://shader-slang.com/slang/user-guide/targets.html#metal) | Metal Shading Language |
| WebGPU | experimental** | WGSL |
| CUDA | [supported](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix) | C++ (compute only) |
| Optix | [experimental](https://shader-slang.com/slang/user-guide/targets.html#cuda-and-optix) | C++ (WIP) |
| CPU | [experimental](https://shader-slang.com/slang/user-guide/targets.html#cpu-compute) | C++ (kernel), C++ (host), standalone executable, dynamic library |
> *Slang currently supports generating vertex, fragment, compute, task and mesh
> shaders for Metal.
> **WGSL support is still a work in progress.
For greater detail, see the [Supported Compilation
Targets](https://shader-slang.com/slang/user-guide/targets.html) section of the
[User Guide](https://shader-slang.github.io/slang/user-guide/)
The Slang project has been used for production applications and large shader
codebases, but it is still under active development. Support is currently
focused on the platforms (Windows, Linux) and target APIs (Direct3D 12, Vulkan)
where Slang is used most heavily. Users who are looking for support on other
platforms or APIs should coordinate with the development team via the issue
tracker to make sure that their use cases can be supported.
License
-------
The Slang code itself is under the Apache 2.0 with LLVM Exception license (see [LICENSE](LICENSE)).
Builds of the core Slang tools depend on the following projects, either automatically or optionally, which may have their own licenses:
* [`glslang`](https://github.com/KhronosGroup/glslang) (BSD)
* [`lz4`](https://github.com/lz4/lz4) (BSD)
* [`miniz`](https://github.com/richgel999/miniz) (MIT)
* [`spirv-headers`](https://github.com/KhronosGroup/SPIRV-Headers) (Modified MIT)
* [`spirv-tools`](https://github.com/KhronosGroup/SPIRV-Tools) (Apache 2.0)
* [`ankerl::unordered_dense::{map, set}`](https://github.com/martinus/unordered_dense) (MIT)
Slang releases may include [LLVM](https://github.com/llvm/llvm-project) under the license:
* [`llvm`](https://llvm.org/docs/DeveloperPolicy.html#new-llvm-project-license-framework) (Apache 2.0 License with LLVM exceptions)
Some of the tests and example programs that build with Slang use the following projects, which may have their own licenses:
* [`glm`](https://github.com/g-truc/glm) (MIT)
* `stb_image` and `stb_image_write` from the [`stb`](https://github.com/nothings/stb) collection of single-file libraries (Public Domain)
* [`tinyobjloader`](https://github.com/tinyobjloader/tinyobjloader) (MIT)

File diff suppressed because it is too large Load diff

View file

@ -1,444 +0,0 @@
public namespace slang
{
public typedef int32_t Result;
public typedef uint64_t Size;
public typedef int64_t Int;
public typedef uint64_t UInt;
/*!
@brief Severity of a diagnostic generated by the compiler.
Values come from the enum below, with higher values representing more severe
conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation
failure.
*/
public enum SlangSeverity
{
SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */
SLANG_SEVERITY_NOTE, /**< An informative message. */
SLANG_SEVERITY_WARNING, /**< A warning, which indicates a possible problem. */
SLANG_SEVERITY_ERROR, /**< An error, indicating that compilation failed. */
SLANG_SEVERITY_FATAL, /**< An unrecoverable error, which forced compilation to abort. */
SLANG_SEVERITY_INTERNAL, /**< An internal error, indicating a logic error in the compiler. */
};
public enum SlangDiagnosticFlags
{
SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01,
SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02
};
public enum SlangBindableResourceType
{
SLANG_NON_BINDABLE = 0,
SLANG_TEXTURE,
SLANG_SAMPLER,
SLANG_UNIFORM_BUFFER,
SLANG_STORAGE_BUFFER,
};
public enum SlangCompileTarget
{
SLANG_TARGET_UNKNOWN,
SLANG_TARGET_NONE,
SLANG_GLSL,
SLANG_GLSL_VULKAN, //< deprecated: just use `SLANG_GLSL`
SLANG_GLSL_VULKAN_ONE_DESC, //< deprecated
SLANG_HLSL,
SLANG_SPIRV,
SLANG_SPIRV_ASM,
SLANG_DXBC,
SLANG_DXBC_ASM,
SLANG_DXIL,
SLANG_DXIL_ASM,
SLANG_C_SOURCE, ///< The C language
SLANG_CPP_SOURCE, ///< C++ code for shader kernels.
SLANG_CPP_PYTORCH_BINDING,
SLANG_HOST_EXECUTABLE, ///< Standalone binary executable (for hosting CPU/OS)
SLANG_SHADER_SHARED_LIBRARY, ///< A shared library/Dll for shader kernels (for hosting CPU/OS)
SLANG_SHADER_HOST_CALLABLE, ///< A CPU target that makes the compiled shader code available to be run immediately
SLANG_CUDA_SOURCE, ///< Cuda source
SLANG_PTX, ///< PTX
SLANG_OBJECT_CODE, ///< Object code that can be used for later linking
SLANG_HOST_CPP_SOURCE, ///< C++ code for host library or executable.
SLANG_HOST_HOST_CALLABLE, ///<
SLANG_TARGET_COUNT_OF,
};
/* A "container format" describes the way that the outputs
for multiple files, entry points, targets, etc. should be
combined into a single artifact for output. */
public enum SlangContainerFormat
{
/* Don't generate a container. */
SLANG_CONTAINER_FORMAT_NONE,
/* Generate a container in the `.slang-module` format,
which includes reflection information, compiled kernels, etc. */
SLANG_CONTAINER_FORMAT_SLANG_MODULE,
};
/* Downstream tools/compilers that Slang can delegate ("pass through")
compilation to.
NOTE(review): values presumably mirror the C enum in slang.h -- keep the
order in sync with the native header this binding was generated from. */
public enum SlangPassThrough : int
{
SLANG_PASS_THROUGH_NONE,
SLANG_PASS_THROUGH_FXC,
SLANG_PASS_THROUGH_DXC,
SLANG_PASS_THROUGH_GLSLANG,
SLANG_PASS_THROUGH_SPIRV_DIS,
SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler
SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler
SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler
SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type
SLANG_PASS_THROUGH_NVRTC, ///< NVRTC Cuda compiler
SLANG_PASS_THROUGH_LLVM, ///< LLVM 'compiler' - includes LLVM and Clang
SLANG_PASS_THROUGH_SPIRV_OPT,
SLANG_PASS_THROUGH_COUNT_OF,
};
/* Defines an archive type used to holds a 'file system' type structure. */
public enum SlangArchiveType : int
{
SLANG_ARCHIVE_TYPE_UNDEFINED,
SLANG_ARCHIVE_TYPE_ZIP,
SLANG_ARCHIVE_TYPE_RIFF, ///< Riff container with no compression
SLANG_ARCHIVE_TYPE_RIFF_DEFLATE,
SLANG_ARCHIVE_TYPE_RIFF_LZ4,
SLANG_ARCHIVE_TYPE_COUNT_OF,
};
/*!
Flags to control compilation behavior.
NOTE(review): the bit positions (starting at 1 << 3) and the zero-valued
deprecated flags match the native SlangCompileFlags layout -- do not
renumber independently of slang.h.
*/
public enum SlangCompileFlags
{
/* Do as little mangling of names as possible, to try to preserve original names */
SLANG_COMPILE_FLAG_NO_MANGLING = 1 << 3,
/* Skip code generation step, just check the code and generate layout */
SLANG_COMPILE_FLAG_NO_CODEGEN = 1 << 4,
/* Obfuscate shader names on release products */
SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5,
/* Deprecated flags: kept around to allow existing applications to
compile. Note that the relevant features will still be left in
their default state. */
SLANG_COMPILE_FLAG_NO_CHECKING = 0,
SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES = 0,
};
/*!
@brief Flags to control code generation behavior of a compilation target */
public enum SlangTargetFlags
{
None = 0,
/* When compiling for a D3D Shader Model 5.1 or higher target, allocate
distinct register spaces for parameter blocks.
@deprecated This behavior is now enabled unconditionally.
*/
SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4,
/* When set, will generate target code that contains all entrypoints defined
in the input source or specified via the `spAddEntryPoint` function in a
single output module (library/source file).
*/
SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8,
/* When set, will dump out the IR between intermediate compilation steps.*/
SLANG_TARGET_FLAG_DUMP_IR = 1 << 9,
/* When set, will generate SPIRV directly instead of going through glslang. */
SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10,
};
/*!
@brief Options to control floating-point precision guarantees for a target.
*/
public enum SlangFloatingPointMode
{
SLANG_FLOATING_POINT_MODE_DEFAULT = 0,
SLANG_FLOATING_POINT_MODE_FAST,
SLANG_FLOATING_POINT_MODE_PRECISE,
};
/*!
@brief Options to control emission of `#line` directives
*/
public enum SlangLineDirectiveMode
{
SLANG_LINE_DIRECTIVE_MODE_DEFAULT = 0, /**< Default behavior: pick behavior based on target. */
SLANG_LINE_DIRECTIVE_MODE_NONE, /**< Don't emit line directives at all. */
SLANG_LINE_DIRECTIVE_MODE_STANDARD, /**< Emit standard C-style `#line` directives. */
SLANG_LINE_DIRECTIVE_MODE_GLSL, /**< Emit GLSL-style directives with file *number* instead of name */
};
/* Source languages that can be fed to the compiler. */
public enum SlangSourceLanguage : int
{
SLANG_SOURCE_LANGUAGE_UNKNOWN,
SLANG_SOURCE_LANGUAGE_SLANG,
SLANG_SOURCE_LANGUAGE_HLSL,
SLANG_SOURCE_LANGUAGE_GLSL,
SLANG_SOURCE_LANGUAGE_C,
SLANG_SOURCE_LANGUAGE_CPP,
SLANG_SOURCE_LANGUAGE_CUDA,
SLANG_SOURCE_LANGUAGE_COUNT_OF,
};
/* Only the 'unknown' sentinel is declared here; concrete profile ids are
presumably obtained at runtime from the Slang API -- TODO confirm. */
public enum SlangProfileID
{
SLANG_PROFILE_UNKNOWN,
};
/* Only the 'unknown' sentinel is declared here; concrete capability ids are
presumably obtained at runtime from the Slang API -- TODO confirm. */
public enum SlangCapabilityID
{
SLANG_CAPABILITY_UNKNOWN = 0,
};
/* Memory layout to assume for matrix-typed variables. */
public enum SlangMatrixLayoutMode
{
SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0,
SLANG_MATRIX_LAYOUT_ROW_MAJOR,
SLANG_MATRIX_LAYOUT_COLUMN_MAJOR,
};
/* Pipeline stages an entry point can target (raster, compute, ray tracing
and mesh stages). */
public enum SlangStage
{
SLANG_STAGE_NONE,
SLANG_STAGE_VERTEX,
SLANG_STAGE_HULL,
SLANG_STAGE_DOMAIN,
SLANG_STAGE_GEOMETRY,
SLANG_STAGE_FRAGMENT,
SLANG_STAGE_COMPUTE,
SLANG_STAGE_RAY_GENERATION,
SLANG_STAGE_INTERSECTION,
SLANG_STAGE_ANY_HIT,
SLANG_STAGE_CLOSEST_HIT,
SLANG_STAGE_MISS,
SLANG_STAGE_CALLABLE,
SLANG_STAGE_MESH,
SLANG_STAGE_AMPLIFICATION,
};
/* How much debug information to emit for a target. */
public enum SlangDebugInfoLevel
{
SLANG_DEBUG_INFO_LEVEL_NONE = 0, /**< Don't emit debug information at all. */
SLANG_DEBUG_INFO_LEVEL_MINIMAL, /**< Emit as little debug information as possible, while still supporting stack traces. */
SLANG_DEBUG_INFO_LEVEL_STANDARD, /**< Emit whatever is the standard level of debug information for each target. */
SLANG_DEBUG_INFO_LEVEL_MAXIMAL, /**< Emit as much debug information as possible for each target. */
};
/* How aggressively to optimize generated code for a target. */
public enum SlangOptimizationLevel
{
SLANG_OPTIMIZATION_LEVEL_NONE = 0, /**< Don't optimize at all. */
SLANG_OPTIMIZATION_LEVEL_DEFAULT, /**< Default optimization level: balance code quality and compilation time. */
SLANG_OPTIMIZATION_LEVEL_HIGH, /**< Optimize aggressively. */
SLANG_OPTIMIZATION_LEVEL_MAXIMAL, /**< Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs */
};
/* Kinds of types reported by Slang's reflection interface. */
public enum SlangTypeKind
{
NONE,
STRUCT,
ARRAY,
MATRIX,
VECTOR,
SCALAR,
CONSTANT_BUFFER,
RESOURCE,
SAMPLER_STATE,
TEXTURE_BUFFER,
SHADER_STORAGE_BUFFER,
PARAMETER_BLOCK,
GENERIC_TYPE_PARAMETER,
INTERFACE,
OUTPUT_STREAM,
SPECIALIZED,
FEEDBACK,
COUNT,
};
/* Scalar element types reported by Slang's reflection interface. */
public enum SlangScalarType
{
NONE,
VOID,
BOOL,
INT32,
UINT32,
INT64,
UINT64,
FLOAT16,
FLOAT32,
FLOAT64,
INT8,
UINT8,
INT16,
UINT16,
};
/* Opaque placeholder: reflection data is only ever accessed through
pointers handed out by the native Slang API (see SpecializationArg). */
public struct TypeReflection
{
};
/* Flags for compiling the standard library. */
public enum CompileStdLibFlags
{
WriteDocumentation = 0x1,
};
/* A blob interface exposing a raw buffer pointer and its size. */
[COM("8BA5FB08-5195-40e2-AC58-0D-98-9C-3A-01-02")]
public interface ISlangBlob
{
public void *getBufferPointer();
public Size getBufferSize();
};
/** Description of a code generation target.
*/
public struct TargetDesc
{
/** The size of this structure, in bytes.
NOTE(review): hard-coded 40 is presumably sizeof(slang::TargetDesc)
on a 64-bit build -- confirm against the slang.h this was ported from
and keep in sync if fields change.
*/
public Size structureSize = 40;
/** The target format to generate code for (e.g., SPIR-V, DXIL, etc.)
*/
public SlangCompileTarget format = SlangCompileTarget.SLANG_TARGET_UNKNOWN;
/** The compilation profile supported by the target (e.g., "Shader Model 5.1")
*/
public SlangProfileID profile = SlangProfileID.SLANG_PROFILE_UNKNOWN;
/** Flags for the code generation target. Currently unused. */
public SlangTargetFlags flags = SlangTargetFlags.None;
/** Default mode to use for floating-point operations on the target.
*/
public SlangFloatingPointMode floatingPointMode = SlangFloatingPointMode.SLANG_FLOATING_POINT_MODE_DEFAULT;
/** Optimization level to use for the target.
*/
public SlangOptimizationLevel optimizationLevel = SlangOptimizationLevel.SLANG_OPTIMIZATION_LEVEL_DEFAULT;
/** The line directive mode for output source code.
*/
public SlangLineDirectiveMode lineDirectiveMode = SlangLineDirectiveMode.SLANG_LINE_DIRECTIVE_MODE_DEFAULT;
/** Whether to force `scalar` layout for glsl shader storage buffers.
*/
public bool forceGLSLScalarBufferLayout = false;
};
/* Flags used when creating a session; currently only the 'none' value. */
public enum SessionFlags
{
kSessionFlags_None = 0
};
/* A preprocessor macro definition (name/value pair) applied to all code
loaded in a session. */
public struct PreprocessorMacroDesc
{
public NativeString name;
public NativeString value;
};
/* Parameters for creating a session.
NOTE(review): `nullptr` initializers below look like C++ leftovers from the
header this binding was ported from -- confirm they are valid in the target
language (Beef/C#-style bindings normally use `null`). */
public struct SessionDesc
{
/** The size of this structure, in bytes.
NOTE(review): hard-coded 72 is presumably sizeof(slang::SessionDesc)
on a 64-bit build -- confirm against slang.h and keep in sync.
*/
public Size structureSize = 72;
/** Code generation targets to include in the session.
*/
public TargetDesc *targets = nullptr;
public Int targetCount = 0;
/** Flags to configure the session.
*/
public SessionFlags flags = SessionFlags.kSessionFlags_None;
/** Default layout to assume for variables with matrix types.
*/
public SlangMatrixLayoutMode defaultMatrixLayoutMode = SlangMatrixLayoutMode.SLANG_MATRIX_LAYOUT_ROW_MAJOR;
/** Paths to use when searching for `#include`d or `import`ed files.
*/
public NativeString *searchPaths = nullptr;
public Int searchPathCount = 0;
public PreprocessorMacroDesc *preprocessorMacros = nullptr;
public Int preprocessorMacroCount = 0;
public void *fileSystem = nullptr;
};
/** A global session for interaction with the Slang library.
An application may create and re-use a single global session across
multiple sessions, in order to amortize startup costs (in current
Slang this is mostly the cost of loading the Slang standard library).
The global session is currently *not* thread-safe and objects created from
a single global session should only be used from a single thread at
a time.
*/
[COM("c140b5fd-0c78-452e-ba7c-1a-1e-70-c7-f7-1c")]
public interface IGlobalSession
{
};
/* Container shapes a type can be wrapped in when specializing.
NOTE(review): `enum class` is a C++ leftover -- confirm it parses in the
target language. */
public enum class ContainerType
{
None, UnsizedArray, StructuredBuffer, ConstantBuffer, ParameterBlock
};
/** A session provides a scope for code that is loaded.
A session can be used to load modules of Slang source code,
and to request target-specific compiled binaries and layout
information.
In order to be able to load code, the session owns a set
of active "search paths" for resolving `#include` directives
and `import` declarations, as well as a set of global
preprocessor definitions that will be used for all code
that gets `import`ed in the session.
If multiple user shaders are loaded in the same session,
and import the same module (e.g., two source files do `import X`)
then there will only be one copy of `X` loaded within the session.
In order to be able to generate target code, the session
owns a list of available compilation targets, which specify
code generation options.
Code loaded and compiled within a session is owned by the session
and will remain resident in memory until the session is released.
Applications wishing to control the memory usage for compiled
and loaded code should use multiple sessions.
*/
[COM("67618701-d116-468f-ab3b-47-4b-ed-ce-0e-3d")]
public interface ISession
{
};
/* A composable unit of compiled code (module, entry point, composite). */
[COM("5bc42be8-5c50-4929-9e5e-d15e7c24015f")]
public interface IComponentType
{
}
/* Opaque placeholder; only ever used via pointers from the native API. */
public struct TypeLayoutReflection { }
/** The kind of specialization argument.
NOTE(review): `enum class ... : int32_t` is C++ syntax carried over from
slang.h -- confirm it compiles here. */
public enum class SpecializationArgKind : int32_t
{
Unknown, /**< An invalid specialization argument. */
Type, /**< Specialize to a type. */
};
/* A single argument used to specialize a component type. */
public struct SpecializationArg
{
public SpecializationArgKind kind;
/** A type specialization argument, used for `Kind::Type`. */
public TypeReflection *type;
}
}

View file

@ -1,44 +0,0 @@
####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
####### Any changes to this file will be overwritten by the next CMake run ####
####### The input file was SlangConfig.cmake.in ########
# Root of the installed package: one directory above this config file.
get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../" ABSOLUTE)
# Sets ${_var} to ${_file} and hard-fails configuration if the path is missing.
macro(set_and_check _var _file)
set(${_var} "${_file}")
if(NOT EXISTS "${_file}")
message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
endif()
endmacro()
# Marks the package NOT FOUND when any REQUIRED find_package component
# failed to be located.
macro(check_required_components _NAME)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(NOT ${_NAME}_${comp}_FOUND)
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
endif()
endif()
endforeach()
endmacro()
####################################################################################
if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
include("${CMAKE_CURRENT_LIST_DIR}/slangTargets.cmake")
check_required_components("slang")
endif()
# NOTE(review): `if(ON)` is the configure-time substitution of an option that
# was enabled when this file was generated -- not a typo.
if(ON)
find_program(SLANGC_EXECUTABLE "slangc" HINTS ENV PATH "${PACKAGE_PREFIX_DIR}/bin")
if (NOT SLANGC_EXECUTABLE)
message(STATUS "slangc executable not found; ensure it is available in your PATH.")
endif()
set(SLANG_EXECUTABLE ${SLANGC_EXECUTABLE} CACHE STRING "Path to the slangc executable")
endif()

View file

@ -1,65 +0,0 @@
# This is a basic version file for the Config-mode of find_package().
# It is used by write_basic_package_version_file() as input file for configure_file()
# to create a version-file which can be installed along a config.cmake file.
#
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
# the requested version string are exactly the same and it sets
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
# but only if the requested major version is the same as the current one.
# The variable CVF_VERSION must be set before calling configure_file().
set(PACKAGE_VERSION "2025.3.1")
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
# Extract the major version component; strip leading zeros so the
# STREQUAL comparisons below behave numerically.
if("2025.3.1" MATCHES "^([0-9]+)\\.")
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")
endif()
else()
set(CVF_VERSION_MAJOR "2025.3.1")
endif()
if(PACKAGE_FIND_VERSION_RANGE)
# both endpoints of the range must have the expected major version
math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
set(PACKAGE_VERSION_COMPATIBLE FALSE)
elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
else()
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()
endif()
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
# NOTE(review): the bare literal "8" below is the installer's
# CMAKE_SIZEOF_VOID_P baked in at generation time (64-bit build) -- not a typo.
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")
return()
endif()
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")
math(EXPR installedBits "8 * 8")
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()

View file

@ -1,70 +0,0 @@
#----------------------------------------------------------------
# Generated CMake target import file for configuration "Release".
#----------------------------------------------------------------
# Included by slangTargets.cmake (which sets _IMPORT_PREFIX first).
# Maps each imported slang:: target to its installed Release artifact and
# registers the files for the existence check run by the including file.
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Import target "slang::slang-llvm" for configuration "Release"
set_property(TARGET slang::slang-llvm APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang-llvm PROPERTIES
IMPORTED_COMMON_LANGUAGE_RUNTIME_RELEASE ""
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang-llvm.so"
IMPORTED_NO_SONAME_RELEASE "TRUE"
)
list(APPEND _cmake_import_check_targets slang::slang-llvm )
list(APPEND _cmake_import_check_files_for_slang::slang-llvm "${_IMPORT_PREFIX}/lib/libslang-llvm.so" )
# Import target "slang::slang-glslang" for configuration "Release"
set_property(TARGET slang::slang-glslang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang-glslang PROPERTIES
IMPORTED_COMMON_LANGUAGE_RUNTIME_RELEASE ""
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang-glslang.so"
IMPORTED_NO_SONAME_RELEASE "TRUE"
)
list(APPEND _cmake_import_check_targets slang::slang-glslang )
list(APPEND _cmake_import_check_files_for_slang::slang-glslang "${_IMPORT_PREFIX}/lib/libslang-glslang.so" )
# Import target "slang::slangd" for configuration "Release"
set_property(TARGET slang::slangd APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slangd PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/slangd"
)
list(APPEND _cmake_import_check_targets slang::slangd )
list(APPEND _cmake_import_check_files_for_slang::slangd "${_IMPORT_PREFIX}/bin/slangd" )
# Import target "slang::gfx" for configuration "Release"
set_property(TARGET slang::gfx APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::gfx PROPERTIES
IMPORTED_LINK_DEPENDENT_LIBRARIES_RELEASE "slang::slang"
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libgfx.so"
IMPORTED_SONAME_RELEASE "libgfx.so"
)
list(APPEND _cmake_import_check_targets slang::gfx )
list(APPEND _cmake_import_check_files_for_slang::gfx "${_IMPORT_PREFIX}/lib/libgfx.so" )
# Import target "slang::slang" for configuration "Release"
set_property(TARGET slang::slang APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slang PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libslang.so"
IMPORTED_SONAME_RELEASE "libslang.so"
)
list(APPEND _cmake_import_check_targets slang::slang )
list(APPEND _cmake_import_check_files_for_slang::slang "${_IMPORT_PREFIX}/lib/libslang.so" )
# Import target "slang::slangc" for configuration "Release"
set_property(TARGET slang::slangc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(slang::slangc PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/slangc"
)
list(APPEND _cmake_import_check_targets slang::slangc )
list(APPEND _cmake_import_check_files_for_slang::slangc "${_IMPORT_PREFIX}/bin/slangc" )
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)

View file

@ -1,123 +0,0 @@
# Generated by CMake
# Defines the imported slang:: targets and then includes the
# per-configuration files (slangTargets-*.cmake) that point them at the
# installed binaries.
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
if(CMAKE_VERSION VERSION_LESS "2.8.3")
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.8.3...3.29)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_cmake_targets_defined "")
set(_cmake_targets_not_defined "")
set(_cmake_expected_targets "")
foreach(_cmake_expected_target IN ITEMS slang::slang-llvm slang::slang-glslang slang::slangd slang::gfx slang::slang slang::slangc)
list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
if(TARGET "${_cmake_expected_target}")
list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
else()
list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
endif()
endforeach()
unset(_cmake_expected_target)
# All targets already defined: second inclusion, nothing to do.
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
unset(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
# Some but not all targets defined: inconsistent state, abort.
if(NOT _cmake_targets_defined STREQUAL "")
string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
endif()
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
# Compute the installation prefix relative to this file.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
set(_IMPORT_PREFIX "")
endif()
# Create imported target slang::slang-llvm
add_library(slang::slang-llvm MODULE IMPORTED)
set_target_properties(slang::slang-llvm PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "SLANG_DYNAMIC"
)
# Create imported target slang::slang-glslang
add_library(slang::slang-glslang MODULE IMPORTED)
# Create imported target slang::slangd
add_executable(slang::slangd IMPORTED)
# Create imported target slang::gfx
add_library(slang::gfx SHARED IMPORTED)
set_target_properties(slang::gfx PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "SLANG_GFX_DYNAMIC"
)
# Create imported target slang::slang
add_library(slang::slang SHARED IMPORTED)
# Create imported target slang::slangc
add_executable(slang::slangc IMPORTED)
# Load information for each installed configuration.
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/slangTargets-*.cmake")
foreach(_cmake_config_file IN LISTS _cmake_config_files)
include("${_cmake_config_file}")
endforeach()
unset(_cmake_config_file)
unset(_cmake_config_files)
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
if(CMAKE_VERSION VERSION_LESS "3.28"
OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target}
OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}")
foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
if(NOT EXISTS "${_cmake_file}")
message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
\"${_cmake_file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
endif()
unset(_cmake_file)
unset("_cmake_import_check_files_for_${_cmake_target}")
endforeach()
unset(_cmake_target)
unset(_cmake_import_check_targets)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)

View file

@ -1,200 +0,0 @@
#ifndef SLANG_COM_HELPER_H
#define SLANG_COM_HELPER_H
/** \file slang-com-helper.h

Helper macros for propagating SlangResult error codes and for stamping out
COM-style IUnknown boilerplate on implementing classes.
*/
#include "slang.h"
#include <atomic>
/* !!!!!!!!!!!!!!!!!!!!! Macros to help checking SlangResult !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/*! Set SLANG_HANDLE_RESULT_FAIL(x) to code to be executed whenever an error occurs, and is detected
* by one of the macros */
#ifndef SLANG_HANDLE_RESULT_FAIL
#define SLANG_HANDLE_RESULT_FAIL(x)
#endif
//! Helper macro, that makes it easy to add result checking to calls in functions/methods that
//! themselves return Result.
#define SLANG_RETURN_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return _res; \
} \
}
//! Helper macro that can be used to test the return value from a call, and will return in a void
//! method/function
#define SLANG_RETURN_VOID_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return; \
} \
}
//! Helper macro that will return false on failure.
#define SLANG_RETURN_FALSE_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return false; \
} \
}
//! Helper macro that will return nullptr on failure.
#define SLANG_RETURN_NULL_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
SLANG_HANDLE_RESULT_FAIL(_res); \
return nullptr; \
} \
}
//! Helper macro that will assert if the return code from a call is failure, also returns the
//! failure.
//! NOTE(review): uses assert() but this header does not include <assert.h>;
//! presumably it arrives via "slang.h" -- confirm.
#define SLANG_ASSERT_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
assert(false); \
return _res; \
} \
}
//! Helper macro that will assert if the result from a call is a failure, also returns.
#define SLANG_ASSERT_VOID_ON_FAIL(x) \
{ \
SlangResult _res = (x); \
if (SLANG_FAILED(_res)) \
{ \
assert(false); \
return; \
} \
}
/* !!!!!!!!!!!!!!!!!!!!!!! C++ helpers !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
#if defined(__cplusplus)
namespace Slang
{
// Alias SlangResult to Slang::Result
typedef SlangResult Result;
// Alias SlangUUID to Slang::Guid
typedef SlangUUID Guid;
} // namespace Slang
// Operator == and != for Guid/SlangUUID
// NOTE(review): the comparison reads the Guid as four uint32_t words
// (a[0]..a[3]), which assumes sizeof(Guid) == 16 with 4-byte alignment --
// confirm against the SlangUUID definition in slang.h.
SLANG_FORCE_INLINE bool operator==(const Slang::Guid& aIn, const Slang::Guid& bIn)
{
using namespace Slang;
// Use the largest type the honors the alignment of Guid
typedef uint32_t CmpType;
union GuidCompare
{
Guid guid;
CmpType data[sizeof(Guid) / sizeof(CmpType)];
};
// Type pun - so compiler can 'see' the pun and not break aliasing rules
const CmpType* a = reinterpret_cast<const GuidCompare&>(aIn).data;
const CmpType* b = reinterpret_cast<const GuidCompare&>(bIn).data;
// Make the guid comparison a single branch, by not using short circuit
return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0;
}
SLANG_FORCE_INLINE bool operator!=(const Slang::Guid& a, const Slang::Guid& b)
{
return !(a == b);
}
/* !!!!!!!! Macros to simplify implementing COM interfaces !!!!!!!!!!!!!!!!!!!!!!!!!!!! */
/* Assumes underlying implementation has a member m_refCount that is initialized to 0 and can
have ++ and -- operate on it. For SLANG_IUNKNOWN_QUERY_INTERFACE to work - must have a method
'getInterface' that returns valid pointers for the Guid, or nullptr if not found. */
#define SLANG_IUNKNOWN_QUERY_INTERFACE \
SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
SlangUUID const& uuid, \
void** outObject) SLANG_OVERRIDE \
{ \
ISlangUnknown* intf = getInterface(uuid); \
if (intf) \
{ \
addRef(); \
*outObject = intf; \
return SLANG_OK; \
} \
return SLANG_E_NO_INTERFACE; \
}
//! Implements addRef(): increments m_refCount and returns the new count.
#define SLANG_IUNKNOWN_ADD_REF \
SLANG_NO_THROW uint32_t SLANG_MCALL addRef() \
{ \
return ++m_refCount; \
}
//! Implements release(): decrements m_refCount, destroys the object when the
//! count reaches zero, and returns the remaining count (0 after destruction).
//!
//! The decremented value is captured into a local exactly once. The previous
//! version re-read m_refCount after `--m_refCount`; when m_refCount is a
//! std::atomic shared between threads, another thread's final release could
//! destroy `this` between the decrement and the re-read, making the trailing
//! `return m_refCount;` a racy read (potential use-after-free). Capturing the
//! result of the atomic decrement is both race-free and returns the same
//! values as before in the single-threaded case.
#define SLANG_IUNKNOWN_RELEASE \
    SLANG_NO_THROW uint32_t SLANG_MCALL release() \
    { \
        const uint32_t _slangRefCount = uint32_t(--m_refCount); \
        if (_slangRefCount == 0) \
        { \
            delete this; \
        } \
        return _slangRefCount; \
    }
//! Stamps out the full IUnknown triple for m_refCount-based classes.
#define SLANG_IUNKNOWN_ALL \
SLANG_IUNKNOWN_QUERY_INTERFACE \
SLANG_IUNKNOWN_ADD_REF \
SLANG_IUNKNOWN_RELEASE
// ------------------------ RefObject IUnknown -----------------------------
// Same boilerplate for RefObject-based classes, which expose
// addReference()/releaseReference() instead of a raw m_refCount member.
#define SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface( \
SlangUUID const& uuid, \
void** outObject) SLANG_OVERRIDE \
{ \
void* intf = getInterface(uuid); \
if (intf) \
{ \
addReference(); \
*outObject = intf; \
return SLANG_OK; \
} \
return SLANG_E_NO_INTERFACE; \
}
#define SLANG_REF_OBJECT_IUNKNOWN_ADD_REF \
SLANG_NO_THROW uint32_t SLANG_MCALL addRef() SLANG_OVERRIDE \
{ \
return (uint32_t)addReference(); \
}
#define SLANG_REF_OBJECT_IUNKNOWN_RELEASE \
SLANG_NO_THROW uint32_t SLANG_MCALL release() SLANG_OVERRIDE \
{ \
return (uint32_t)releaseReference(); \
}
#define SLANG_REF_OBJECT_IUNKNOWN_ALL \
SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \
SLANG_REF_OBJECT_IUNKNOWN_ADD_REF \
SLANG_REF_OBJECT_IUNKNOWN_RELEASE
#endif // defined(__cplusplus)
#endif

View file

@ -1,210 +0,0 @@
#ifndef SLANG_COM_PTR_H
#define SLANG_COM_PTR_H
#include "slang-com-helper.h"
#include <assert.h>
#include <cstddef>
namespace Slang
{
/*! \brief ComPtr is a simple smart pointer that manages types which implement COM based interfaces.
\details A class that implements a COM, must derive from the IUnknown interface or a type that
matches its layout exactly (such as ISlangUnknown). Trying to use this template with a class that
doesn't follow these rules, will lead to undefined behavior. This is a 'strong' pointer type, and
will AddRef when a non null pointer is set and Release when the pointer leaves scope. Using 'detach'
allows a pointer to be removed from the management of the ComPtr. To set the smart pointer to null,
there is the method setNull, or alternatively just assign SLANG_NULL/nullptr.
One edge case using the template is that sometimes you want access as a pointer to a pointer.
Sometimes this is to write into the smart pointer, other times to pass as an array. To handle these
different behaviors there are the methods readRef and writeRef, which are used instead of the &
(ref) operator. For example
\code
Void doSomething(ID3D12Resource** resources, IndexT numResources);
// ...
ComPtr<ID3D12Resource> resources[3];
doSomething(resources[0].readRef(), SLANG_COUNT_OF(resources));
\endcode
A more common scenario writing to the pointer
\code
IUnknown* unk = ...;
ComPtr<ID3D12Resource> resource;
Result res = unk->QueryInterface(resource.writeRef());
\endcode
*/
// Enum to force initializing as an attach (without adding a reference)
enum InitAttach
{
INIT_ATTACH
};
template<class T>
class ComPtr
{
public:
typedef T Type;
typedef ComPtr ThisType;
// All addRef/release calls below are funneled through this cast --
// presumably so T only needs to be layout-compatible with ISlangUnknown
// and may be an incomplete type at the call site (TODO confirm).
typedef ISlangUnknown* Ptr;
/// Constructors
/// Default Ctor. Sets to nullptr
SLANG_FORCE_INLINE ComPtr()
: m_ptr(nullptr)
{
}
SLANG_FORCE_INLINE ComPtr(std::nullptr_t)
: m_ptr(nullptr)
{
}
/// Sets, and ref counts.
SLANG_FORCE_INLINE explicit ComPtr(T* ptr)
: m_ptr(ptr)
{
if (ptr)
((Ptr)ptr)->addRef();
}
/// The copy ctor
SLANG_FORCE_INLINE ComPtr(const ThisType& rhs)
: m_ptr(rhs.m_ptr)
{
if (m_ptr)
((Ptr)m_ptr)->addRef();
}
/// Ctor without adding to ref count.
SLANG_FORCE_INLINE explicit ComPtr(InitAttach, T* ptr)
: m_ptr(ptr)
{
}
/// Ctor without adding to ref count
SLANG_FORCE_INLINE ComPtr(InitAttach, const ThisType& rhs)
: m_ptr(rhs.m_ptr)
{
}
#ifdef SLANG_HAS_MOVE_SEMANTICS
/// Move Ctor
SLANG_FORCE_INLINE ComPtr(ThisType&& rhs)
: m_ptr(rhs.m_ptr)
{
rhs.m_ptr = nullptr;
}
/// Move assign. Swaps rather than releases: our old pointer is handed to
/// rhs, whose destructor will release it.
SLANG_FORCE_INLINE ComPtr& operator=(ThisType&& rhs)
{
T* swap = m_ptr;
m_ptr = rhs.m_ptr;
rhs.m_ptr = swap;
return *this;
}
#endif
/// Destructor releases the pointer, assuming it is set
SLANG_FORCE_INLINE ~ComPtr()
{
if (m_ptr)
((Ptr)m_ptr)->release();
}
// !!! Operators !!!
/// Returns the dumb pointer
SLANG_FORCE_INLINE operator T*() const { return m_ptr; }
SLANG_FORCE_INLINE T& operator*() { return *m_ptr; }
/// For making method invocations through the smart pointer work through the dumb pointer
SLANG_FORCE_INLINE T* operator->() const { return m_ptr; }
/// Assign
SLANG_FORCE_INLINE const ThisType& operator=(const ThisType& rhs);
/// Assign from dumb ptr
SLANG_FORCE_INLINE T* operator=(T* in);
/// Get the pointer and don't ref
SLANG_FORCE_INLINE T* get() const { return m_ptr; }
/// Releases the contained pointer (if any) and sets it to null
SLANG_FORCE_INLINE void setNull();
/// Detach: returns the raw pointer and stops managing it (no release)
SLANG_FORCE_INLINE T* detach()
{
T* ptr = m_ptr;
m_ptr = nullptr;
return ptr;
}
/// Set to a pointer without changing the ref count
SLANG_FORCE_INLINE void attach(T* in) { m_ptr = in; }
/// Get ready for writing (nulls contents)
SLANG_FORCE_INLINE T** writeRef()
{
setNull();
return &m_ptr;
}
/// Get for read access
SLANG_FORCE_INLINE T* const* readRef() const { return &m_ptr; }
/// Swap
void swap(ThisType& rhs);
protected:
/// Gets the address of the dumb pointer.
// Disabled: use writeRef and readRef to get a reference based on usage.
#ifndef SLANG_COM_PTR_ENABLE_REF_OPERATOR
SLANG_FORCE_INLINE T** operator&() = delete;
#endif
T* m_ptr;
};
//----------------------------------------------------------------------------
// Releases the held reference (if any) and nulls the contained pointer.
template<typename T>
void ComPtr<T>::setNull()
{
if (m_ptr)
{
((Ptr)m_ptr)->release();
m_ptr = nullptr;
}
}
//----------------------------------------------------------------------------
// Copy assign. rhs is addRef'd *before* the current pointer is released,
// which makes self-assignment safe (the count never drops to zero between
// the two calls).
template<typename T>
const ComPtr<T>& ComPtr<T>::operator=(const ThisType& rhs)
{
if (rhs.m_ptr)
((Ptr)rhs.m_ptr)->addRef();
if (m_ptr)
((Ptr)m_ptr)->release();
m_ptr = rhs.m_ptr;
return *this;
}
//----------------------------------------------------------------------------
// Assign from a raw pointer; same addRef-before-release ordering as above.
template<typename T>
T* ComPtr<T>::operator=(T* ptr)
{
if (ptr)
((Ptr)ptr)->addRef();
if (m_ptr)
((Ptr)m_ptr)->release();
m_ptr = ptr;
return m_ptr;
}
//----------------------------------------------------------------------------
// Exchanges the managed pointers of two ComPtrs; no ref counts change.
template<typename T>
void ComPtr<T>::swap(ThisType& rhs)
{
T* tmp = m_ptr;
m_ptr = rhs.m_ptr;
rhs.m_ptr = tmp;
}
} // namespace Slang
#endif // SLANG_COM_PTR_H

View file

@ -1,58 +0,0 @@
#ifndef SLANG_CPP_HOST_PRELUDE_H
#define SLANG_CPP_HOST_PRELUDE_H
// Prelude prepended to Slang-generated host C++ code: pulls in the slang-rt
// runtime headers, sets up shared-library export / extern "C" helper macros,
// and exposes the Slang namespace.
#include <cmath>
#include <cstdio>
#include <cstring>
// Allow taking the address of a ComPtr directly in generated code
// (re-enables the otherwise-deleted operator& -- see slang-com-ptr.h).
#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1
#include "../source/slang-rt/slang-rt.h"
#include "slang-com-ptr.h"
#include "slang-cpp-types.h"
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#include "slang-cpp-scalar-intrinsics.h"
using namespace Slang;
// Calling-convention-qualified function pointer alias used by generated code.
template<typename TResult, typename... Args>
using Slang_FuncType = TResult(SLANG_MCALL*)(Args...);
#endif

View file

@ -1,322 +0,0 @@
#ifndef SLANG_CPP_PRELUDE_H
#define SLANG_CPP_PRELUDE_H
// Because the signiture of isnan, isfinite, and is isinf changed in C++, we use the macro
// to use the version in the std namespace.
// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT
#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT
#define SLANG_PRELUDE_EXPORT_END SLANG_PRELUDE_EXTERN_C_END
#ifndef INFINITY
// Must overflow for double
#define INFINITY float(1e+300 * 1e+300)
#endif
#ifndef SLANG_INFINITY
#define SLANG_INFINITY INFINITY
#endif
// Detect the compiler type
#ifndef SLANG_COMPILER
#define SLANG_COMPILER
/*
Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/
NOTE that SLANG_VC holds the compiler version - not just 1 or 0
*/
#if defined(_MSC_VER)
#if _MSC_VER >= 1900
#define SLANG_VC 14
#elif _MSC_VER >= 1800
#define SLANG_VC 12
#elif _MSC_VER >= 1700
#define SLANG_VC 11
#elif _MSC_VER >= 1600
#define SLANG_VC 10
#elif _MSC_VER >= 1500
#define SLANG_VC 9
#else
#error "unknown version of Visual C++ compiler"
#endif
#elif defined(__clang__)
#define SLANG_CLANG 1
#elif defined(__SNC__)
#define SLANG_SNC 1
#elif defined(__ghs__)
#define SLANG_GHS 1
#elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */
#define SLANG_GCC 1
#else
#error "unknown compiler"
#endif
/*
Any compilers not detected by the above logic are now now explicitly zeroed out.
*/
#ifndef SLANG_VC
#define SLANG_VC 0
#endif
#ifndef SLANG_CLANG
#define SLANG_CLANG 0
#endif
#ifndef SLANG_SNC
#define SLANG_SNC 0
#endif
#ifndef SLANG_GHS
#define SLANG_GHS 0
#endif
#ifndef SLANG_GCC
#define SLANG_GCC 0
#endif
#endif /* SLANG_COMPILER */
/*
The following section attempts to detect the target platform being compiled for.
If an application defines `SLANG_PLATFORM` before including this header,
they take responsibility for setting any compiler-dependent macros
used later in the file.
Most applications should not need to touch this section.
*/
#ifndef SLANG_PLATFORM
#define SLANG_PLATFORM
/**
Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/
*/
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP
#define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */
#elif defined(XBOXONE)
#define SLANG_XBOXONE 1
#elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */
#define SLANG_WIN64 1
#elif defined(_M_PPC)
#define SLANG_X360 1
#elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */
#define SLANG_WIN32 1
#elif defined(__ANDROID__)
#define SLANG_ANDROID 1
#elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */
#define SLANG_LINUX 1
#elif defined(__APPLE__) && !defined(SLANG_LLVM)
#include "TargetConditionals.h"
#if TARGET_OS_MAC
#define SLANG_OSX 1
#else
#define SLANG_IOS 1
#endif
#elif defined(__APPLE__)
// On `slang-llvm` we can't inclue "TargetConditionals.h" in general, so for now assume its
// OSX.
#define SLANG_OSX 1
#elif defined(__CELLOS_LV2__)
#define SLANG_PS3 1
#elif defined(__ORBIS__)
#define SLANG_PS4 1
#elif defined(__SNC__) && defined(__arm__)
#define SLANG_PSP2 1
#elif defined(__ghs__)
#define SLANG_WIIU 1
#else
#error "unknown target platform"
#endif
/*
Any platforms not detected by the above logic are now now explicitly zeroed out.
*/
#ifndef SLANG_WINRT
#define SLANG_WINRT 0
#endif
#ifndef SLANG_XBOXONE
#define SLANG_XBOXONE 0
#endif
#ifndef SLANG_WIN64
#define SLANG_WIN64 0
#endif
#ifndef SLANG_X360
#define SLANG_X360 0
#endif
#ifndef SLANG_WIN32
#define SLANG_WIN32 0
#endif
#ifndef SLANG_ANDROID
#define SLANG_ANDROID 0
#endif
#ifndef SLANG_LINUX
#define SLANG_LINUX 0
#endif
#ifndef SLANG_IOS
#define SLANG_IOS 0
#endif
#ifndef SLANG_OSX
#define SLANG_OSX 0
#endif
#ifndef SLANG_PS3
#define SLANG_PS3 0
#endif
#ifndef SLANG_PS4
#define SLANG_PS4 0
#endif
#ifndef SLANG_PSP2
#define SLANG_PSP2 0
#endif
#ifndef SLANG_WIIU
#define SLANG_WIIU 0
#endif
#endif /* SLANG_PLATFORM */
/* Shorthands for "families" of compilers/platforms */
#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC)
#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64)
#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY)
#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID)
#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */
#define SLANG_UNIX_FAMILY \
(SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */
// GCC Specific
#if SLANG_GCC_FAMILY
#define SLANG_ALIGN_OF(T) __alignof__(T)
#define SLANG_BREAKPOINT(id) __builtin_trap()
// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a
// non POD type, even though it produces the correct result
#define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1)
#endif // SLANG_GCC_FAMILY
// Microsoft VC specific
#if SLANG_VC
#define SLANG_ALIGN_OF(T) __alignof(T)
#define SLANG_BREAKPOINT(id) __debugbreak();
#endif // SLANG_VC
// Default impls
#ifndef SLANG_OFFSET_OF
#define SLANG_OFFSET_OF(X, Y) offsetof(X, Y)
#endif
#ifndef SLANG_BREAKPOINT
// Make it crash with a write to 0!
#define SLANG_BREAKPOINT(id) (*((int*)0) = int(id));
#endif
// If slang.h has been included we don't need any of these definitions
#ifndef SLANG_H
/* Macro for declaring if a method is no throw. Should be set before the return parameter. */
#ifndef SLANG_NO_THROW
#if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS)
#define SLANG_NO_THROW __declspec(nothrow)
#endif
#endif
#ifndef SLANG_NO_THROW
#define SLANG_NO_THROW
#endif
/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling
convention for interface methods.
*/
#ifndef SLANG_STDCALL
#if SLANG_MICROSOFT_FAMILY
#define SLANG_STDCALL __stdcall
#else
#define SLANG_STDCALL
#endif
#endif
#ifndef SLANG_MCALL
#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
// TODO(JS): Should these be in slang-cpp-types.h?
// They are more likely to clash with slang.h
// Binary layout of a COM-style GUID/UUID, used to identify interfaces at
// runtime via queryInterface.
struct SlangUUID
{
    uint32_t data1;
    uint16_t data2;
    uint16_t data3;
    uint8_t data4[8];
};
// 32-bit result code returned by Slang API calls (HRESULT-like by COM
// convention — NOTE(review): sign convention assumed, verify against slang.h).
typedef int32_t SlangResult;
// Minimal IUnknown-equivalent base interface: UUID-based interface lookup
// plus manual reference counting (addRef/release).
struct ISlangUnknown
{
    virtual SLANG_NO_THROW SlangResult SLANG_MCALL
    queryInterface(SlangUUID const& uuid, void** outObject) = 0;
    virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0;
    virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0;
};
#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
public: \
SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \
{ \
static const SlangUUID guid = {a, b, c, d0, d1, d2, d3, d4, d5, d6, d7}; \
return guid; \
}
#endif // SLANG_H
// Includes
#include "slang-cpp-scalar-intrinsics.h"
#include "slang-cpp-types.h"
// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables.
#if defined(_MSC_VER)
#pragma warning(disable : 4700)
#endif
#ifndef SLANG_UNROLL
#define SLANG_UNROLL
#endif
#endif

View file

@ -1,805 +0,0 @@
#ifndef SLANG_PRELUDE_SCALAR_INTRINSICS_H
#define SLANG_PRELUDE_SCALAR_INTRINSICS_H
#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC
// If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include
// x86 intrinsics
#include <intrin.h>
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
#ifdef SLANG_PRELUDE_NAMESPACE
namespace SLANG_PRELUDE_NAMESPACE
{
#endif
#ifndef SLANG_PRELUDE_PI
#define SLANG_PRELUDE_PI 3.14159265358979323846
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif

// Type-punning unions: reinterpret the same storage as unsigned, signed, or
// floating-point without going through pointer casts.
union Union32
{
    uint32_t u;
    int32_t i;
    float f;
};
union Union64
{
    uint64_t u;
    int64_t i;
    double d;
};

// 32-bit bit-cast conversions: each one writes a value into one member of
// Union32 and reads it back through another, preserving the bit pattern.
SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f)
{
    Union32 pun;
    pun.f = f;
    return pun.i;
}
SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i)
{
    Union32 pun;
    pun.i = i;
    return pun.f;
}
SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f)
{
    Union32 pun;
    pun.f = f;
    return pun.u;
}
SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui)
{
    Union32 pun;
    pun.u = ui;
    return pun.f;
}
// ----------------------------- F16 -----------------------------------------
// This impl is based on FloatToHalf that is in Slang codebase
//
// Converts an IEEE-754 binary32 float to binary16, returned in the low 16
// bits of the result. Handles: signed zero for tiny exponents, NaN/INF
// preservation, overflow to INF, denormal halves, and round-to-nearest via
// the extra low mantissa bit carried in `m`.
SLANG_FORCE_INLINE uint32_t f32tof16(const float value)
{
    const uint32_t inBits = _bitCastFloatToUInt(value);
    // bits initially set to just the sign bit
    uint32_t bits = (inBits >> 16) & 0x8000;
    // Mantissa can't be used as is, as it holds last bit, for rounding.
    uint32_t m = (inBits >> 12) & 0x07ff;
    uint32_t e = (inBits >> 23) & 0xff;
    if (e < 103)
    {
        // Exponent too small for even an f16 denormal: result is (signed) zero.
        return bits;
    }
    if (e == 0xff)
    {
        // Could be a NAN or INF. Is INF if *input* mantissa is 0.
        // Remove last bit for rounding to make output mantissa.
        m >>= 1;
        // We *assume* float16/float32 signaling bit and remaining bits
        // semantics are the same. (The signalling bit convention is target specific!).
        // Non signal bit's usage within mantissa for a NAN are also target specific.
        // If the m is 0, it could be because the result is INF, but it could also be because all
        // the bits that made NAN were dropped as we have less mantissa bits in f16.
        // To fix for this we make non zero if m is 0 and the input mantissa was not.
        // This will (typically) produce a signalling NAN.
        m += uint32_t(m == 0 && (inBits & 0x007fffffu));
        // Combine for output
        return (bits | 0x7c00u | m);
    }
    if (e > 142)
    {
        // Finite input too large for f16: overflow to INF.
        return bits | 0x7c00u;
    }
    if (e < 113)
    {
        // Denormal half: restore the implicit leading 1, then shift into place
        // with round-to-nearest on the last bit shifted out.
        m |= 0x0800u;
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }
    // Normalized half: rebias the exponent (127 -> 15) and round to nearest
    // using the low mantissa bit still held in m.
    bits |= ((e - 112) << 10) | (m >> 1);
    bits += m & 1;
    return bits;
}
// Scale factor used to renormalize f16 denormals: a float whose exponent
// field encodes 2^(127-15), applied after the raw half bits are shifted
// into float position.
static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23);

// Converts a binary16 value (low 16 bits of `value`) to binary32.
// Zero, denormals, NaN/INF, and normalized values all take distinct paths.
SLANG_FORCE_INLINE float f16tof32(const uint32_t value)
{
    const uint32_t sign = (value & 0x8000) << 16;
    uint32_t exponent = (value & 0x7c00) >> 10;
    uint32_t mantissa = (value & 0x03ff);
    if (exponent == 0)
    {
        // If mantissa is 0 we are done, as output is 0.
        // If it's not zero we must have a denormal.
        if (mantissa)
        {
            // We have a denormal so use the magic to do exponent adjust
            return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic;
        }
    }
    else
    {
        // If the exponent is NAN or INF exponent is 0x1f on input.
        // If that's the case, we just need to set the exponent to 0xff on output
        // and the mantissa can just stay the same. If its 0 it's INF, else it is NAN and we just
        // copy the bits
        //
        // Else we need to correct the exponent in the normalized case.
        exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127));
    }
    return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13));
}
// ----------------------------- F32 -----------------------------------------
// Helpers
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians);
#ifdef SLANG_LLVM
SLANG_PRELUDE_EXTERN_C_START
// Unary
float F32_ceil(float f);
float F32_floor(float f);
float F32_round(float f);
float F32_sin(float f);
float F32_cos(float f);
float F32_tan(float f);
float F32_asin(float f);
float F32_acos(float f);
float F32_atan(float f);
float F32_sinh(float f);
float F32_cosh(float f);
float F32_tanh(float f);
float F32_log2(float f);
float F32_log(float f);
float F32_log10(float f);
float F32_exp2(float f);
float F32_exp(float f);
float F32_abs(float f);
float F32_trunc(float f);
float F32_sqrt(float f);
bool F32_isnan(float f);
bool F32_isfinite(float f);
bool F32_isinf(float f);
// Binary
SLANG_FORCE_INLINE float F32_min(float a, float b)
{
return a < b ? a : b;
}
SLANG_FORCE_INLINE float F32_max(float a, float b)
{
return a > b ? a : b;
}
float F32_pow(float a, float b);
float F32_fmod(float a, float b);
float F32_remainder(float a, float b);
float F32_atan2(float a, float b);
float F32_frexp(float x, int* e);
float F32_modf(float x, float* ip);
// Ternary
SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
{
return a * b + c;
}
SLANG_PRELUDE_EXTERN_C_END
#else
// Unary
SLANG_FORCE_INLINE float F32_ceil(float f)
{
return ::ceilf(f);
}
SLANG_FORCE_INLINE float F32_floor(float f)
{
return ::floorf(f);
}
SLANG_FORCE_INLINE float F32_round(float f)
{
return ::roundf(f);
}
SLANG_FORCE_INLINE float F32_sin(float f)
{
return ::sinf(f);
}
SLANG_FORCE_INLINE float F32_cos(float f)
{
return ::cosf(f);
}
SLANG_FORCE_INLINE float F32_tan(float f)
{
return ::tanf(f);
}
SLANG_FORCE_INLINE float F32_asin(float f)
{
return ::asinf(f);
}
SLANG_FORCE_INLINE float F32_acos(float f)
{
return ::acosf(f);
}
SLANG_FORCE_INLINE float F32_atan(float f)
{
return ::atanf(f);
}
SLANG_FORCE_INLINE float F32_sinh(float f)
{
return ::sinhf(f);
}
SLANG_FORCE_INLINE float F32_cosh(float f)
{
return ::coshf(f);
}
SLANG_FORCE_INLINE float F32_tanh(float f)
{
return ::tanhf(f);
}
SLANG_FORCE_INLINE float F32_log2(float f)
{
return ::log2f(f);
}
SLANG_FORCE_INLINE float F32_log(float f)
{
return ::logf(f);
}
SLANG_FORCE_INLINE float F32_log10(float f)
{
return ::log10f(f);
}
SLANG_FORCE_INLINE float F32_exp2(float f)
{
return ::exp2f(f);
}
SLANG_FORCE_INLINE float F32_exp(float f)
{
return ::expf(f);
}
SLANG_FORCE_INLINE float F32_abs(float f)
{
return ::fabsf(f);
}
SLANG_FORCE_INLINE float F32_trunc(float f)
{
return ::truncf(f);
}
SLANG_FORCE_INLINE float F32_sqrt(float f)
{
return ::sqrtf(f);
}
SLANG_FORCE_INLINE bool F32_isnan(float f)
{
return SLANG_PRELUDE_STD isnan(f);
}
SLANG_FORCE_INLINE bool F32_isfinite(float f)
{
return SLANG_PRELUDE_STD isfinite(f);
}
SLANG_FORCE_INLINE bool F32_isinf(float f)
{
return SLANG_PRELUDE_STD isinf(f);
}
// Binary
SLANG_FORCE_INLINE float F32_min(float a, float b)
{
return ::fminf(a, b);
}
SLANG_FORCE_INLINE float F32_max(float a, float b)
{
return ::fmaxf(a, b);
}
SLANG_FORCE_INLINE float F32_pow(float a, float b)
{
return ::powf(a, b);
}
SLANG_FORCE_INLINE float F32_fmod(float a, float b)
{
return ::fmodf(a, b);
}
SLANG_FORCE_INLINE float F32_remainder(float a, float b)
{
return ::remainderf(a, b);
}
SLANG_FORCE_INLINE float F32_atan2(float a, float b)
{
return float(::atan2(a, b));
}
SLANG_FORCE_INLINE float F32_frexp(float x, int* e)
{
return ::frexpf(x, e);
}
SLANG_FORCE_INLINE float F32_modf(float x, float* ip)
{
return ::modff(x, ip);
}
// Ternary
SLANG_FORCE_INLINE float F32_fma(float a, float b, float c)
{
return ::fmaf(a, b, c);
}
#endif
// Reduces an angle into the [0, 2*pi) range by wrapping whole cycles,
// keeping trig inputs small.
SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians)
{
    // Put 0 to 2pi cycles to cycle around 0 to 1
    float a = radians * (1.0f / float(SLANG_PRELUDE_PI * 2));
    // Get truncated fraction, as value in 0 - 1 range
    a = a - F32_floor(a);
    // Convert back to 0 - 2pi range
    return (a * float(SLANG_PRELUDE_PI * 2));
}
// Reciprocal square root, computed as a full-precision divide (no fast
// approximation).
SLANG_FORCE_INLINE float F32_rsqrt(float f)
{
    return 1.0f / F32_sqrt(f);
}
// sign(): -1 / +1 for negative / positive, and the input itself for zero
// (so -0.0f stays -0.0f).
SLANG_FORCE_INLINE float F32_sign(float f)
{
    return (f == 0.0f) ? f : ((f < 0.0f) ? -1.0f : 1.0f);
}
// Fractional part: f - floor(f), in [0, 1) for finite inputs.
SLANG_FORCE_INLINE float F32_frac(float f)
{
    return f - F32_floor(f);
}
// Bit-level reinterpretation of a float as uint32 (HLSL-style asuint).
SLANG_FORCE_INLINE uint32_t F32_asuint(float f)
{
    Union32 u;
    u.f = f;
    return u.u;
}
// Bit-level reinterpretation of a float as int32 (HLSL-style asint).
SLANG_FORCE_INLINE int32_t F32_asint(float f)
{
    Union32 u;
    u.f = f;
    return u.i;
}
// ----------------------------- F64 -----------------------------------------
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians);
#ifdef SLANG_LLVM
SLANG_PRELUDE_EXTERN_C_START
// Unary
double F64_ceil(double f);
double F64_floor(double f);
double F64_round(double f);
double F64_sin(double f);
double F64_cos(double f);
double F64_tan(double f);
double F64_asin(double f);
double F64_acos(double f);
double F64_atan(double f);
double F64_sinh(double f);
double F64_cosh(double f);
double F64_tanh(double f);
double F64_log2(double f);
double F64_log(double f);
double F64_log10(double f);
double F64_exp2(double f);
double F64_exp(double f);
double F64_abs(double f);
double F64_trunc(double f);
double F64_sqrt(double f);
bool F64_isnan(double f);
bool F64_isfinite(double f);
bool F64_isinf(double f);
// Binary
SLANG_FORCE_INLINE double F64_min(double a, double b)
{
return a < b ? a : b;
}
SLANG_FORCE_INLINE double F64_max(double a, double b)
{
return a > b ? a : b;
}
double F64_pow(double a, double b);
double F64_fmod(double a, double b);
double F64_remainder(double a, double b);
double F64_atan2(double a, double b);
double F64_frexp(double x, int* e);
double F64_modf(double x, double* ip);
// Ternary
SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
{
return a * b + c;
}
SLANG_PRELUDE_EXTERN_C_END
#else // SLANG_LLVM
// Unary
SLANG_FORCE_INLINE double F64_ceil(double f)
{
return ::ceil(f);
}
SLANG_FORCE_INLINE double F64_floor(double f)
{
return ::floor(f);
}
SLANG_FORCE_INLINE double F64_round(double f)
{
return ::round(f);
}
SLANG_FORCE_INLINE double F64_sin(double f)
{
return ::sin(f);
}
SLANG_FORCE_INLINE double F64_cos(double f)
{
return ::cos(f);
}
SLANG_FORCE_INLINE double F64_tan(double f)
{
return ::tan(f);
}
SLANG_FORCE_INLINE double F64_asin(double f)
{
return ::asin(f);
}
SLANG_FORCE_INLINE double F64_acos(double f)
{
return ::acos(f);
}
SLANG_FORCE_INLINE double F64_atan(double f)
{
return ::atan(f);
}
SLANG_FORCE_INLINE double F64_sinh(double f)
{
return ::sinh(f);
}
SLANG_FORCE_INLINE double F64_cosh(double f)
{
return ::cosh(f);
}
SLANG_FORCE_INLINE double F64_tanh(double f)
{
return ::tanh(f);
}
SLANG_FORCE_INLINE double F64_log2(double f)
{
return ::log2(f);
}
SLANG_FORCE_INLINE double F64_log(double f)
{
return ::log(f);
}
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
// Base-10 logarithm for the F64 (double-precision) intrinsic family.
//
// Bug fix: the parameter was declared `float`, so every double argument was
// silently truncated to single precision before ::log10 ran — inconsistent
// with every other F64_* wrapper here (all take `double`). Widening the
// parameter to `double` is source-compatible for all callers: float
// arguments still promote implicitly, and double arguments now keep their
// full precision (and their full range — 1e300 no longer overflows to INF).
SLANG_FORCE_INLINE double F64_log10(double f)
{
    return ::log10(f);
}
SLANG_FORCE_INLINE double F64_exp2(double f)
{
return ::exp2(f);
}
SLANG_FORCE_INLINE double F64_exp(double f)
{
return ::exp(f);
}
SLANG_FORCE_INLINE double F64_abs(double f)
{
return ::fabs(f);
}
SLANG_FORCE_INLINE double F64_trunc(double f)
{
return ::trunc(f);
}
SLANG_FORCE_INLINE double F64_sqrt(double f)
{
return ::sqrt(f);
}
SLANG_FORCE_INLINE bool F64_isnan(double f)
{
return SLANG_PRELUDE_STD isnan(f);
}
SLANG_FORCE_INLINE bool F64_isfinite(double f)
{
return SLANG_PRELUDE_STD isfinite(f);
}
SLANG_FORCE_INLINE bool F64_isinf(double f)
{
return SLANG_PRELUDE_STD isinf(f);
}
// Binary
SLANG_FORCE_INLINE double F64_min(double a, double b)
{
return ::fmin(a, b);
}
SLANG_FORCE_INLINE double F64_max(double a, double b)
{
return ::fmax(a, b);
}
SLANG_FORCE_INLINE double F64_pow(double a, double b)
{
return ::pow(a, b);
}
SLANG_FORCE_INLINE double F64_fmod(double a, double b)
{
return ::fmod(a, b);
}
SLANG_FORCE_INLINE double F64_remainder(double a, double b)
{
return ::remainder(a, b);
}
SLANG_FORCE_INLINE double F64_atan2(double a, double b)
{
return ::atan2(a, b);
}
SLANG_FORCE_INLINE double F64_frexp(double x, int* e)
{
return ::frexp(x, e);
}
SLANG_FORCE_INLINE double F64_modf(double x, double* ip)
{
return ::modf(x, ip);
}
// Ternary
SLANG_FORCE_INLINE double F64_fma(double a, double b, double c)
{
return ::fma(a, b, c);
}
#endif // SLANG_LLVM
SLANG_FORCE_INLINE double F64_rsqrt(double f)
{
return 1.0 / F64_sqrt(f);
}
SLANG_FORCE_INLINE double F64_sign(double f)
{
return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0);
}
SLANG_FORCE_INLINE double F64_frac(double f)
{
return f - F64_floor(f);
}
SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi)
{
Union64 u;
u.d = d;
*low = uint32_t(u.u);
*hi = uint32_t(u.u >> 32);
}
SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi)
{
Union64 u;
u.d = d;
*low = int32_t(u.u);
*hi = int32_t(u.u >> 32);
}
SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians)
{
// Put 0 to 2pi cycles to cycle around 0 to 1
double a = radians * (1.0f / (SLANG_PRELUDE_PI * 2));
// Get truncated fraction, as value in 0 - 1 range
a = a - F64_floor(a);
// Convert back to 0 - 2pi range
return (a * (SLANG_PRELUDE_PI * 2));
}
// ----------------------------- I32 -----------------------------------------
// Absolute value. NOTE(review): I32_abs(INT32_MIN) negates the minimum
// representable value, which is signed-overflow UB in C++ — callers should
// avoid INT32_MIN.
SLANG_FORCE_INLINE int32_t I32_abs(int32_t f)
{
    return (f < 0) ? -f : f;
}
SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b)
{
    return a > b ? a : b;
}
// Reinterprets the bits of an int32 as a float (HLSL-style asfloat).
SLANG_FORCE_INLINE float I32_asfloat(int32_t x)
{
    Union32 u;
    u.i = x;
    return u.f;
}
// Two's-complement reinterpretation as unsigned.
SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x)
{
    return uint32_t(x);
}
// Reassembles a double from its low/high 32-bit halves (HLSL-style asdouble).
SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi)
{
    Union64 u;
    u.u = (uint64_t(hi) << 32) | uint32_t(low);
    return u.d;
}
// ----------------------------- U32 -----------------------------------------
// abs() of an unsigned value is the identity.
SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f)
{
    return f;
}
SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b)
{
    return a < b ? a : b;
}
SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b)
{
    return a > b ? a : b;
}
// Reinterprets the bits of a uint32 as a float (HLSL-style asfloat).
SLANG_FORCE_INLINE float U32_asfloat(uint32_t x)
{
    Union32 u;
    u.u = x;
    return u.f;
}
// NOTE(review): this signature looks inverted for an "asint" helper — it
// takes int32_t and returns uint32_t, making it identical to I32_asuint.
// Kept as-is since compiler-generated code may bind to this exact signature;
// verify against the upstream Slang prelude before changing.
SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x)
{
    return uint32_t(x);
}
// Reassembles a double from its low/high 32-bit halves (HLSL-style asdouble).
SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi)
{
    Union64 u;
    u.u = (uint64_t(hi) << 32) | low;
    return u.d;
}
// Population count (number of set bits). Uses a compiler builtin / hardware
// popcount where available, otherwise Kernighan's clear-lowest-set-bit loop.
SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v)
{
#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
    return __builtin_popcount(v);
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
    return __popcnt(v);
#else
    uint32_t c = 0;
    while (v)
    {
        c++;
        v &= v - 1;
    }
    return c;
#endif
}
// ----------------------------- U64 -----------------------------------------
SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f)
{
return f;
}
SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b)
{
return a < b ? a : b;
}
SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b)
{
return a > b ? a : b;
}
// TODO(JS): We don't define countbits for 64bit in the core module currently.
// It's not clear from documentation if it should return 32 or 64 bits, if it exists.
// 32 bits can always hold the result, and will be implicitly promoted.
SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v)
{
#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
return uint32_t(__builtin_popcountl(v));
#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
return uint32_t(__popcnt64(v));
#else
uint32_t c = 0;
while (v)
{
c++;
v &= v - 1;
}
return c;
#endif
}
// ----------------------------- I64 -----------------------------------------
SLANG_FORCE_INLINE int64_t I64_abs(int64_t f)
{
return (f < 0) ? -f : f;
}
SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b)
{
return a < b ? a : b;
}
SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b)
{
return a > b ? a : b;
}
// ----------------------------- Interlocked ---------------------------------
#if SLANG_LLVM
#else // SLANG_LLVM
#ifdef _WIN32
#include <intrin.h>
#endif
SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue)
{
#ifdef _WIN32
*oldValue = _InterlockedExchangeAdd((long*)dest, (long)value);
#else
*oldValue = __sync_fetch_and_add(dest, value);
#endif
}
#endif // SLANG_LLVM
// ----------------------- fmod --------------------------
SLANG_FORCE_INLINE float _slang_fmod(float x, float y)
{
return F32_fmod(x, y);
}
SLANG_FORCE_INLINE double _slang_fmod(double x, double y)
{
return F64_fmod(x, y);
}
#ifdef SLANG_PRELUDE_NAMESPACE
}
#endif
#endif

View file

@ -1,671 +0,0 @@
#ifndef SLANG_PRELUDE_CPP_TYPES_CORE_H
#define SLANG_PRELUDE_CPP_TYPES_CORE_H
#ifndef SLANG_PRELUDE_ASSERT
#ifdef SLANG_PRELUDE_ENABLE_ASSERT
#define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
#else
#define SLANG_PRELUDE_ASSERT(VALUE)
#endif
#endif
// Since we are using unsigned arithmatic care is need in this comparison.
// It is *assumed* that sizeInBytes >= elemSize. Which means (sizeInBytes >= elemSize) >= 0
// Which means only a single test is needed
// Asserts for bounds checking.
// It is assumed index/count are unsigned types.
#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count);
#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0);
// Macros to zero index if an access is out of range
#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0;
#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
index = (index <= (sizeInBytes - elemSize)) ? index : 0;
// The 'FIX' macro define how the index is fixed. The default is to do nothing. If
// SLANG_ENABLE_BOUND_ZERO_INDEX the fix macro will zero the index, if out of range
#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX
#define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count)
#else
#define SLANG_BOUND_FIX(index, count)
#define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#define SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
#ifndef SLANG_BOUND_CHECK
#define SLANG_BOUND_CHECK(index, count) \
SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count)
#endif
#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS
#define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) \
SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes)
#endif
#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY
#define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) \
SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count)
#endif
struct TypeInfo
{
size_t typeSize;
};
// Fixed-size array wrapper backing Slang's `T[SIZE]`. Indexing goes through
// SLANG_BOUND_CHECK_FIXED_ARRAY, which (per the build flags above) asserts
// and/or clamps an out-of-range index to 0 — or compiles to nothing.
template<typename T, size_t SIZE>
struct FixedArray
{
    const T& operator[](size_t index) const
    {
        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
        return m_data[index];
    }
    T& operator[](size_t index)
    {
        SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE);
        return m_data[index];
    }
    // Inline element storage.
    T m_data[SIZE];
};
// An array that has no specified size, becomes a 'Array'. This stores the size so it can
// potentially do bounds checking.
template<typename T>
struct Array
{
    // Bounds behavior comes from SLANG_BOUND_CHECK: assert and/or clamp to 0
    // depending on SLANG_PRELUDE_ENABLE_ASSERT / SLANG_ENABLE_BOUND_ZERO_INDEX.
    const T& operator[](size_t index) const
    {
        SLANG_BOUND_CHECK(index, count);
        return data[index];
    }
    T& operator[](size_t index)
    {
        SLANG_BOUND_CHECK(index, count);
        return data[index];
    }
    // Pointer to the elements. NOTE(review): no ownership management is
    // visible here — presumed to be a non-owning view; confirm with callers.
    T* data;
    // Number of elements reachable through `data`.
    size_t count;
};
/* Constant buffers become a pointer to the contained type, so ConstantBuffer<T> becomes T* in C++
* code.
*/
template<typename T, int COUNT>
struct Vector;
template<typename T>
struct Vector<T, 1>
{
T x;
const T& operator[](size_t /*index*/) const { return x; }
T& operator[](size_t /*index*/) { return x; }
operator T() const { return x; }
Vector() = default;
Vector(T scalar) { x = scalar; }
template<typename U>
Vector(Vector<U, 1> other)
{
x = (T)other.x;
}
template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 1;
if (otherSize < minSize)
minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
// 2-component vector specialization (fields x, y).
template<typename T>
struct Vector<T, 2>
{
    T x, y;
    // Note: there is no bounds check — any index other than 0 yields `y`.
    const T& operator[](size_t index) const { return index == 0 ? x : y; }
    T& operator[](size_t index) { return index == 0 ? x : y; }
    Vector() = default;
    // Splat constructor: both components set to `scalar`.
    Vector(T scalar) { x = y = scalar; }
    Vector(T _x, T _y)
    {
        x = _x;
        y = _y;
    }
    // Element-wise converting constructor from a same-size vector.
    template<typename U>
    Vector(Vector<U, 2> other)
    {
        x = (T)other.x;
        y = (T)other.y;
    }
    // Resizing conversion: copies min(2, otherSize) components; any
    // components beyond the source size are left uninitialized.
    template<typename U, int otherSize>
    Vector(Vector<U, otherSize> other)
    {
        int minSize = 2;
        if (otherSize < minSize)
            minSize = otherSize;
        for (int i = 0; i < minSize; i++)
            (*this)[i] = (T)other[i];
    }
};
template<typename T>
struct Vector<T, 3>
{
T x, y, z;
const T& operator[](size_t index) const { return *((T*)(this) + index); }
T& operator[](size_t index) { return *((T*)(this) + index); }
Vector() = default;
Vector(T scalar) { x = y = z = scalar; }
Vector(T _x, T _y, T _z)
{
x = _x;
y = _y;
z = _z;
}
template<typename U>
Vector(Vector<U, 3> other)
{
x = (T)other.x;
y = (T)other.y;
z = (T)other.z;
}
template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 3;
if (otherSize < minSize)
minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
template<typename T>
struct Vector<T, 4>
{
T x, y, z, w;
const T& operator[](size_t index) const { return *((T*)(this) + index); }
T& operator[](size_t index) { return *((T*)(this) + index); }
Vector() = default;
Vector(T scalar) { x = y = z = w = scalar; }
Vector(T _x, T _y, T _z, T _w)
{
x = _x;
y = _y;
z = _z;
w = _w;
}
template<typename U, int otherSize>
Vector(Vector<U, otherSize> other)
{
int minSize = 4;
if (otherSize < minSize)
minSize = otherSize;
for (int i = 0; i < minSize; i++)
(*this)[i] = (T)other[i];
}
};
// Component-wise select: result[i] = condition[i] ? v0[i] : v1[i].
// Both input vectors are fully evaluated before the call (function-argument
// semantics), unlike a short-circuiting scalar `?:`.
template<typename T, int N>
SLANG_FORCE_INLINE Vector<T, N> _slang_select(
    Vector<bool, N> condition,
    Vector<T, N> v0,
    Vector<T, N> v1)
{
    Vector<T, N> result;
    for (int i = 0; i < N; i++)
    {
        result[i] = condition[i] ? v0[i] : v1[i];
    }
    return result;
}
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
// Scalar overload of select: yields v0 when the condition holds, v1
// otherwise. Both arguments are evaluated at the call site regardless of
// the condition (ordinary function-argument semantics).
template<typename T>
SLANG_FORCE_INLINE T _slang_select(bool condition, T v0, T v1)
{
    if (condition)
        return v0;
    return v1;
}
template<typename T, int N>
SLANG_FORCE_INLINE T _slang_vector_get_element(Vector<T, N> x, int index)
{
return x[index];
}
template<typename T, int N>
SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector<T, N>* x, int index)
{
return &((*const_cast<Vector<T, N>*>(x))[index]);
}
template<typename T, int N>
SLANG_FORCE_INLINE T* _slang_vector_get_element_ptr(Vector<T, N>* x, int index)
{
return &((*x)[index]);
}
template<typename T, int n, typename OtherT, int m>
SLANG_FORCE_INLINE Vector<T, n> _slang_vector_reshape(const Vector<OtherT, m> other)
{
Vector<T, n> result;
for (int i = 0; i < n; i++)
{
OtherT otherElement = T(0);
if (i < m)
otherElement = _slang_vector_get_element(other, i);
*_slang_vector_get_element_ptr(&result, i) = (T)otherElement;
}
return result;
}
// Shorthand used throughout the generated code for a 32-bit unsigned scalar.
typedef uint32_t uint;
// Element-wise binary operator over two Vector<T, n> values, yielding a
// Vector<T, n>. Note that when instantiated with && or ||, the operator is
// element-wise and therefore does NOT short-circuit.
#define SLANG_VECTOR_BINARY_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<T, n> operator op( \
        const Vector<T, n>& thisVal, \
        const Vector<T, n>& other) \
    { \
        Vector<T, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = thisVal[i] op other[i]; \
        return result; \
    }
// Element-wise comparison; the result is a Vector<bool, n>, matching the
// per-component comparison semantics of shading languages.
#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<bool, n> operator op( \
        const Vector<T, n>& thisVal, \
        const Vector<T, n>& other) \
    { \
        Vector<bool, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = thisVal[i] op other[i]; \
        return result; \
    }
// Element-wise unary operator (e.g. !, ~, unary -) over a Vector<T, n>.
#define SLANG_VECTOR_UNARY_OP(T, op) \
    template<int n> \
    SLANG_FORCE_INLINE Vector<T, n> operator op(const Vector<T, n>& thisVal) \
    { \
        Vector<T, n> result; \
        for (int i = 0; i < n; i++) \
            result[i] = op thisVal[i]; \
        return result; \
    }
// Full operator set for integer-like element types: arithmetic, bitwise,
// logical, shifts, comparisons, and the unary ! and ~ operators.
#define SLANG_INT_VECTOR_OPS(T) \
    SLANG_VECTOR_BINARY_OP(T, +) \
    SLANG_VECTOR_BINARY_OP(T, -) \
    SLANG_VECTOR_BINARY_OP(T, *) \
    SLANG_VECTOR_BINARY_OP(T, /) \
    SLANG_VECTOR_BINARY_OP(T, &) \
    SLANG_VECTOR_BINARY_OP(T, |) \
    SLANG_VECTOR_BINARY_OP(T, &&) \
    SLANG_VECTOR_BINARY_OP(T, ||) \
    SLANG_VECTOR_BINARY_OP(T, ^) \
    SLANG_VECTOR_BINARY_OP(T, %) \
    SLANG_VECTOR_BINARY_OP(T, >>) \
    SLANG_VECTOR_BINARY_OP(T, <<) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=) \
    SLANG_VECTOR_UNARY_OP(T, !) \
    SLANG_VECTOR_UNARY_OP(T, ~)
// Reduced operator set for floating-point element types: arithmetic, unary
// negation, and comparisons (no bitwise/shift operators; % is handled
// separately below via _slang_fmod).
#define SLANG_FLOAT_VECTOR_OPS(T) \
    SLANG_VECTOR_BINARY_OP(T, +) \
    SLANG_VECTOR_BINARY_OP(T, -) \
    SLANG_VECTOR_BINARY_OP(T, *) \
    SLANG_VECTOR_BINARY_OP(T, /) \
    SLANG_VECTOR_UNARY_OP(T, -) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, >=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, <=) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, ==) \
    SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)
// Instantiate the element-wise operator set for every scalar element type the
// generated code can use. bool goes through the integer group, which also
// gives bool vectors the bitwise/shift/% operators defined there.
SLANG_INT_VECTOR_OPS(bool)
SLANG_INT_VECTOR_OPS(int)
SLANG_INT_VECTOR_OPS(int8_t)
SLANG_INT_VECTOR_OPS(int16_t)
SLANG_INT_VECTOR_OPS(int64_t)
SLANG_INT_VECTOR_OPS(uint)
SLANG_INT_VECTOR_OPS(uint8_t)
SLANG_INT_VECTOR_OPS(uint16_t)
SLANG_INT_VECTOR_OPS(uint64_t)
SLANG_FLOAT_VECTOR_OPS(float)
SLANG_FLOAT_VECTOR_OPS(double)
// Component-wise negation for integer vectors, written as `0 - x` rather than
// unary minus — presumably to avoid unary-minus-on-unsigned warnings for the
// unsigned instantiations below (TODO confirm).
#define SLANG_VECTOR_INT_NEG_OP(T) \
    template<int N> \
    Vector<T, N> operator-(const Vector<T, N>& thisVal) \
    { \
        Vector<T, N> result; \
        for (int i = 0; i < N; i++) \
            result[i] = 0 - thisVal[i]; \
        return result; \
    }
SLANG_VECTOR_INT_NEG_OP(int)
SLANG_VECTOR_INT_NEG_OP(int8_t)
SLANG_VECTOR_INT_NEG_OP(int16_t)
SLANG_VECTOR_INT_NEG_OP(int64_t)
SLANG_VECTOR_INT_NEG_OP(uint)
SLANG_VECTOR_INT_NEG_OP(uint8_t)
SLANG_VECTOR_INT_NEG_OP(uint16_t)
SLANG_VECTOR_INT_NEG_OP(uint64_t)
// Component-wise % for float vectors, implemented via _slang_fmod.
#define SLANG_FLOAT_VECTOR_MOD(T) \
    template<int N> \
    Vector<T, N> operator%(const Vector<T, N>& left, const Vector<T, N>& right) \
    { \
        Vector<T, N> result; \
        for (int i = 0; i < N; i++) \
            result[i] = _slang_fmod(left[i], right[i]); \
        return result; \
    }
SLANG_FLOAT_VECTOR_MOD(float)
SLANG_FLOAT_VECTOR_MOD(double)
// Scrub the helper macros now that all instantiations are emitted.
// NOTE(review): SLANG_FLOAT_VECTOR_MOD is #undef'd twice (here and at the end
// of this list) and SLANG_VECTOR_BINARY_COMPARE_OP is never #undef'd —
// harmless, but looks unintentional.
#undef SLANG_FLOAT_VECTOR_MOD
#undef SLANG_VECTOR_BINARY_OP
#undef SLANG_VECTOR_UNARY_OP
#undef SLANG_INT_VECTOR_OPS
#undef SLANG_FLOAT_VECTOR_OPS
#undef SLANG_VECTOR_INT_NEG_OP
#undef SLANG_FLOAT_VECTOR_MOD
// Row-major ROWS x COLS matrix type used by generated code; storage is an
// array of row vectors. Generated code accesses `rows` directly as well as
// through operator[].
template<typename T, int ROWS, int COLS>
struct Matrix
{
    Vector<T, COLS> rows[ROWS];
    // Row access; unchecked, like m[i] in shading languages.
    Vector<T, COLS>& operator[](size_t index) { return rows[index]; }
    Matrix() = default;
    // Broadcast a scalar into every element.
    Matrix(T scalar)
    {
        for (int i = 0; i < ROWS; i++)
            rows[i] = Vector<T, COLS>(scalar);
    }
    // Row-wise constructors for 1..4 rows; only the overload whose row count
    // matches ROWS is valid to call for a given instantiation.
    Matrix(const Vector<T, COLS>& row0) { rows[0] = row0; }
    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1)
    {
        rows[0] = row0;
        rows[1] = row1;
    }
    Matrix(const Vector<T, COLS>& row0, const Vector<T, COLS>& row1, const Vector<T, COLS>& row2)
    {
        rows[0] = row0;
        rows[1] = row1;
        rows[2] = row2;
    }
    Matrix(
        const Vector<T, COLS>& row0,
        const Vector<T, COLS>& row1,
        const Vector<T, COLS>& row2,
        const Vector<T, COLS>& row3)
    {
        rows[0] = row0;
        rows[1] = row1;
        rows[2] = row2;
        rows[3] = row3;
    }
    // Converting constructor: element-wise cast of the overlapping
    // min(ROWS, otherRow) x min(COLS, otherCol) region.
    // NOTE(review): elements outside that region are whatever Vector's default
    // constructor leaves them as (presumably uninitialized) — confirm callers
    // never read them.
    template<typename U, int otherRow, int otherCol>
    Matrix(const Matrix<U, otherRow, otherCol>& other)
    {
        int minRow = ROWS;
        int minCol = COLS;
        if (minRow > otherRow)
            minRow = otherRow;
        if (minCol > otherCol)
            minCol = otherCol;
        for (int i = 0; i < minRow; i++)
            for (int j = 0; j < minCol; j++)
                rows[i][j] = (T)other.rows[i][j];
    }
    // Element-wise constructors in row-major order. Where two shapes share an
    // element count (6, 8 or 12), a runtime test on COLS picks the layout;
    // the untaken branch is dead code for that instantiation, so its
    // out-of-range row indices are never executed.
    // 4 elements: 2x2.
    Matrix(T v0, T v1, T v2, T v3)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[1][0] = v2;
        rows[1][1] = v3;
    }
    // 6 elements: 2x3 or 3x2.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5)
    {
        if (COLS == 3)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[1][0] = v3;
            rows[1][1] = v4;
            rows[1][2] = v5;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[1][0] = v2;
            rows[1][1] = v3;
            rows[2][0] = v4;
            rows[2][1] = v5;
        }
    }
    // 8 elements: 2x4 or 4x2.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7)
    {
        if (COLS == 4)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[0][3] = v3;
            rows[1][0] = v4;
            rows[1][1] = v5;
            rows[1][2] = v6;
            rows[1][3] = v7;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[1][0] = v2;
            rows[1][1] = v3;
            rows[2][0] = v4;
            rows[2][1] = v5;
            rows[3][0] = v6;
            rows[3][1] = v7;
        }
    }
    // 9 elements: 3x3.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[0][2] = v2;
        rows[1][0] = v3;
        rows[1][1] = v4;
        rows[1][2] = v5;
        rows[2][0] = v6;
        rows[2][1] = v7;
        rows[2][2] = v8;
    }
    // 12 elements: 3x4 or 4x3.
    Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11)
    {
        if (COLS == 4)
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[0][3] = v3;
            rows[1][0] = v4;
            rows[1][1] = v5;
            rows[1][2] = v6;
            rows[1][3] = v7;
            rows[2][0] = v8;
            rows[2][1] = v9;
            rows[2][2] = v10;
            rows[2][3] = v11;
        }
        else
        {
            rows[0][0] = v0;
            rows[0][1] = v1;
            rows[0][2] = v2;
            rows[1][0] = v3;
            rows[1][1] = v4;
            rows[1][2] = v5;
            rows[2][0] = v6;
            rows[2][1] = v7;
            rows[2][2] = v8;
            rows[3][0] = v9;
            rows[3][1] = v10;
            rows[3][2] = v11;
        }
    }
    // 16 elements: 4x4.
    Matrix(
        T v0,
        T v1,
        T v2,
        T v3,
        T v4,
        T v5,
        T v6,
        T v7,
        T v8,
        T v9,
        T v10,
        T v11,
        T v12,
        T v13,
        T v14,
        T v15)
    {
        rows[0][0] = v0;
        rows[0][1] = v1;
        rows[0][2] = v2;
        rows[0][3] = v3;
        rows[1][0] = v4;
        rows[1][1] = v5;
        rows[1][2] = v6;
        rows[1][3] = v7;
        rows[2][0] = v8;
        rows[2][1] = v9;
        rows[2][2] = v10;
        rows[2][3] = v11;
        rows[3][0] = v12;
        rows[3][1] = v13;
        rows[3][2] = v14;
        rows[3][3] = v15;
    }
};
// Element-wise binary operator over two Matrix<T, R, C> values, yielding a
// Matrix<T, R, C>.
#define SLANG_MATRIX_BINARY_OP(T, op) \
    template<int R, int C> \
    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal, const Matrix<T, R, C>& other) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \
        return result; \
    }
// Element-wise unary operator (!, ~, unary -) over a Matrix<T, R, C>.
// Fixed: the original wrote through `result[i].rows[i][j]`, but `result[i]` is
// already the i'th row (a Vector<T, C>) which has no `rows` member, so any
// instantiation of these operators would fail to compile. Write through
// `result.rows[i][j]`, matching SLANG_MATRIX_BINARY_OP above and
// SLANG_MATRIX_INT_NEG_OP below.
#define SLANG_MATRIX_UNARY_OP(T, op) \
    template<int R, int C> \
    Matrix<T, R, C> operator op(const Matrix<T, R, C>& thisVal) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = op thisVal.rows[i][j]; \
        return result; \
    }
// Operator group for integer-like matrix element types. Unlike the vector
// groups, no shift or comparison operators are generated for matrices.
#define SLANG_INT_MATRIX_OPS(T) \
    SLANG_MATRIX_BINARY_OP(T, +) \
    SLANG_MATRIX_BINARY_OP(T, -) \
    SLANG_MATRIX_BINARY_OP(T, *) \
    SLANG_MATRIX_BINARY_OP(T, /) \
    SLANG_MATRIX_BINARY_OP(T, &) \
    SLANG_MATRIX_BINARY_OP(T, |) \
    SLANG_MATRIX_BINARY_OP(T, &&) \
    SLANG_MATRIX_BINARY_OP(T, ||) \
    SLANG_MATRIX_BINARY_OP(T, ^) \
    SLANG_MATRIX_BINARY_OP(T, %) \
    SLANG_MATRIX_UNARY_OP(T, !) \
    SLANG_MATRIX_UNARY_OP(T, ~)
// Operator group for floating-point matrix element types; % is handled
// separately below via _slang_fmod.
#define SLANG_FLOAT_MATRIX_OPS(T) \
    SLANG_MATRIX_BINARY_OP(T, +) \
    SLANG_MATRIX_BINARY_OP(T, -) \
    SLANG_MATRIX_BINARY_OP(T, *) \
    SLANG_MATRIX_BINARY_OP(T, /) \
    SLANG_MATRIX_UNARY_OP(T, -)
// Instantiate the matrix operator set for every supported element type
// (integer negation is added separately by SLANG_MATRIX_INT_NEG_OP below).
SLANG_INT_MATRIX_OPS(int)
SLANG_INT_MATRIX_OPS(int8_t)
SLANG_INT_MATRIX_OPS(int16_t)
SLANG_INT_MATRIX_OPS(int64_t)
SLANG_INT_MATRIX_OPS(uint)
SLANG_INT_MATRIX_OPS(uint8_t)
SLANG_INT_MATRIX_OPS(uint16_t)
SLANG_INT_MATRIX_OPS(uint64_t)
SLANG_FLOAT_MATRIX_OPS(float)
SLANG_FLOAT_MATRIX_OPS(double)
// Element-wise negation for integer matrices, written as `0 - x` — presumably
// to avoid unary-minus-on-unsigned warnings for the unsigned instantiations
// below (TODO confirm). Note `thisVal` is taken by value here, unlike the
// const-reference parameters of the other matrix operators.
#define SLANG_MATRIX_INT_NEG_OP(T) \
    template<int R, int C> \
    SLANG_FORCE_INLINE Matrix<T, R, C> operator-(Matrix<T, R, C> thisVal) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = 0 - thisVal.rows[i][j]; \
        return result; \
    }
SLANG_MATRIX_INT_NEG_OP(int)
SLANG_MATRIX_INT_NEG_OP(int8_t)
SLANG_MATRIX_INT_NEG_OP(int16_t)
SLANG_MATRIX_INT_NEG_OP(int64_t)
SLANG_MATRIX_INT_NEG_OP(uint)
SLANG_MATRIX_INT_NEG_OP(uint8_t)
SLANG_MATRIX_INT_NEG_OP(uint16_t)
SLANG_MATRIX_INT_NEG_OP(uint64_t)
// Element-wise % for float matrices, implemented via _slang_fmod.
#define SLANG_FLOAT_MATRIX_MOD(T) \
    template<int R, int C> \
    SLANG_FORCE_INLINE Matrix<T, R, C> operator%(Matrix<T, R, C> left, Matrix<T, R, C> right) \
    { \
        Matrix<T, R, C> result; \
        for (int i = 0; i < R; i++) \
            for (int j = 0; j < C; j++) \
                result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \
        return result; \
    }
SLANG_FLOAT_MATRIX_MOD(float)
SLANG_FLOAT_MATRIX_MOD(double)
// Scrub the helper macros.
// NOTE(review): SLANG_FLOAT_MATRIX_MOD is #undef'd twice (here and at the end
// of this list) — harmless but redundant.
#undef SLANG_FLOAT_MATRIX_MOD
#undef SLANG_MATRIX_BINARY_OP
#undef SLANG_MATRIX_UNARY_OP
#undef SLANG_INT_MATRIX_OPS
#undef SLANG_FLOAT_MATRIX_OPS
#undef SLANG_MATRIX_INT_NEG_OP
#undef SLANG_FLOAT_MATRIX_MOD
// Reinterpret the object representation of `val` as a value of type TResult
// (the shader-style bit_cast). The copy is performed byte-wise through
// unsigned char, which may alias any object — unlike the previous
// implementation (`*(TResult*)(&val)`), which violated strict aliasing and
// was undefined behavior under optimizing compilers.
// NOTE(review): like the original, this reads sizeof(TResult) bytes starting
// at `val`; callers are expected to use it only when
// sizeof(TInput) >= sizeof(TResult).
template<typename TResult, typename TInput>
TResult slang_bit_cast(TInput val)
{
    TResult result;
    unsigned char* dst = (unsigned char*)&result;
    const unsigned char* src = (const unsigned char*)&val;
    for (size_t i = 0; i < sizeof(TResult); ++i)
        dst[i] = src[i];
    return result;
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,8 +0,0 @@
// Optional NVAPI shader-extension support: pulled in only when the build
// opts in via SLANG_HLSL_ENABLE_NVAPI.
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
// __DXC_VERSION_MAJOR is only defined by dxc, so this branch targets the
// legacy fxc compiler, which supports #pragma warning.
#ifndef __DXC_VERSION_MAJOR
// warning X3557: loop doesn't seem to do anything, forcing loop to unroll
#pragma warning(disable : 3557)
#endif

View file

@ -1,49 +0,0 @@
// slang-image-format-defs.h
//
// X-macro table of image formats. The includer defines
// SLANG_FORMAT(name, (kind, channelCount, sizeInBytes)) before including this
// file to generate per-format code; the macro is #undef'd at the bottom.
// NOTE(review): the tuple meaning (scalar kind, channel count, bytes per
// texel) is inferred from the sizeof expressions below — confirm against the
// consumer of SLANG_FORMAT.
#ifndef SLANG_FORMAT
#error Must define SLANG_FORMAT macro before including image-format-defs.h
#endif
SLANG_FORMAT(unknown, (NONE, 0, 0))
SLANG_FORMAT(rgba32f, (FLOAT32, 4, sizeof(float) * 4))
SLANG_FORMAT(rgba16f, (FLOAT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rg32f, (FLOAT32, 2, sizeof(float) * 2))
SLANG_FORMAT(rg16f, (FLOAT16, 2, sizeof(uint16_t) * 2))
// Packed formats carry kind NONE and a whole-texel size.
SLANG_FORMAT(r11f_g11f_b10f, (NONE, 3, sizeof(uint32_t)))
SLANG_FORMAT(r32f, (FLOAT32, 1, sizeof(float)))
SLANG_FORMAT(r16f, (FLOAT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(rgba16, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgb10_a2, (NONE, 4, sizeof(uint32_t)))
SLANG_FORMAT(rgba8, (UINT8, 4, sizeof(uint32_t)))
SLANG_FORMAT(rg16, (UINT16, 2, sizeof(uint16_t) * 2))
// NOTE(review): uses sizeof(char) where sibling entries use sizeof(uint8_t) —
// identical value, likely just an inconsistency.
SLANG_FORMAT(rg8, (UINT8, 2, sizeof(char) * 2))
SLANG_FORMAT(r16, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(rgba16_snorm, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgba8_snorm, (UINT8, 4, sizeof(uint8_t) * 4))
SLANG_FORMAT(rg16_snorm, (UINT16, 2, sizeof(uint16_t) * 2))
SLANG_FORMAT(rg8_snorm, (UINT8, 2, sizeof(uint8_t) * 2))
SLANG_FORMAT(r16_snorm, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8_snorm, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(rgba32i, (INT32, 4, sizeof(int32_t) * 4))
SLANG_FORMAT(rgba16i, (INT16, 4, sizeof(int16_t) * 4))
SLANG_FORMAT(rgba8i, (INT8, 4, sizeof(int8_t) * 4))
SLANG_FORMAT(rg32i, (INT32, 2, sizeof(int32_t) * 2))
SLANG_FORMAT(rg16i, (INT16, 2, sizeof(int16_t) * 2))
SLANG_FORMAT(rg8i, (INT8, 2, sizeof(int8_t) * 2))
SLANG_FORMAT(r32i, (INT32, 1, sizeof(int32_t)))
SLANG_FORMAT(r16i, (INT16, 1, sizeof(int16_t)))
SLANG_FORMAT(r8i, (INT8, 1, sizeof(int8_t)))
SLANG_FORMAT(rgba32ui, (UINT32, 4, sizeof(uint32_t) * 4))
SLANG_FORMAT(rgba16ui, (UINT16, 4, sizeof(uint16_t) * 4))
SLANG_FORMAT(rgb10_a2ui, (NONE, 4, sizeof(uint32_t)))
SLANG_FORMAT(rgba8ui, (UINT8, 4, sizeof(uint8_t) * 4))
SLANG_FORMAT(rg32ui, (UINT32, 2, sizeof(uint32_t) * 2))
SLANG_FORMAT(rg16ui, (UINT16, 2, sizeof(uint16_t) * 2))
SLANG_FORMAT(rg8ui, (UINT8, 2, sizeof(uint8_t) * 2))
SLANG_FORMAT(r32ui, (UINT32, 1, sizeof(uint32_t)))
SLANG_FORMAT(r16ui, (UINT16, 1, sizeof(uint16_t)))
SLANG_FORMAT(r8ui, (UINT8, 1, sizeof(uint8_t)))
SLANG_FORMAT(r64ui, (UINT64, 1, sizeof(uint64_t)))
SLANG_FORMAT(r64i, (INT64, 1, sizeof(int64_t)))
#undef SLANG_FORMAT

View file

@ -1,404 +0,0 @@
#ifndef SLANG_LLVM_H
#define SLANG_LLVM_H
// TODO(JS):
// Disable exception declspecs, as not supported on LLVM without some extra options.
// We could enable with `-fms-extensions`
#define SLANG_DISABLE_EXCEPTIONS 1
// Assertion plumbing for the slang-llvm (JIT) prelude. When
// SLANG_PRELUDE_ENABLE_ASSERT is defined, a failed check calls the
// host-provided assertFailure() with the stringized condition; otherwise both
// macros expand to nothing.
#ifndef SLANG_PRELUDE_ASSERT
#ifdef SLANG_PRELUDE_ENABLE_ASSERT
extern "C" void assertFailure(const char* msg);
#define SLANG_PRELUDE_EXPECT(VALUE, MSG) \
    if (VALUE) \
    { \
    } \
    else \
        assertFailure("assertion failed: '" MSG "'")
#define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE)
#else // SLANG_PRELUDE_ENABLE_ASSERT
// Assertions disabled: both macros compile away entirely.
#define SLANG_PRELUDE_EXPECT(VALUE, MSG)
#define SLANG_PRELUDE_ASSERT(x)
#endif // SLANG_PRELUDE_ENABLE_ASSERT
#endif
/*
Taken from stddef.h
*/
// Built from compiler-predefined macros so the JIT prelude compiles without
// any host headers.
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef __SIZE_TYPE__ size_t;
typedef __SIZE_TYPE__ rsize_t;
// typedef __WCHAR_TYPE__ wchar_t;
#if defined(__need_NULL)
#undef NULL
#ifdef __cplusplus
#if !defined(__MINGW32__) && !defined(_MSC_VER)
#define NULL __null
#else
#define NULL 0
#endif
#else
#define NULL ((void*)0)
#endif
#ifdef __cplusplus
#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
namespace std
{
typedef decltype(nullptr) nullptr_t;
}
using ::std::nullptr_t;
#endif
#endif
#undef __need_NULL
#endif /* defined(__need_NULL) */
/*
The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed.
*/
// LLVM/Clang types such that we can use LLVM/Clang without headers for C++ output from Slang
#ifdef __INT64_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
typedef __INT64_TYPE__ int64_t;
#endif /* __int8_t_defined */
typedef __UINT64_TYPE__ uint64_t;
#define __int_least64_t int64_t
#define __uint_least64_t uint64_t
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
typedef __int_least64_t int_least64_t;
typedef __uint_least64_t uint_least64_t;
typedef __int_least64_t int_fast64_t;
typedef __uint_least64_t uint_fast64_t;
#endif /* __int_least64_t */
#ifdef __INT32_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
typedef __INT32_TYPE__ int32_t;
#endif /* __int8_t_defined */
#ifndef __uint32_t_defined /* more glibc compatibility */
#define __uint32_t_defined
typedef __UINT32_TYPE__ uint32_t;
#endif /* __uint32_t_defined */
#define __int_least32_t int32_t
#define __uint_least32_t uint32_t
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
typedef __int_least32_t int_least32_t;
typedef __uint_least32_t uint_least32_t;
typedef __int_least32_t int_fast32_t;
typedef __uint_least32_t uint_fast32_t;
#endif /* __int_least32_t */
#ifdef __INT16_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/
typedef __INT16_TYPE__ int16_t;
#endif /* __int8_t_defined */
typedef __UINT16_TYPE__ uint16_t;
#define __int_least16_t int16_t
#define __uint_least16_t uint16_t
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
typedef __int_least16_t int_least16_t;
typedef __uint_least16_t uint_least16_t;
typedef __int_least16_t int_fast16_t;
typedef __uint_least16_t uint_fast16_t;
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/
typedef __INT8_TYPE__ int8_t;
#endif /* __int8_t_defined */
typedef __UINT8_TYPE__ uint8_t;
#define __int_least8_t int8_t
#define __uint_least8_t uint8_t
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
typedef __int_least8_t int_least8_t;
typedef __uint_least8_t uint_least8_t;
typedef __int_least8_t int_fast8_t;
typedef __uint_least8_t uint_fast8_t;
#endif /* __int_least8_t */
/* prevent glibc sys/types.h from defining conflicting types */
#ifndef __int8_t_defined
#define __int8_t_defined
#endif /* __int8_t_defined */
/* C99 7.18.1.4 Integer types capable of holding object pointers.
*/
#define __stdint_join3(a, b, c) a##b##c
#ifndef _INTPTR_T
#ifndef __intptr_t_defined
typedef __INTPTR_TYPE__ intptr_t;
#define __intptr_t_defined
#define _INTPTR_T
#endif
#endif
#ifndef _UINTPTR_T
typedef __UINTPTR_TYPE__ uintptr_t;
#define _UINTPTR_T
#endif
/* C99 7.18.1.5 Greatest-width integer types.
*/
typedef __INTMAX_TYPE__ intmax_t;
typedef __UINTMAX_TYPE__ uintmax_t;
/* C99 7.18.4 Macros for minimum-width integer constants.
*
* The standard requires that integer constant macros be defined for all the
* minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width
* types are required, the corresponding integer constant macros are defined
* here. This implementation also defines minimum-width types for every other
* integer width that the target implements, so corresponding macros are
* defined below, too.
*
* These macros are defined using the same successive-shrinking approach as
* the type definitions above. It is likewise important that macros are defined
* in order of decending width.
*
* Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the
* claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
*/
#define __int_c_join(a, b) a##b
#define __int_c(v, suffix) __int_c_join(v, suffix)
#define __uint_c(v, suffix) __int_c_join(v##U, suffix)
#ifdef __INT64_TYPE__
#ifdef __INT64_C_SUFFIX__
#define __int64_c_suffix __INT64_C_SUFFIX__
#else
#undef __int64_c_suffix
#endif /* __INT64_C_SUFFIX__ */
#endif /* __INT64_TYPE__ */
#ifdef __int_least64_t
#ifdef __int64_c_suffix
#define INT64_C(v) __int_c(v, __int64_c_suffix)
#define UINT64_C(v) __uint_c(v, __int64_c_suffix)
#else
#define INT64_C(v) v
#define UINT64_C(v) v##U
#endif /* __int64_c_suffix */
#endif /* __int_least64_t */
#ifdef __INT32_TYPE__
#ifdef __INT32_C_SUFFIX__
#define __int32_c_suffix __INT32_C_SUFFIX__
#else
#undef __int32_c_suffix
#endif /* __INT32_C_SUFFIX__ */
#endif /* __INT32_TYPE__ */
#ifdef __int_least32_t
#ifdef __int32_c_suffix
#define INT32_C(v) __int_c(v, __int32_c_suffix)
#define UINT32_C(v) __uint_c(v, __int32_c_suffix)
#else
#define INT32_C(v) v
#define UINT32_C(v) v##U
#endif /* __int32_c_suffix */
#endif /* __int_least32_t */
#ifdef __INT16_TYPE__
#ifdef __INT16_C_SUFFIX__
#define __int16_c_suffix __INT16_C_SUFFIX__
#else
#undef __int16_c_suffix
#endif /* __INT16_C_SUFFIX__ */
#endif /* __INT16_TYPE__ */
#ifdef __int_least16_t
#ifdef __int16_c_suffix
#define INT16_C(v) __int_c(v, __int16_c_suffix)
#define UINT16_C(v) __uint_c(v, __int16_c_suffix)
#else
#define INT16_C(v) v
#define UINT16_C(v) v##U
#endif /* __int16_c_suffix */
#endif /* __int_least16_t */
#ifdef __INT8_TYPE__
#ifdef __INT8_C_SUFFIX__
#define __int8_c_suffix __INT8_C_SUFFIX__
#else
#undef __int8_c_suffix
#endif /* __INT8_C_SUFFIX__ */
#endif /* __INT8_TYPE__ */
#ifdef __int_least8_t
#ifdef __int8_c_suffix
#define INT8_C(v) __int_c(v, __int8_c_suffix)
#define UINT8_C(v) __uint_c(v, __int8_c_suffix)
#else
#define INT8_C(v) v
#define UINT8_C(v) v##U
#endif /* __int8_c_suffix */
#endif /* __int_least8_t */
/* C99 7.18.2.1 Limits of exact-width integer types.
* C99 7.18.2.2 Limits of minimum-width integer types.
* C99 7.18.2.3 Limits of fastest minimum-width integer types.
*
* The presence of limit macros are completely optional in C99. This
* implementation defines limits for all of the types (exact- and
* minimum-width) that it defines above, using the limits of the minimum-width
* type for any types that do not have exact-width representations.
*
* As in the type definitions, this section takes an approach of
* successive-shrinking to determine which limits to use for the standard (8,
* 16, 32, 64) bit widths when they don't have exact representations. It is
* therefore important that the definitions be kept in order of decending
* widths.
*
* Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the
* claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).
*/
#ifdef __INT64_TYPE__
#define INT64_MAX INT64_C(9223372036854775807)
#define INT64_MIN (-INT64_C(9223372036854775807) - 1)
#define UINT64_MAX UINT64_C(18446744073709551615)
#define __INT_LEAST64_MIN INT64_MIN
#define __INT_LEAST64_MAX INT64_MAX
#define __UINT_LEAST64_MAX UINT64_MAX
#endif /* __INT64_TYPE__ */
#ifdef __INT_LEAST64_MIN
#define INT_LEAST64_MIN __INT_LEAST64_MIN
#define INT_LEAST64_MAX __INT_LEAST64_MAX
#define UINT_LEAST64_MAX __UINT_LEAST64_MAX
#define INT_FAST64_MIN __INT_LEAST64_MIN
#define INT_FAST64_MAX __INT_LEAST64_MAX
#define UINT_FAST64_MAX __UINT_LEAST64_MAX
#endif /* __INT_LEAST64_MIN */
#ifdef __INT32_TYPE__
#define INT32_MAX INT32_C(2147483647)
#define INT32_MIN (-INT32_C(2147483647) - 1)
#define UINT32_MAX UINT32_C(4294967295)
#define __INT_LEAST32_MIN INT32_MIN
#define __INT_LEAST32_MAX INT32_MAX
#define __UINT_LEAST32_MAX UINT32_MAX
#endif /* __INT32_TYPE__ */
#ifdef __INT_LEAST32_MIN
#define INT_LEAST32_MIN __INT_LEAST32_MIN
#define INT_LEAST32_MAX __INT_LEAST32_MAX
#define UINT_LEAST32_MAX __UINT_LEAST32_MAX
#define INT_FAST32_MIN __INT_LEAST32_MIN
#define INT_FAST32_MAX __INT_LEAST32_MAX
#define UINT_FAST32_MAX __UINT_LEAST32_MAX
#endif /* __INT_LEAST32_MIN */
#ifdef __INT16_TYPE__
#define INT16_MAX INT16_C(32767)
#define INT16_MIN (-INT16_C(32767) - 1)
#define UINT16_MAX UINT16_C(65535)
#define __INT_LEAST16_MIN INT16_MIN
#define __INT_LEAST16_MAX INT16_MAX
#define __UINT_LEAST16_MAX UINT16_MAX
#endif /* __INT16_TYPE__ */
#ifdef __INT_LEAST16_MIN
#define INT_LEAST16_MIN __INT_LEAST16_MIN
#define INT_LEAST16_MAX __INT_LEAST16_MAX
#define UINT_LEAST16_MAX __UINT_LEAST16_MAX
#define INT_FAST16_MIN __INT_LEAST16_MIN
#define INT_FAST16_MAX __INT_LEAST16_MAX
#define UINT_FAST16_MAX __UINT_LEAST16_MAX
#endif /* __INT_LEAST16_MIN */
#ifdef __INT8_TYPE__
#define INT8_MAX INT8_C(127)
#define INT8_MIN (-INT8_C(127) - 1)
#define UINT8_MAX UINT8_C(255)
#define __INT_LEAST8_MIN INT8_MIN
#define __INT_LEAST8_MAX INT8_MAX
#define __UINT_LEAST8_MAX UINT8_MAX
#endif /* __INT8_TYPE__ */
#ifdef __INT_LEAST8_MIN
#define INT_LEAST8_MIN __INT_LEAST8_MIN
#define INT_LEAST8_MAX __INT_LEAST8_MAX
#define UINT_LEAST8_MAX __UINT_LEAST8_MAX
#define INT_FAST8_MIN __INT_LEAST8_MIN
#define INT_FAST8_MAX __INT_LEAST8_MAX
#define UINT_FAST8_MAX __UINT_LEAST8_MAX
#endif /* __INT_LEAST8_MIN */
/* Some utility macros */
#define __INTN_MIN(n) __stdint_join3(INT, n, _MIN)
#define __INTN_MAX(n) __stdint_join3(INT, n, _MAX)
#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)
#define __INTN_C(n, v) __stdint_join3(INT, n, _C(v))
#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))
/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */
/* C99 7.18.3 Limits of other integer types. */
#define INTPTR_MIN (-__INTPTR_MAX__ - 1)
#define INTPTR_MAX __INTPTR_MAX__
#define UINTPTR_MAX __UINTPTR_MAX__
#define PTRDIFF_MIN (-__PTRDIFF_MAX__ - 1)
#define PTRDIFF_MAX __PTRDIFF_MAX__
#define SIZE_MAX __SIZE_MAX__
/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
* is enabled. */
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
#define RSIZE_MAX (SIZE_MAX >> 1)
#endif
/* C99 7.18.2.5 Limits of greatest-width integer types. */
#define INTMAX_MIN (-__INTMAX_MAX__ - 1)
#define INTMAX_MAX __INTMAX_MAX__
#define UINTMAX_MAX __UINTMAX_MAX__
/* C99 7.18.3 Limits of other integer types. */
#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)
#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)
#ifdef __WINT_UNSIGNED__
#define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)
#define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)
#else
#define WINT_MIN __INTN_MIN(__WINT_WIDTH__)
#define WINT_MAX __INTN_MAX(__WINT_WIDTH__)
#endif
#ifndef WCHAR_MAX
#define WCHAR_MAX __WCHAR_MAX__
#endif
#ifndef WCHAR_MIN
#if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)
#define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)
#else
#define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)
#endif
#endif
/* 7.18.4.2 Macros for greatest-width integer constants. */
#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)
#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)
#endif // SLANG_LLVM_H

View file

@ -1 +0,0 @@
// Slang release version tag embedded in the prelude.
#define SLANG_TAG_VERSION "2025.3.1"

View file

@ -1,181 +0,0 @@
// Prelude for PyTorch cpp binding.
// clang-format off
#include <torch/extension.h>
// clang-format on
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAUtils.h>
#include <stdexcept>
#include <string>
#include <vector>
#ifdef SLANG_LLVM
#include "slang-llvm.h"
#else // SLANG_LLVM
#if SLANG_GCC_FAMILY && __GNUC__ < 6
#include <cmath>
#define SLANG_PRELUDE_STD std::
#else
#include <math.h>
#define SLANG_PRELUDE_STD
#endif
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#endif // SLANG_LLVM
#include "../source/core/slang-string.h"
#if defined(_MSC_VER)
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport)
#else
#define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default")))
// # define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport))
// __attribute__((__visibility__("default")))
#endif
#ifdef __cplusplus
#define SLANG_PRELUDE_EXTERN_C extern "C"
#define SLANG_PRELUDE_EXTERN_C_START \
extern "C" \
{
#define SLANG_PRELUDE_EXTERN_C_END }
#else
#define SLANG_PRELUDE_EXTERN_C
#define SLANG_PRELUDE_EXTERN_C_START
#define SLANG_PRELUDE_EXTERN_C_END
#endif
#define SLANG_PRELUDE_NAMESPACE
#ifndef SLANG_NO_THROW
#define SLANG_NO_THROW
#endif
#ifndef SLANG_STDCALL
#define SLANG_STDCALL
#endif
#ifndef SLANG_MCALL
#define SLANG_MCALL SLANG_STDCALL
#endif
#ifndef SLANG_FORCE_INLINE
#define SLANG_FORCE_INLINE inline
#endif
#include "slang-cpp-scalar-intrinsics.h"
#include "slang-cpp-types-core.h"
// Maximum tensor rank supported when passing tensors to generated kernels.
static const int kSlangTorchTensorMaxDim = 5;
// POD mirror of a torch tensor handed to generated CUDA kernels: a raw data
// pointer plus per-dimension strides and element counts. Strides are in
// *bytes* (see make_tensor_view, which multiplies torch's element strides by
// the element size). Only the first `dimensionCount` entries of the arrays
// are valid.
struct TensorView
{
    uint8_t* data;
    uint32_t strides[kSlangTorchTensorMaxDim]; // byte strides, per dimension
    uint32_t sizes[kSlangTorchTensorMaxDim];   // element counts, per dimension
    uint32_t dimensionCount;
};
// Validate a torch::Tensor and wrap it in a TensorView for a generated kernel.
//
// `name` is used only to prefix error messages. The tensor must:
//   - live on a CUDA device,
//   - have dtype exactly `targetScalarType` (no implicit casting, see below),
//   - have at most kSlangTorchTensorMaxDim dimensions,
//   - be contiguous when `requireContiguous` is set, and
//   - have no zero strides (broadcasted views are rejected).
// The returned view's strides are in *bytes* (torch stride * element size).
// Throws std::runtime_error on any violation.
TensorView make_tensor_view(
    torch::Tensor val,
    const char* name,
    torch::ScalarType targetScalarType,
    bool requireContiguous)
{
    // We're currently not trying to implicitly cast or transfer to device for two reasons:
    // 1. There appears to be a bug with .to() where successive calls after the first one fail.
    // 2. Silent casts like this can cause large memory allocations & unexpected overheads.
    // It's better to be explicit.
    // Expect tensors to be on CUDA device
    if (!val.device().is_cuda())
        throw std::runtime_error(
            std::string(name).append(": tensor is not on CUDA device.").c_str());
    // Expect tensors to be the right type.
    if (val.dtype() != targetScalarType)
        throw std::runtime_error(
            std::string(name).append(": tensor is not of the expected type.").c_str());
    // Check that the tensor is contiguous
    if (requireContiguous && !val.is_contiguous())
        throw std::runtime_error(std::string(name).append(": tensor is not contiguous.").c_str());
    TensorView res = {};
    res.dimensionCount = val.dim();
    res.data = nullptr;
    size_t elementSize = 4;
    // Translate the dtype into an element size and a typed data pointer.
    // NOTE(review): dtypes not listed here leave res.data null; a non-empty
    // tensor of such a dtype is caught by the final check below, but an empty
    // one passes through silently.
    switch (val.scalar_type())
    {
    case torch::kInt8:
    case torch::kUInt8:
        elementSize = 1;
        res.data = (uint8_t*)val.data_ptr<uint8_t>();
        break;
    case torch::kBFloat16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<torch::BFloat16>();
        break;
    case torch::kFloat16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<at::Half>();
        break;
    case torch::kInt16:
        elementSize = 2;
        res.data = (uint8_t*)val.data_ptr<int16_t>();
        break;
    case torch::kFloat32:
        elementSize = 4;
        res.data = (uint8_t*)val.data_ptr<float>();
        break;
    case torch::kInt32:
        elementSize = 4;
        res.data = (uint8_t*)val.data_ptr<int32_t>();
        break;
    case torch::kFloat64:
        elementSize = 8;
        res.data = (uint8_t*)val.data_ptr<double>();
        break;
    case torch::kInt64:
        elementSize = 8;
        res.data = (uint8_t*)val.data_ptr<int64_t>();
        break;
    case torch::kBool:
        elementSize = 1;
        res.data = (uint8_t*)val.data_ptr<bool>();
        break;
    }
    if (val.dim() > kSlangTorchTensorMaxDim)
        throw std::runtime_error(std::string(name)
                                     .append(": number of dimensions exceeds limit (")
                                     .append(std::to_string(kSlangTorchTensorMaxDim))
                                     .append(")")
                                     .c_str());
    bool isEmpty = true;
    for (int i = 0; i < val.dim(); ++i)
    {
        // torch strides are in elements; TensorView uses byte strides.
        res.strides[i] = val.stride(i) * elementSize;
        if (res.strides[i] == 0)
            throw std::runtime_error(
                std::string(name)
                    .append(": tensors with broadcasted dimensions are not supported (use "
                            "tensor.contiguous() to make tensor whole)")
                    .c_str());
        res.sizes[i] = val.size(i);
        if (res.sizes[i] > 0)
            isEmpty = false;
    }
    // Empty tensors legitimately have no data pointer; anything else must.
    if (!res.data && !isEmpty)
        throw std::runtime_error(std::string(name).append(": data pointer is invalid.").c_str());
    return res;
}
// NOTE(review): intentionally empty — export decoration appears unnecessary
// for the PyTorch binding path; confirm against the other preludes, which
// define it to a visibility attribute.
#define SLANG_PRELUDE_EXPORT

File diff suppressed because it is too large Load diff

View file

@ -1,156 +0,0 @@
Slang 64-bit Type Support
=========================
## Summary
* Not all targets support 64 bit types, or all 64 bit types
* 64 bit integers generally require later APIs/shader models
* When specifying 64 bit floating-point literals *always* use the type suffixes (ie `L`)
* An integer literal will be interpreted as 64 bits if it cannot fit in a 32 bit value.
* GPU target/s generally do not support all double intrinsics
* Typically missing are transcendentals (sin, cos etc), logarithm and exponential functions
* CUDA is the exception supporting nearly all double intrinsics
* D3D
* D3D targets *appear* to support double intrinsics (like sin, cos, log etc), but behind the scenes they are actually being converted to float
* When using D3D12, it is best to use DXIL if you use double because there are some serious issues around double and DXBC
* VK will produce an error in validation if a double intrinsic is used that it does not support (which is most of them)
* Vector and Matrix types have even spottier than scalar intrinsic support across targets
Overview
========
The Slang language supports 64 bit built in types. Such as
* `double`
* `uint64_t`
* `int64_t`
This also applies to vector and matrix versions of these types.
Unfortunately if a specific target supports the type or the typical HLSL intrinsic functions (such as sin/cos/max/min etc) depends very much on the target.
Special attention has to be made with respect to literal 64 bit types. By default float literals if they do not have an explicit suffix are assumed to be 32 bit. There is a variety of reasons for this design choice - the main one being around by default behavior of getting good performance. The suffixes required for 64 bit types are as follows
```
// double - 'l' or 'L'
double a = 1.34e-200L;
// WRONG!: This is the same as b = double(float(1.34e-200)) which will be 0. Will produce a warning.
double b = 1.34e-200;
// int64_t - 'll' or 'LL' (or combination of upper/lower)
int64_t c = -5436365345345234ll;
int64_t e = ~0LL; // Same as 0xffffffffffffffff
// uint64_t - 'ull' or 'ULL' (or combination of upper/lower)
uint64_t g = 0x8000000000000000ull;
uint64_t i = ~0ull; // Same as 0xffffffffffffffff
uint64_t j = ~0; // Equivalent to 'i' because uint64_t(int64_t(~int32_t(0)));
```
These issues are discussed more on issue [#1185](https://github.com/shader-slang/slang/issues/1185)
The type of a decimal non-suffixed integer literal is the first integer type from the list [`int`, `int64_t`]
which can represent the specified literal value. If the value cannot fit, the literal is represented as an `uint64_t`
and a warning is given.
The type of a hexadecimal non-suffixed integer literal is the first type from the list [`int`, `uint`, `int64_t`, `uint64_t`]
that can represent the specified literal value. A non-suffixed integer literal will be 64 bit if it cannot fit in 32 bits.
```
// Same as int64_t a = int(1), the value can fit into a 32 bit integer.
int64_t a = 1;
// Same as int64_t b = int64_t(2147483648), the value cannot fit into a 32 bit integer.
int64_t b = 2147483648;
// Same as uint64_t c = uint64_t(18446744073709551615), the value is larger than the maximum value of a signed 64 bit
// integer, and is interpreted as an unsigned 64 bit integer. Warning is given.
uint64_t c = 18446744073709551615;
// Same as uint64_t = int(0x7FFFFFFF), the value can fit into a 32 bit integer.
uint64_t d = 0x7FFFFFFF;
// Same as uint64_t = int64_t(0x7FFFFFFFFFFFFFFF), the value cannot fit into an unsigned 32 bit integer but
// can fit into a signed 64 bit integer.
uint64_t e = 0x7FFFFFFFFFFFFFFF;
// Same as uint64_t = uint64_t(0xFFFFFFFFFFFFFFFF), the value cannot fit into a signed 64 bit integer, and
// is interpreted as an unsigned 64 bit integer.
uint64_t f = 0xFFFFFFFFFFFFFFFF;
```
Double support
==============
Target | Compiler/Binary | Double Type | Intrinsics | Notes
---------|------------------|----------------|-----------------------|-----------
CPU | | Yes | Yes | 1
CUDA     | NVRTC/PTX        | Yes            | Yes                   | 1
D3D12 | DXC/DXIL | Yes | Small Subset | 4
Vulkan | GlSlang/Spir-V | Yes | Partial | 2
D3D11 | FXC/DXBC | Yes | Small Subset | 4
D3D12 | FXC/DXBC | Yes | Small Subset | 3, 4
1) CUDA and CPU support most intrinsics, with the notable exception currently of matrix invert
2) In terms of lack of general intrinsic support, the restriction is described in https://www.khronos.org/registry/spir-v/specs/1.0/GLSL.std.450.html
The following intrinsics are available for Vulkan
`fmod` (as %), `rcp`, `sign`, `saturate`, `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `abs`, `min`, `max`, `smoothstep`, `lerp`, `clamp`, `step` and `asuint`.
These are tested in the test `tests/hlsl-intrinsic/scalar-double-vk-intrinsic.slang`.
What is missing are the transcendentals, such as expX and logX.
Note that GlSlang does produce Spir-V that contains double intrinsic calls for the missing intrinsics, the failure happens when validating the Spir-V
```
Validation: error 0: [ UNASSIGNED-CoreValidation-Shader-InconsistentSpirv ] Object: VK_NULL_HANDLE (Type = 0) | SPIR-V module not valid: GLSL.std.450 Sin: expected Result Type to be a 16 or 32-bit scalar or vector float type
%57 = OpExtInst %double %1 Sin %56
```
3) If a RWStructuredBuffer<double> is used on D3D12 with DXBC, and a double is written, it can lead to incorrect behavior. Thus it is recommended not to use double with DXBC, but to use DXIL to keep things simple. A test showing this problem is `tests/bugs/dxbc-double-problem.slang`. The test `tests/hlsl-intrinsic/scalar-double-simple.slang` shows that, when no double resource is used, doubles do appear to work on D3D12 DXBC.
4) If you compile code using double and intrinsics through Slang, at first blush it will seem to work. Assuming there are no errors in your code, your code will even typically appear to work correctly. Unfortunately what is really happening is that the backend compiler (fxc or dxc) is narrowing double to float and then using float intrinsics. It typically generates a warning when this happens, but unless there is an error in your code you will not see these warnings because dxc doesn't appear to have a mechanism to return warnings if there isn't an error. This is why everything appears to work - but actually any intrinsic call is losing precision silently.
Note on dxc by default Slang disables warnings - warnings need to be enabled to see the narrowing warnings.
There is another exception around the use of % - if you do this with double it will return an error saying only float is supported.
It appears that no intrinsics are available for double with fxc.
On dxc the following intrinsics are available with double:
`rcp`, `sign`, `saturate`, `abs`, `min`, `max`, `clamp`, `asuint`.
These are tested in the test `tests/hlsl-intrinsic/scalar-double-d3d-intrinsic.slang`.
There is no support for transcendentals (`sin`, `cos` etc) or `log`/`exp`. More surprising is that `sqrt`, `rsqrt`, `frac`, `ceil`, `floor`, `trunc`, `step`, `lerp`, `smoothstep` are also not supported.
uint64_t and int64_t Support
============================
Target | Compiler/Binary | u/int64_t Type | Intrinsic support | Notes
---------|------------------|----------------|--------------------|--------
CPU | | Yes | Yes |
CUDA     | NVRTC/PTX        | Yes            | Yes                |
Vulkan | GlSlang/Spir-V | Yes | Yes |
D3D12 | DXC/DXIL | Yes | Yes | 1
D3D11 | FXC/DXBC | No | No | 2
D3D12 | FXC/DXBC | No | No | 2
1) The [sm6.0 docs](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12) describe only supporting uint64_t, but dxc says int64_t is supported in [HLSL 2016](https://github.com/Microsoft/DirectXShaderCompiler/wiki/Language-Versions). Tests show that this is indeed the case.
2) uint64_t support requires https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12, so DXBC is not a target.
The intrinsics available on `uint64_t` type are `abs`, `min`, `max`, `clamp` and `countbits`.
The intrinsics available on `int64_t` type are `abs`, `min`, `max` and `clamp`.
GLSL
====
GLSL/Spir-v based targets do not support 'generated' intrinsics on matrix types. For example 'sin(mat)' will not work on GLSL/Spir-v.

View file

@ -1,35 +0,0 @@
Slang Documentation
===================
This directory contains documentation for the Slang system.
Some of the documentation is intended for users of the language and compiler, while other documentation is intended for developers contributing to the project.
Getting Started
---------------
The Slang [User's Guide](https://shader-slang.github.io/slang/user-guide/) provides an introduction to the Slang language and its major features, as well as the compilation and reflection API.
There is also documentation specific to using the [slangc](https://shader-slang.github.io/slang/user-guide/compiling.html#command-line-compilation-with-slangc) command-line tool.
Advanced Users
--------------
For the benefit of advanced users we provide detailed documentation on how Slang compiles code for specific platforms.
The [target compatibility guide](target-compatibility.md) gives an overview of feature compatibility for targets.
The [CPU target guide](cpu-target.md) gives information on compiling Slang or C++ source into shared libraries/executables or functions that can be directly executed. It also covers how to generate C++ code from Slang source.
The [CUDA target guide](cuda-target.md) provides information on compiling Slang/HLSL or CUDA source. Slang can compile to equivalent CUDA source, as well as to PTX via the nvrtc CUDA compiler.
Contributors
------------
For contributors to the Slang project, the information under the [`design/`](design/) directory may help explain the rationale behind certain design decisions and help when ramping up in the codebase.
Research
--------
The Slang project is based on a long history of research work. While understanding this research is not necessary for working with Slang, it may be instructive for understanding the big-picture goals of the language, as well as why certain critical decisions were made.
A [paper](http://graphics.cs.cmu.edu/projects/slang/) on the Slang system was accepted into SIGGRAPH 2018, and it provides an overview of the language and the compiler implementation.
Yong He's [dissertation](http://graphics.cs.cmu.edu/projects/renderergenerator/yong_he_thesis.pdf) provided more detailed discussion of the design of the Slang system.

View file

@ -1 +0,0 @@
theme: jekyll-theme-tactile

View file

@ -1,137 +0,0 @@
{% capture headingsWorkspace %}
{% comment %}
Copyright (c) 2018 Vladimir "allejo" Jimenez
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
{% endcomment %}
{% comment %}
Version 1.0.9
https://github.com/allejo/jekyll-anchor-headings
"Be the pull request you wish to see in the world." ~Ben Balter
Usage:
{% include anchor_headings.html html=content anchorBody="#" %}
Parameters:
* html (string) - the HTML of compiled markdown generated by kramdown in Jekyll
Optional Parameters:
* beforeHeading (bool) : false - Set to true if the anchor should be placed _before_ the heading's content
* headerAttrs (string) : '' - Any custom HTML attributes that will be added to the heading tag; you may NOT use `id`;
the `%heading%` and `%html_id%` placeholders are available
* anchorAttrs (string) : '' - Any custom HTML attributes that will be added to the `<a>` tag; you may NOT use `href`,
`class` or `title`;
the `%heading%` and `%html_id%` placeholders are available
* anchorBody (string) : '' - The content that will be placed inside the anchor; the `%heading%` placeholder is
available
* anchorClass (string) : '' - The class(es) that will be used for each anchor. Separate multiple classes with a
space
* anchorTitle (string) : '' - The `title` attribute that will be used for anchors
* h_min (int) : 1 - The minimum header level to build an anchor for; any header lower than this value will be
ignored
* h_max (int) : 6 - The maximum header level to build an anchor for; any header greater than this value will be
ignored
* bodyPrefix (string) : '' - Anything that should be inserted inside of the heading tag _before_ its anchor and
content
* bodySuffix (string) : '' - Anything that should be inserted inside of the heading tag _after_ its anchor and
content
Output:
The original HTML with the addition of anchors inside of all of the h1-h6 headings.
{% endcomment %}
{% assign minHeader = include.h_min | default: 1 %}
{% assign maxHeader = include.h_max | default: 2 %}
{% assign beforeHeading = include.beforeHeading %}
{% assign nodes = include.html | split: '<h' %} {% capture edited_headings %}{% endcapture %} {% for _node in nodes
%} {% capture node %}{{ _node | strip }}{% endcapture %} {% if node=="" %} {% continue %} {% endif %} {% assign
nextChar=node | replace: '"' , '' | strip | slice: 0, 1 %} {% assign headerLevel=nextChar | times: 1 %} <!-- If
the level is cast to 0, it means it's not a h1-h6 tag, so let's see if we need to fix it -->
{% if headerLevel == 0 %}
<!-- Split up the node based on closing angle brackets and get the first one. -->
{% assign firstChunk = node | split: '>' | first %}
<!-- If the first chunk does NOT contain a '<', that means we've broken another HTML tag that starts with 'h' -->
{% unless firstChunk contains '<' %} {% capture node %}<h{{ node }}{% endcapture %} {% endunless %} {% capture
edited_headings %}{{ edited_headings }}{{ node }}{% endcapture %} {% continue %} {% endif %} {% capture
_closingTag %}</h{{ headerLevel }}>{% endcapture %}
{% assign _workspace = node | split: _closingTag %}
{% assign _idWorkspace = _workspace[0] | split: 'id="' %}
{% assign _idWorkspace = _idWorkspace[1] | split: '"' %}
{% assign html_id = _idWorkspace[0] %}
{% capture _hAttrToStrip %}{{ _workspace[0] | split: '>' | first }}>{% endcapture %}
{% assign header = _workspace[0] | replace: _hAttrToStrip, '' %}
<!-- Build the anchor to inject for our heading -->
{% capture anchor %}{% endcapture %}
{% if html_id and headerLevel >= minHeader and headerLevel <= maxHeader %} {% assign escaped_header=header |
strip_html %} {% if include.headerAttrs %} {% capture _hAttrToStrip %}{{ _hAttrToStrip | split: '>' |
first }} {{ include.headerAttrs | replace: '%heading%' , escaped_header | replace: '%html_id%' , html_id
}}>{% endcapture %}
{% endif %}
{% capture anchor %}href="#{{ html_id }}"{% endcapture %}
{% if include.anchorClass %}
{% capture anchor %}{{ anchor }} class="{{ include.anchorClass }}"{% endcapture %}
{% endif %}
{% if include.anchorTitle %}
{% capture anchor %}{{ anchor }} title="{{ include.anchorTitle | replace: '%heading%', escaped_header
}}"{% endcapture %}
{% endif %}
{% if include.anchorAttrs %}
{% capture anchor %}{{ anchor }} {{ include.anchorAttrs | replace: '%heading%', escaped_header |
replace: '%html_id%', html_id }}{% endcapture %}
{% endif %}
{% capture anchor %}<a {{ anchor }}>{{ include.anchorBody | replace: '%heading%', escaped_header |
default: '' }}</a>{% endcapture %}
<!-- In order to prevent adding extra space after a heading, we'll let the 'anchor' value contain it -->
{% if beforeHeading %}
{% capture anchor %}{{ anchor }} {% endcapture %}
{% else %}
{% capture anchor %} {{ anchor }}{% endcapture %}
{% endif %}
{% endif %}
{% capture new_heading %}
<h{{ _hAttrToStrip }} {{ include.bodyPrefix }} {% if beforeHeading %} {{ anchor }}{{ header }} {% else
%} {{ header }}{{ anchor }} {% endif %} {{ include.bodySuffix }} </h{{ headerLevel }}>
{% endcapture %}
<!--
If we have content after the `</hX>` tag, then we'll want to append that here so we don't lost any content.
-->
{% assign chunkCount = _workspace | size %}
{% if chunkCount > 1 %}
{% capture new_heading %}{{ new_heading }}{{ _workspace | last }}{% endcapture %}
{% endif %}
{% capture edited_headings %}{{ edited_headings }}{{ new_heading }}{% endcapture %}
{% endfor %}
{% endcapture %}{% assign headingsWorkspace = '' %}{{ edited_headings | strip }}

View file

@ -1,417 +0,0 @@
<!DOCTYPE html>
<html lang="{{ site.lang | default: " en-US" }}">
<head>
<meta charset='utf-8'>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
<link rel="stylesheet" type="text/css" href="{{ '/assets/css/print.css' | relative_url }}" media="print">
<script async src="https://www.googletagmanager.com/gtag/js?id=G-TMTZVLLMBP"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-TMTZVLLMBP');
</script>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<style>
#centeringDiv {
margin: auto;
max-width: 1200px;
}
#navDiv
{
display: block;
box-sizing: border-box;
padding-top: 5px;
padding-bottom: 5px;
border-bottom-width: 3px;
border-bottom-style: solid;
border-bottom-color: #F0F0F0;
}
#navDiv nav
{
float:left;
}
#navDiv::after {
content: "";
clear: both;
display: table;
}
#navDiv nav li::after
{
content: "/";
padding-left: 10px;
padding-right: 0px;
color: #808080;
}
#navDiv nav li
{
display:inline;
padding-left: 10px;
padding-right: 0px;
}
#tocColumn {
width: 350px;
position: fixed;
overflow-y: auto;
box-sizing: border-box;
display: block;
}
#tocInner {
padding: 20px;
}
#rightColumn {
padding-left: 390px;
padding-right: 40px;
padding-top: 20px;
}
.toc_root_list {
list-style-type: none;
list-style-position: outside;
background-color: initial;
padding-left: 0px;
}
.toc_list {
padding-left: 16px;
background-color: initial;
list-style-type: none;
margin-bottom: 0px;
}
.toc_item {
cursor: pointer;
user-select: none;
list-style-type: none;
padding-left: 0px;
padding-top: 5px;
}
.toc_item_expanded::before {
content: "\25be";
cursor: pointer;
}
.toc_item_collapsed::before {
content: "\25b8";
cursor: pointer;
}
.toc_item_leaf {
padding-left: 14px;
cursor: pointer;
list-style-type: none;
}
.toc_span:hover
{
color: #d5000d;
}
.tocIcon
{
vertical-align: -2.5px;
}
.editButton
{
float: right;
margin-right: 10px;
color:#808080;
}
.editIcon
{
fill: currentColor;
vertical-align: text-top;
}
#btnToggleTOC {
display: none;
width: fit-content;
margin-left: 10px;
margin-top: 10px;
padding: 10px;
border-style: solid;
border-color: #808080;
border-width: 1px;
background-color: #E8E8E8;
}
#btnToggleTOC:hover {
background-color: #F0F0E8;
}
#btnToggleTOC:active {
background-color: #D4D4D4;
}
@media screen and (max-width: 900px) {
#tocColumn {
width: 300px;
display: block;
box-sizing: border-box;
}
#rightColumn {
padding-left: 320px;
padding-right: 20px;
}
}
@media screen and (max-width: 700px) {
#tocColumn {
width: 100%;
position: static;
display: none;
border-right-style: none;
box-sizing: content-box;
}
#tocInner {
padding: 10px;
}
#rightColumn {
padding-left: 10px;
padding-right: 10px;
}
#centeringDiv {
padding-left: 0px;
}
#btnToggleTOC {
display: block;
}
}
</style>
{% seo %}
</head>
<body>
<div id="centeringDiv">
<div id="navDiv">
{% include_relative nav.html %}
<a class="editButton" title="Edit this page" href="https://github.com/{{ site.github.repository_nwo }}/edit/master/docs/{{ page.path }}">
<svg class="editIcon" height="16" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
<path fill-rule="evenodd"
d="M11.013 1.427a1.75 1.75 0 012.474 0l1.086 1.086a1.75 1.75 0 010 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 01-.927-.928l.929-3.25a1.75 1.75 0 01.445-.758l8.61-8.61zm1.414 1.06a.25.25 0 00-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 000-.354l-1.086-1.086zM11.189 6.25L9.75 4.81l-6.286 6.287a.25.25 0 00-.064.108l-.558 1.953 1.953-.558a.249.249 0 00.108-.064l6.286-6.286z">
</path>
</svg>
</a>
</div>
<button id="btnToggleTOC" onclick="toggleTOC()">
<svg height="16" class="tocIcon" viewBox="0 0 16 16" version="1.1" width="16" aria-hidden="true">
<path fill-rule="evenodd"
d="M2 4a1 1 0 100-2 1 1 0 000 2zm3.75-1.5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zm0 5a.75.75 0 000 1.5h8.5a.75.75 0 000-1.5h-8.5zM3 8a1 1 0 11-2 0 1 1 0 012 0zm-1 6a1 1 0 100-2 1 1 0 000 2z">
</path>
</svg>
Table of Contents</button>
<div id="tocColumn">
<div id="tocInner">
{% include_relative toc.html %}
</div>
</div>
<div id="rightColumn">
<section id="main_content">
{% include anchor_headings.html html=content anchorBody="" %}
</section>
<a href="javascript:;" id="_content_end_"></a>
<footer>
{% if site.github.is_project_page %}
{{ site.title | default: site.github.repository_name }} is maintained by <a
href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a><br>
{% endif %}
This page was generated by <a href="https://pages.github.com">GitHub Pages</a>.
</footer>
</div>
</div>
<script>
// Fix for IE. Make sure String has `startsWith` method.
if (!String.prototype.startsWith)
{
String.prototype.startsWith = function (searchString, position) {
position = position || 0;
return this.indexOf(searchString, position) === position;
};
}
var tocColumn = document.getElementById("tocColumn");
var rightColumn = document.getElementById("rightColumn");
function updateScroll()
{
if (window.innerWidth < 700)
{
tocColumn.style.height = "";
return;
}
var top = Math.max(0, rightColumn.getBoundingClientRect().top);
tocColumn.style.top = top + "px";
tocColumn.style.height = (window.innerHeight-top) + "px";
}
function updatePosition()
{
if (window.innerWidth > 700)
tocColumn.style.display = "";
tocColumn.style.left = rightColumn.getBoundingClientRect().left + "px";
updateScroll();
}
window.addEventListener("resize", updatePosition);
updatePosition();
var tocItemsArray = [];
var subSectionItems = [];
var selectedItem = null;
function toggleTOC() {
var tocColumn = document.getElementById("tocColumn");
if (tocColumn.style.display == "block")
tocColumn.style.display = "none";
else
tocColumn.style.display = "block";
event.stopPropagation();
}
function expandItem(e) {
if (e == selectedItem)
e.style["font-weight"] = "bold";
var childList = e.getElementsByClassName("toc_list");
if (childList.length == 0)
return;
childList[0].style.display = "block";
childList[0].style["font-weight"] = "normal";
e.setAttribute("class", "toc_item toc_item_expanded");
}
function collapseItem(e) {
var childList = e.getElementsByClassName("toc_list");
if (childList.length == 0)
return;
childList[0].style.display = "none";
e.setAttribute("class", "toc_item toc_item_collapsed");
}
function tocSpanOnClick(e)
{
if (event.srcElement != null && event.srcElement.parentElement != null)
{
var link = event.srcElement.parentElement.getAttribute("data-link");
if (link != null)
{
var poundIndex = link.indexOf("#");
if (poundIndex == -1)
window.location.href = link + ".html";
else
window.location.href = link.substr(0, poundIndex) + ".html#" + link.substr(poundIndex+1, link.length - poundIndex - 1);
}
}
event.stopPropagation();
}
function tocItemOnClick(e)
{
if (event.srcElement == null) return;
// Toggle expanded/collapsed state.
if (event.srcElement.getAttribute("class").endsWith("toc_item_collapsed"))
expandItem(event.srcElement);
else if (event.srcElement.getAttribute("class").endsWith("toc_item_expanded"))
collapseItem(event.srcElement);
event.stopPropagation();
}
var path = window.location.pathname;
var pageName = path.split("/").pop();
var currentPageID = pageName.substr(0, pageName.lastIndexOf("."));
if (currentPageID.length == 0)
currentPageID = "index";
var tocLists = document.getElementsByClassName("toc_root_list");
for (var i = 0; i < tocLists.length; i++) {
var tocList = tocLists[i];
var items = tocList.getElementsByTagName("li")
for (var j = 0; j < items.length; j++)
tocItemsArray.push(items[j]);
}
for (var i = 0; i < tocItemsArray.length; i++) {
var item = tocItemsArray[i];
if (item.getAttribute("data-link") == currentPageID)
selectedItem = item;
if (item.getElementsByTagName("li").length != 0) {
collapseItem(item);
}
else {
item.setAttribute("class", "toc_item toc_item_leaf");
}
item.addEventListener("click", tocItemOnClick);
var innerSpan = item.getElementsByTagName("span");
if (innerSpan.length != 0)
{
innerSpan[0].addEventListener("click", tocSpanOnClick);
innerSpan[0].setAttribute("class", "toc_span");
}
}
var curItem = selectedItem;
while (curItem != null) {
expandItem(curItem);
curItem = curItem.parentElement;
if (curItem != null && curItem.getAttribute("class") != null &&
curItem.getAttribute("class").startsWith("toc_list"))
curItem = curItem.parentElement;
if (curItem != null && curItem.getAttribute("class") != null &&
curItem.getAttribute("class").startsWith("toc_root_list"))
break;
}
var subItems = selectedItem.getElementsByTagName("li");
var subSectionTitles = [];
var subSectionTitleStrs = [];
for (var i = 0; i < subItems.length; i++)
{
subSectionItems.push(subItems[i]);
var title = subItems[i].getAttribute("data-link");
var pos = title.lastIndexOf("#");
title = title.substr(pos + 1);
var element = document.getElementById(title);
subSectionTitles.push(element);
subSectionTitleStrs.push(title);
}
subSectionTitles.push(document.getElementById("_content_end_"));
function isSectionFullyVisible(id)
{
var titleElement = subSectionTitles[id];
var nextTitleElement = subSectionTitles[id+1];
return (titleElement.getBoundingClientRect().top >= 0 && nextTitleElement.getBoundingClientRect().top <= window.innerHeight);
}
function findCurrentSubsection()
{
var currentSubsectionID = -1;
for (var i = 0; i < subSectionItems.length; i++) {
var titleElement = subSectionTitles[i];
if (titleElement == null)
continue;
if (titleElement.getBoundingClientRect().top < window.innerHeight * 0.12)
currentSubsectionID = i;
}
return currentSubsectionID;
}
function updateCurrentSubsection(currentSubsectionID)
{
for (var i = 0; i < subSectionItems.length; i++)
{
if (i == currentSubsectionID || isSectionFullyVisible(i))
subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 600;
else
subSectionItems[i].getElementsByTagName("span")[0].style["font-weight"] = 400;
}
}
function windowScroll(e)
{
updateCurrentSubsection(findCurrentSubsection());
updateScroll();
}
window.addEventListener("scroll", windowScroll);
updateCurrentSubsection(findCurrentSubsection());
</script>
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
tex2jax: {
inlineMath: [ ['$$','$$'], ["\\(","\\)"] ],
displayMath: [ ['$$','$$'], ["\\(","\\)"] ],
},
TeX: {
Macros: {
bra: ["\\langle{#1}|", 1],
ket: ["|{#1}\\rangle", 1],
braket: ["\\langle{#1}\\rangle", 1],
bk: ["\\langle{#1}|{#2}|{#3}\\rangle", 3]
}
}
});
</script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>

View file

@ -1,203 +0,0 @@
---
---
@import "{{ site.theme }}";
a:hover {
text-decoration: underline;
}
h3 {
color: #363636;
}
h4 {
color: #363636;
}
blockquote {
background-color: #f2f2f2;
padding-top: 10px;
padding-bottom: 5px;
}
blockquote p {
font-size: 16px;
font-weight: 400;
margin-bottom: 5px;
color: #202020;
}
body {
color: initial;
text-shadow: none;
background: none;
}
#container
{
background:none;
}
.highlight .cm {
color: #148b04;
}
.highlight .cp {
color: #148b04;
}
.highlight .c1 {
color: #148b04;
}
.highlight .cs {
color: #148b04;
}
.highlight .c, .highlight .ch, .highlight .cd, .highlight .cpf {
color: #148b04;
}
.highlight .err {
color: #a61717;
background-color: #e3d2d2;
}
.highlight .gd {
color: #000000;
background-color: #ffdddd;
}
.highlight .ge {
color: #000000;
font-style: italic;
}
.highlight .gr {
color: #aa0000;
}
.highlight .gh {
color: #999999;
}
.highlight .gi {
color: #000000;
background-color: #ddffdd;
}
.highlight .go {
color: #888888;
}
.highlight .gp {
color: #555555;
}
.highlight .gu {
color: #aaaaaa;
}
.highlight .gt {
color: #aa0000;
}
.highlight .kc {
color: #1243d4;
}
.highlight .kd {
color: #1243d4;
}
.highlight .kn {
color: #1243d4;
}
.highlight .kp {
color: #1243d4;
}
.highlight .kr {
color: #1243d4;
}
.highlight .kt {
color: #1243d4;
}
.highlight .k, .highlight .kv {
color: #1243d4;
}
.highlight .m, .highlight .mb, .highlight .mx, .highlight .mi, .highlight .mf {
color: #7211c2;
}
.highlight .sa {
color: #000000;
}
.highlight .sb {
color: #d14;
}
.highlight .sc {
color: #d14;
}
.highlight .sd {
color: #d14;
}
.highlight .s2 {
color: #d14;
}
.highlight .se {
color: #d14;
}
.highlight .sh {
color: #d14;
}
.highlight .si {
color: #d14;
}
.highlight .sx {
color: #d14;
}
.highlight .sr {
color: #009926;
}
.highlight .s1 {
color: #d14;
}
.highlight .ss {
color: #990073;
}
.highlight .s, .highlight .dl {
color: #d14;
}
.highlight .na {
color: #008080;
}
.highlight .bp {
color: #999999;
}
.highlight .n{
color: black;
}
.highlight .nc {
color: #11abb9;
}
.highlight .nt {
color: #11abb9;
}
.highlight .vc {
color: #008080;
}
.highlight .vg {
color: #008080;
}
.highlight .vi {
color: #008080;
}
.highlight .nv, .highlight .vm {
color: #008080;
}
.highlight .ow {
color: #000000;
}
.highlight .o {
color: #000000;
}
.highlight .w {
color: #000000;
}
.highlight .p {color:#000000;}
code
{
background-color: initial;
border:none;
}
pre{
color: #000000;
background: #F8F8F8;
}
pre code {
color: #000000;
background-color: #F8F8F8;
}
.highlight
{
background: #F8F8F8;
}

View file

@ -1,62 +0,0 @@
# This script uses `slangc` to generate the core module reference documentation and push the updated
# documents to shader-slang/stdlib-reference repository.
# The stdlib-reference repository has github-pages setup so that the markdown files we generate
# in this step will be rendered as html pages by Jekyll upon a commit to the repository.
# So we we need to do here is to pull the stdlib-reference repository, regenerate the markdown files
# and push the changes back to the repository.
# The generated markdown files will be located in three folders:
# - ./global-decls
# - ./interfaces
# - ./types
# In addition, slangc will generate a table of content file `toc.html` which will be copied to
# ./_includes/stdlib-reference-toc.html for Jekyll for consume it correctly.
# If stdlib-reference folder does not exist, clone from github repo
if (-not (Test-Path ".\stdlib-reference")) {
git clone https://github.com/shader-slang/stdlib-reference/
}
else {
# If it already exist, just pull the latest changes.
cd stdlib-reference
git pull
cd ../
}
# Remove the old generated files.
Remove-Item -Path ".\stdlib-reference\global-decls" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\interfaces" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\types" -Recurse -Force
Remove-Item -Path ".\stdlib-reference\attributes" -Recurse -Force
# Use git describe to produce a version string and write it to _includes/version.inc.
# This file will be included by the stdlib-reference Jekyll template.
git describe --tags | Out-File -FilePath ".\stdlib-reference\_includes\version.inc" -Encoding ASCII
cd stdlib-reference
$slangPaths = @(
"../../build/RelWithDebInfo/bin/slangc.exe",
"../../build/Release/bin/slangc.exe",
"../../build/Debug/bin/slangc.exe"
)
$slangExe = $slangPaths | Where-Object { Test-Path $_ } | Select-Object -First 1
if ($slangExe) {
& $slangExe -compile-core-module -doc
Move-Item -Path ".\toc.html" -Destination ".\_includes\stdlib-reference-toc.html" -Force
git config user.email "bot@shader-slang.com"
git config user.name "Stdlib Reference Bot"
git add .
git commit -m "Update the core module reference"
git push
} else {
Write-Error "Could not find slangc executable in RelWithDebInfo or Release directories"
}
cd ../
# For local debugging only.
# Remove-Item -Path "D:\git_repo\stdlib-reference\global-decls" -Recurse -Force
# Remove-Item -Path "D:\git_repo\stdlib-reference\interfaces" -Recurse -Force
# Remove-Item -Path "D:\git_repo\stdlib-reference\types" -Recurse -Force
# Copy-Item -Path .\stdlib-reference\global-decls -Destination D:\git_repo\stdlib-reference\global-decls -Recurse -Force
# Copy-Item -Path .\stdlib-reference\interfaces -Destination D:\git_repo\stdlib-reference\interfaces -Recurse -Force
# Copy-Item -Path .\stdlib-reference\types -Destination D:\git_repo\stdlib-reference\types -Recurse -Force
# Copy-Item -Path .\stdlib-reference\_includes\stdlib-reference-toc.html -Destination D:\git_repo\stdlib-reference\_includes\stdlib-reference-toc.html -Force

View file

@ -1,12 +0,0 @@
$job = Start-Job -ArgumentList $PSScriptRoot -ScriptBlock {
Set-Location $args[0]
$code = (Get-Content -Raw -Path "scripts/Program.cs").ToString()
$assemblies = ("System.Core", "System.IO", "System.Collections")
Add-Type -ReferencedAssemblies $assemblies -TypeDefinition $code -Language CSharp
$path = Join-Path -Path $args[0] -ChildPath "user-guide"
[toc.Builder]::Run($path);
$path = Join-Path -Path $args[0] -ChildPath "gfx-user-guide"
[toc.Builder]::Run($path);
}
Wait-Job $job
Receive-Job -Job $job

View file

@ -1,127 +0,0 @@
#!/usr/bin/env bash
set -e
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
project_root="$(dirname "$script_dir")"
check_only=0
show_help() {
me=$(basename "$0")
cat <<EOF
$me: Build table of contents for documentation directories
Usage: $me [--help] [--source <path>] [--check-only]
Options:
--help Show this help message
--source Path to project root directory (defaults to parent of the script directory)
--check-only Check if TOC needs updating, exit 1 if changes needed
EOF
}
while [[ "$#" -gt 0 ]]; do
case $1 in
-h | --help)
show_help
exit 0
;;
--source)
project_root="$2"
shift
;;
--check-only)
check_only=1
;;
*)
echo "unrecognized argument: $1" >&2
show_help >&2
exit 1
;;
esac
shift
done
missing_bin=0
require_bin() {
local name="$1"
if ! command -v "$name" &>/dev/null; then
echo "This script needs $name, but it isn't in \$PATH" >&2
missing_bin=1
return
fi
}
require_bin "mcs"
require_bin "mono"
if [ "$missing_bin" -eq 1 ]; then
exit 1
fi
temp_dir=$(mktemp -d)
trap 'rm -rf "$temp_dir"' EXIT
docs_dir="$project_root/docs"
cat >"$temp_dir/temp_program.cs" <<EOL
$(cat "$script_dir/scripts/Program.cs")
namespace toc
{
class Program
{
static int Main(string[] args)
{
if (args.Length < 1)
{
Console.WriteLine("Please provide a directory path");
return 1;
}
try
{
Builder.Run(args[0]);
return 0;
}
catch (Exception ex)
{
Console.WriteLine(\$"Error: {ex.Message}");
return 1;
}
}
}
}
EOL
if ! mcs -r:System.Core "$temp_dir/temp_program.cs" -out:"$temp_dir/toc-builder.exe"; then
echo "Compilation of $script_dir/scripts/Program.cs failed" >&2
exit 1
fi
for dir in "user-guide" "gfx-user-guide"; do
if [ -d "$docs_dir/$dir" ]; then
if [ "$check_only" -eq 1 ]; then
# Ensure working directory is clean
if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
echo "Working directory not clean, cannot check TOC" >&2
exit 1
fi
fi
if ! mono "$temp_dir/toc-builder.exe" "$docs_dir/$dir"; then
echo "TOC generation failed for $dir" >&2
exit 1
fi
if [ "$check_only" -eq 1 ]; then
if ! git -C "$project_root" diff --quiet "docs/$dir/toc.html" 2>/dev/null; then
git -C "$project_root" diff --color "docs/$dir/toc.html"
git -C "$project_root" checkout -- "docs/$dir/toc.html" 2>/dev/null
exit 1
fi
fi
else
echo "Directory $dir not found" >&2
fi
done

View file

@ -1,328 +0,0 @@
# Building Slang From Source
### TLDR
`cmake --workflow --preset release` to configure, build, and package a release
version of Slang.
## Prerequisites:
Please install:
- CMake (3.25 preferred, but 3.22 works[^1])
- A C++ compiler with support for C++17. GCC, Clang and MSVC are supported
- A CMake compatible backend, for example Visual Studio or Ninja
- Python3 (a dependency for building spirv-tools)
Optional dependencies for tests include
- CUDA
- OptiX
- NVAPI
- Aftermath
- X11
Other dependencies are sourced from submodules in the [./external](./external)
directory.
## Get the Source Code
Clone [this](https://github.com/shader-slang/slang) repository. Make sure to
fetch the submodules also.
```bash
git clone https://github.com/shader-slang/slang --recursive
```
## Configure and build
> This section assumes cmake 3.25 or greater, if you're on a lower version
> please see [building with an older cmake](#building-with-an-older-cmake)
For a Ninja based build system (all platforms) run:
```bash
cmake --preset default
cmake --build --preset releaseWithDebugInfo # or --preset debug, or --preset release
```
For Visual Studio run:
```bash
cmake --preset vs2022 # or 'vs2019' or `vs2022-dev`
start devenv ./build/slang.sln # to optionally open the project in Visual Studio
cmake --build --preset releaseWithDebugInfo # to build from the CLI, could also use --preset release or --preset debug
```
There also exists a `vs2022-dev` preset which turns on features to aid
debugging.
### WebAssembly build
In order to build WebAssembly build of Slang, Slang needs to be compiled with
[Emscripten SDK](https://github.com/emscripten-core/emsdk). You can find more
information about [Emscripten](https://emscripten.org/).
You need to clone the EMSDK repo. And you need to install and activate the latest.
```bash
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
```
For non-Windows platforms
```bash
./emsdk install latest
./emsdk activate latest
```
For Windows
```cmd
emsdk.bat install latest
emsdk.bat activate latest
```
After EMSDK is activated, Slang needs to be built in a cross compiling setup:
- build the `generators` target for the build platform
- configure the build with `emcmake` for the host platform
- build for the host platform.
> Note: For more details on cross compiling please refer to the
> [cross-compiling](docs/building.md#cross-compiling) section.
```bash
# Build generators.
cmake --workflow --preset generators --fresh
mkdir generators
cmake --install build --prefix generators --component generators
# Configure the build with emcmake.
# emcmake is available only when emsdk_env setup the environment correctly.
pushd ../emsdk
source ./emsdk_env # For Windows, emsdk_env.bat
popd
emcmake cmake -DSLANG_GENERATORS_PATH=generators/bin --preset emscripten -G "Ninja"
# Build slang-wasm.js and slang-wasm.wasm in build.em/Release/bin
cmake --build --preset emscripten --target slang-wasm
```
> Note: If the last build step fails, try running the command that `emcmake`
> outputs, directly.
## Installing
Build targets may be installed using cmake:
```bash
cmake --build . --target install
```
This should install `SlangConfig.cmake` that should allow `find_package` to work.
SlangConfig.cmake defines `SLANG_EXECUTABLE` variable that will point to `slangc`
executable and also define `slang::slang` target to be linked to.
For now, `slang::slang` is the only exported target defined in the config which can
be linked to.
Example usage
```cmake
find_package(slang REQUIRED PATHS ${your_cmake_install_prefix_path} NO_DEFAULT_PATH)
# slang_FOUND should be automatically set
target_link_libraries(yourLib PUBLIC
slang::slang
)
```
## Testing
```bash
build/Debug/bin/slang-test
```
See the [documentation on testing](../tools/slang-test/README.md) for more information.
## More niche topics
### CMake options
| Option | Default | Description |
|-----------------------------------|----------------------------|----------------------------------------------------------------------------------------------|
| `SLANG_VERSION` | Latest `v*` tag | The project version, detected using git if available |
| `SLANG_EMBED_CORE_MODULE` | `TRUE` | Build slang with an embedded version of the core module |
| `SLANG_EMBED_CORE_MODULE_SOURCE` | `TRUE` | Embed the core module source in the binary |
| `SLANG_ENABLE_DXIL` | `TRUE` | Enable generating DXIL using DXC |
| `SLANG_ENABLE_ASAN` | `FALSE` | Enable ASAN (address sanitizer) |
| `SLANG_ENABLE_FULL_IR_VALIDATION` | `FALSE` | Enable full IR validation (SLOW!) |
| `SLANG_ENABLE_IR_BREAK_ALLOC` | `FALSE` | Enable IR BreakAlloc functionality for debugging. |
| `SLANG_ENABLE_GFX` | `TRUE` | Enable gfx targets |
| `SLANG_ENABLE_SLANGD` | `TRUE` | Enable language server target |
| `SLANG_ENABLE_SLANGC` | `TRUE` | Enable standalone compiler target |
| `SLANG_ENABLE_SLANGRT` | `TRUE` | Enable runtime target |
| `SLANG_ENABLE_SLANG_GLSLANG` | `TRUE` | Enable glslang dependency and slang-glslang wrapper target |
| `SLANG_ENABLE_TESTS` | `TRUE` | Enable test targets, requires SLANG_ENABLE_GFX, SLANG_ENABLE_SLANGD and SLANG_ENABLE_SLANGRT |
| `SLANG_ENABLE_EXAMPLES` | `TRUE` | Enable example targets, requires SLANG_ENABLE_GFX |
| `SLANG_LIB_TYPE` | `SHARED` | How to build the slang library |
| `SLANG_ENABLE_RELEASE_DEBUG_INFO` | `TRUE` | Enable generating debug info for Release configs |
| `SLANG_ENABLE_SPLIT_DEBUG_INFO` | `TRUE` | Enable generating split debug info for Debug and RelWithDebInfo configs |
| `SLANG_SLANG_LLVM_FLAVOR` | `FETCH_BINARY_IF_POSSIBLE` | How to set up llvm support |
| `SLANG_SLANG_LLVM_BINARY_URL` | System dependent | URL specifying the location of the slang-llvm prebuilt library |
| `SLANG_GENERATORS_PATH` | `` | Path to an installed `all-generators` target for cross compilation |
The following options relate to optional dependencies for additional backends
and running additional tests. Left unchanged they are auto detected, however
they can be set to `OFF` to prevent their usage, or set to `ON` to make it an
error if they can't be found.
| Option | CMake hints | Notes |
|--------------------------|--------------------------------|----------------------------------------------------------------------------------------------|
| `SLANG_ENABLE_CUDA` | `CUDAToolkit_ROOT` `CUDA_PATH` | Enable running tests with the CUDA backend, doesn't affect the targets Slang itself supports |
| `SLANG_ENABLE_OPTIX` | `Optix_ROOT_DIR` | Requires CUDA |
| `SLANG_ENABLE_NVAPI` | `NVAPI_ROOT_DIR` | Only available for builds targeting Windows |
| `SLANG_ENABLE_AFTERMATH` | `Aftermath_ROOT_DIR` | Enable Aftermath in GFX, and add aftermath crash example to project |
| `SLANG_ENABLE_XLIB` | | |
### Advanced options
| Option | Default | Description |
|------------------------------------|---------|--------------------------------------------------------------------------------------------------------------------------------|
| `SLANG_ENABLE_DX_ON_VK` | `FALSE` | Enable running the DX11 and DX12 tests on non-Windows platforms via vkd3d-proton, requires system-provided d3d headers |
| `SLANG_ENABLE_SLANG_RHI` | `TRUE` | Enable building and using [slang-rhi](https://github.com/shader-slang/slang-rhi) for tests |
| `SLANG_USE_SYSTEM_MINIZ` | `FALSE` | Build using system Miniz library instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_LZ4` | `FALSE` | Build using system LZ4 library instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_VULKAN_HEADERS` | `FALSE` | Build using system Vulkan headers instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_SPIRV_HEADERS` | `FALSE` | Build using system SPIR-V headers instead of the bundled version in [./external](./external) |
| `SLANG_USE_SYSTEM_UNORDERED_DENSE` | `FALSE` | Build using system unordered dense instead of the bundled version in [./external](./external) |
| `SLANG_SPIRV_HEADERS_INCLUDE_DIR` | `` | Use this specific path to SPIR-V headers instead of the bundled version in [./external](./external) |
### LLVM Support
There are several options for getting llvm-support:
- Use a prebuilt binary slang-llvm library:
`-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY` or `-DSLANG_SLANG_LLVM_FLAVOR=FETCH_BINARY_IF_POSSIBLE` (this is the default)
- You can set `SLANG_SLANG_LLVM_BINARY_URL` to point to a local
`libslang-llvm.so/slang-llvm.dll` or set it to a URL of an zip/archive
containing such a file
- If this isn't set then the build system tries to download it from the
release on github matching the current tag. If such a tag doesn't exist
or doesn't have the correct os/arch combination then the latest release
will be tried.
- If `SLANG_SLANG_LLVM_BINARY_URL` is `FETCH_BINARY_IF_POSSIBLE` then in
the case that a prebuilt binary can't be found then the build will proceed
as though `DISABLE` was chosen
- Use a system supplied LLVM: `-DSLANG_SLANG_LLVM_FLAVOR=USE_SYSTEM_LLVM`, you
must have llvm-13.0 and a matching libclang installed. It's important that
either:
- You don't end up linking to a dynamic libllvm.so, this will almost
certainly cause multiple versions of LLVM to be loaded at runtime,
leading to errors like `opt: CommandLine Error: Option
'asm-macro-max-nesting-depth' registered more than once!`. Avoid this by
compiling LLVM without the dynamic library.
- Anything else which may be linked in (for example Mesa, also dynamically
loads the same llvm object)
- Do not enable LLVM support: `-DSLANG_SLANG_LLVM_FLAVOR=DISABLE`
To build only a standalone slang-llvm, you can run:
```bash
cmake --workflow --preset slang-llvm
```
This will generate `build/dist-release/slang-slang-llvm.zip` containing the
library. This, of course, uses the system LLVM to build slang-llvm, otherwise
it would just be a convoluted way to download a prebuilt binary.
### Cross compiling
Slang generates some code at build time, using generators build from this
codebase. Due to this, for cross compilation one must already have built these
generators for the build platform. Build them with the `generators` preset, and
pass the install path to the cross building CMake invocation using
`SLANG_GENERATORS_PATH`
Non-Windows platforms:
```bash
# build the generators
cmake --workflow --preset generators --fresh
mkdir build-platform-generators
cmake --install build --config Release --prefix build-platform-generators --component generators
# reconfigure, pointing to these generators
# Here is also where you should set up any cross compiling environment
cmake \
--preset default \
--fresh \
-DSLANG_GENERATORS_PATH=build-platform-generators/bin \
-Dwhatever-other-necessary-options-for-your-cross-build \
# for example \
-DCMAKE_C_COMPILER=my-arch-gcc \
-DCMAKE_CXX_COMPILER=my-arch-g++
# perform the final build
cmake --workflow --preset release
```
Windows
```bash
# build the generators
cmake --workflow --preset generators --fresh
mkdir build-platform-generators
cmake --install build --config Release --prefix build-platform-generators --component generators
# reconfigure, pointing to these generators
# Here is also where you should set up any cross compiling environment
# For example
./vcvarsamd64_arm64.bat
cmake \
--preset default \
--fresh \
-DSLANG_GENERATORS_PATH=build-platform-generators/bin \
-Dwhatever-other-necessary-options-for-your-cross-build
# perform the final build
cmake --workflow --preset release
```
### Example cross compiling with MSVC to windows-aarch64
One option is to build using the ninja generator, which requires providing the
native and cross environments via `vcvarsall.bat`
```bash
vcvarsall.bat
cmake --workflow --preset generators --fresh
mkdir generators
cmake --install build --prefix generators --component generators
vcvarsall.bat x64_arm64
cmake --preset default --fresh -DSLANG_GENERATORS_PATH=generators/bin
cmake --workflow --preset release
```
Another option is to build using the Visual Studio generator which can find
this automatically
```
cmake --preset vs2022 # or --preset vs2019
cmake --build --preset generators # to build from the CLI
cmake --install build --prefix generators --component generators
rm -rf build # The Visual Studio generator will complain if this is left over from a previous build
cmake --preset vs2022 --fresh -A arm64 -DSLANG_GENERATORS_PATH=generators/bin
cmake --build --preset release
```
## Building with an older CMake
Because older CMake versions don't support all the features we want to use in
CMakePresets, you'll have to do without the presets. Something like the following
```bash
cmake -B build -G Ninja
cmake --build build -j
```
## Notes
[^1] below 3.25, CMake lacks the ability to mark directories as being
system directories (https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html#prop_tgt:SYSTEM),
this leads to an inability to suppress warnings originating in the
dependencies in `./external`, so be prepared for some additional warnings.

View file

@ -1,36 +0,0 @@
# Our CI
There are github actions for building and testing slang.
## Tests
Most configurations run a restricted set of tests, however on some self hosted
runners we run the full test suite, as well as running Falcor's test suite with
the new slang build.
## Building LLVM
We require a static build of LLVM for building slang-llvm, we build and cache
this in all workflow runs. Since this changes infrequently, the cache is almost
always hit. A cold build takes about an hour on the slowest platform. The
cached output is a few hundred MB, so conceivably if we add many more platforms
we might be caching more than the 10GB github allowance, which would
necessitate being a bit more complicated in building and tracking outputs here.
For slang-llvm, this is handled the same as any other dependency, except on
Windows Debug builds, where we are required by the differences in Debug/Release
standard libraries to always make a release build, this is noted in the ci
action yaml file.
Note that we don't use sccache while building LLVM, as it changes very
infrequently. The caching of LLVM is done by caching the final build product
only.
## sccache
> Due to reliability issues, we are not currently using sccache, this is
> historical/aspirational.
The CI actions use sccache, keyed on compiler and platform, this runs on all
configurations and significantly speeds up small source change builds. This
cache can be safely missed without a large impact on build times.

View file

@ -1,648 +0,0 @@
Slang CPU Target Support
========================
Slang has preliminary support for producing CPU source and binaries.
# Features
* Can compile C/C++/Slang source to binaries (executables, shared libraries or [directly executable](#host-callable))
* Does *not* require a C/C++ be installed if [slang-llvm](#slang-llvm) is available (as distributed with slang binary distributions)
* Can compile Slang source into C++ source code
* Supports compute style shaders
# Limitations
These limitations apply to Slang transpiling to C++.
* Barriers are not supported (making these work would require an ABI change)
* Atomics are not currently supported
* Limited support for [out of bounds](#out-of-bounds) accesses handling
* Entry point/s cannot be named `main` (this is because downstream C++ compilers expect a regular `main`)
* `float16_t` type is not currently supported
For current C++ source output, the compiler needs to support partial specialization.
# How it works
The initial version works by using a 'downstream' C/C++ compiler. A C++ compiler does *not* in general need to be installed on a system to compile and execute code as long as [slang-llvm](#slang-llvm) is available. A [regular C/C++](#regular-cpp) compiler can also be used, allowing access to tooling, such as profiling and debuggers, as well as being able to use regular host development features such as linking, libraries, shared libraries/dlls and executables.
The C/C++ backend can be directly accessed much like 'dxc', 'fxc' or 'glslang' can, using the pass-through mechanism with the following new backends...
```
SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler
SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler
SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler
SLANG_PASS_THROUGH_LLVM, ///< slang-llvm 'compiler' - includes LLVM and Clang
SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type
```
Sometimes it is not important which C/C++ compiler is used, and this can be specified via the 'Generic C/C++' option. This will aim to use the compiler that is most likely binary compatible with the compiler that was used to build the Slang binary being used.
To make it possible for Slang to produce CPU code, in this first iteration we convert Slang code into C/C++ which can subsequently be compiled into CPU code. If source is desired instead of a binary this can be specified via the SlangCompileTarget. These can be specified on the `slangc` command line as `-target cpp`.
When using the 'pass through' mode for a CPU based target it is currently necessary to set an entry point, even though it's basically ignored.
In the API the `SlangCompileTarget`s are
```
SLANG_C_SOURCE ///< The C language
SLANG_CPP_SOURCE ///< The C++ language
SLANG_HOST_CPP_SOURCE, ///< C++ code for `host` style
```
Using the `-target` command line option
* `C_SOURCE`: c
* `CPP_SOURCE`: cpp,c++,cxx
* `HOST_CPP_SOURCE`: host-cpp,host-c++,host-cxx
Note! Output of C source is not currently supported.
If a CPU binary is required this can be specified as a `SlangCompileTarget` of
```
SLANG_EXECUTABLE ///< Executable (for hosting CPU/OS)
SLANG_SHADER_SHARED_LIBRARY ///< A shared library/Dll (for hosting CPU/OS)
SLANG_SHADER_HOST_CALLABLE ///< A CPU target that makes `compute kernel` compiled code available to be run immediately
SLANG_HOST_HOST_CALLABLE ///< A CPU target that makes `scalar` compiled code available to be run immediately
SLANG_OBJECT_CODE, ///< Object code that can be used for later linking
```
Using the `-target` command line option
* `EXECUTABLE`: exe, executable
* `SHADER_SHARED_LIBRARY`: sharedlib, sharedlibrary, dll
* `SHADER_HOST_CALLABLE`: callable, host-callable
* `OBJECT_CODE`: object-code
* `HOST_HOST_CALLABLE`: host-host-callable
Using `host-callable` types from the command line is not generally useful, other than to test that such code compiles and can be loaded for host execution.
For launching a [shader like](#compile-style) Slang code on the CPU, there typically needs to be binding of values passed to the entry point function. How this works is described in the [ABI section](#abi). Functions *can* be executed directly but care must be taken to [export](#visibility) them and such that there isn't an issue with [context threading](#context-threading).
If a binary target is requested, the binary contents can be returned in a ISlangBlob just like for other targets. When using a [regular C/C++ compiler](#regular-cpp) the CPU binary typically must be saved as a file and then potentially marked for execution by the OS. It may be possible to load shared libraries or dlls from memory - but doing so is a non standard feature, that requires unusual work arounds. If possible it is typically fastest and easiest to use [slang-llvm](#slang-llvm) to directly execute slang or C/C++ code.
## <a id="compile-style"/>Compilation Styles
There are currently two styles of *compilation style* supported - `host` and `shader`.
The `shader` style implies
* The code *can* be executed in a GPU-kernel like execution model, launched across multiple threads (as described in the [ABI](#abi))
* Currently no reference counting
* Only functionality from the Slang core module, built in HLSL or anything supplied by a [COM interfaces](#com-interface) is available
* Currently [slang-llvm](#slang-llvm) only supports the `shader` style
The `host` style implies
* Execution style is akin to more regular CPU scalar code
* Typically requires linking with `slang-rt` and use of `slang-rt` types such as `Slang::String`
* Allows use of `new`
* Allows the use of `class` for reference counted types
* COM interfaces are reference counted
The styles as used with [host-callable](#host-callable) are indicated via the API by
```
SLANG_SHADER_HOST_CALLABLE ///< A CPU target that makes `compute kernel` compiled code available to be run immediately
SLANG_HOST_HOST_CALLABLE ///< A CPU target that makes `scalar` compiled code available to be run immediately
```
Or via the `-target` command line options
* For 'shader' `callable` `host-callable`
* For 'host' `host-host-callable`
For an example of the `host` style please look at "examples/cpu-hello-world".
## <a id="host-callable"/>Host callable
Slang supports `host-callable` compilation targets which allow for the direct execution of the compiled code on the CPU. Currently this style of execution is supported if [slang-llvm](#slang-llvm) or a [regular C/C++ compiler](#regular-cpp) are available.
There are currently two [compilation styles](#compile-style) supported.
In order to call into `host-callable` code after compilation it's necessary to access the result via the `ISlangSharedLibrary` interface.
Please look at the [ABI](#abi) section for more specifics around ABI usage especially for `shader` [compile styles](#compile-style).
```C++
slang::ICompileRequest* request = ...;
const SlangResult compileRes = request->compile();
// Even if there were no errors that forced compilation to fail, the
// compiler may have produced "diagnostic" output such as warnings.
// We will go ahead and print that output here.
//
if(auto diagnostics = request->getDiagnosticOutput())
{
printf("%s", diagnostics);
}
// Get the 'shared library' (note that this doesn't necessarily have to be implemented as a shared library
// it's just an interface to executable code).
ComPtr<ISlangSharedLibrary> sharedLibrary;
SLANG_RETURN_ON_FAIL(request->getTargetHostCallable(0, sharedLibrary.writeRef()));
// We can now find exported functions/variables via findSymbolAddressByName
// For a __global public __extern_cpp int myGlobal;
{
auto myGlobalPtr = (int*)sharedLibrary->findSymbolAddressByName("myGlobal");
if (myGlobalPtr)
{
*myGlobalPtr = 10;
}
}
// To get a function
//
// public __extern_cpp int add(int a, int b);
// Test a free function
{
typedef int (*AddFunc)(int a, int b);
auto func = (AddFunc)sharedLibrary->findFuncByName("add");
if (func)
{
// Let's add!
int c = func(10, 20);
}
}
```
## <a id="slang-llvm"/>slang-llvm
`slang-llvm` is a special Slang version of [LLVM](https://llvm.org/). It's current main purpose is to allow compiling C/C++ such that it is [directly available](#host-callable) for execution using the LLVM JIT feature. If `slang-llvm` is available it is the default downstream compiler for [host-callable](#host-callable). This is because it allows for faster compilation, avoids the file system, and can execute the compiled code directly. [Regular C/C++ compilers](#regular-cpp) can be used for [host-callable](#host-callable) but requires writing source files to the file system and creating/loading shared-libraries/dlls to make the feature work. Additionally using `slang-llvm` avoids the need for a C/C++ compiler installed on a target system.
`slang-llvm` contains the Clang C++ compiler, so it is possible to also compile and execute C/C++ code in the [host-callable](#host-callable) style.
Limitations of using `slang-llvm`
* Can only currently be used for [shader style](#compile-style)
* Cannot produce object files, libraries, OS executables or binaries
* Is *limited* because it is not possible to directly access libraries such as the C or C++ standard libraries (see [COM interface](#com-interface) for a work-around)
* It's not possible to source debug into `slang-llvm` compiled code running on the JIT (see [debugging](#debugging) for a work-around)
* Not currently possible to return as a ISlangBlob representation
You can detect if `slang-llvm` is available via
```C++
slang::IGlobalSession* slangSession = ...;
const bool hasSlangLlvm = SLANG_SUCCEEDED(slangSession->checkPassThroughSupport(SLANG_PASS_THROUGH_LLVM));
```
## <a id="regular-cpp"/>Regular C/C++ compilers
Slang can work with regular C/C++ 'downstream' compilers. It has been tested to work with Visual Studio, Clang and G++/Gcc on Windows and Linux.
Under the covers when Slang is used to generate a binary via a C/C++ compiler, it must do so through the file system. Currently this means the source (say generated by Slang) and the binary (produced by the C/C++ compiler) must all be files. To make this work Slang uses temporary files. The reasoning for hiding this mechanism, other than simplicity, is that it allows using with [slang-llvm](#slang-llvm) without any changes.
## <a id="visibility"/>Visibility
In a typical Slang [shader like](#compile-style) scenario, functionality is exposed via entry points. It can be convenient and desirable to be able to call Slang functions directly from application code, and not just via entry points. By default non entry point functions are *removed* if they are not reachable by the specified entry point. Additionally for non entry point functions Slang typically generates function names that differ from the original name.
To work around these two issues the `public` and `__extern_cpp` modifiers can be used.
`public` makes the variable or function visible outside of the module even if it isn't used within the module. For the function to work it will also keep around any function or variable it accesses.
Note! Some care is needed here around [context threading](#context-threading) - if a function or any function a function accesses requires state held in the context, the signature of the function will be altered to include the context as the first parameter.
Making a function or variable `public` does not mean that the name remains the same. To indicate that the name should not be altered use the `__extern_cpp` modifier. For example
```
// myGlobal will be visible to the application (note the __global modifier additionally means it has C++ global behavior)
__global public __extern_cpp int myGlobal;
// myFunc is available to the application
public __extern_cpp myFunc(int a)
{
return a * a;
}
```
## <a id="com-interface"/>COM interface support
Slang has preliminary support for [Component Object Model (COM)](https://en.wikipedia.org/wiki/Component_Object_Model) interfaces in CPU code.
```
[COM]
interface IDoThings
{
int doThing(int a, int b);
int calcHash(NativeString in);
void printMessage(NativeString nativeString);
}
```
This support provides a way for an application to provide access to functionality in the application runtime - essentially it allows Slang code to call into application code. To do this a COM interface can be created that exposes the desired functionality. The interface/s can be made available through any of the normal mechanisms - such as through a constant buffer variable. Additionally [`__global`](#actual-global) provides a way to make functions available to Slang code without the need for [context threading](#context-threading).
The example "examples/cpu-com-example" shows this at work.
## <a id="actual-global"/>Global support
The Slang language is based on the HLSL language. This heritage means that globals have slightly different meaning to typical C/C++ usage.
```
int myGlobal; ///< A constant value stored in a constant buffer
static int staticMyGlobal; ///< A global that cannot be seen by the application
static const int staticConstMyGlobal; ///< A fixed value
```
The variable `myGlobal` will be a member of a constant buffer, meaning its value can only change via bindings and not during execution. For some uses having `myGlobal` in the constant buffer might be appropriate, for example
* It's use is reached from a [shader style](#compile-style) entry point
* It's value is constant across the launch
In Slang a variable can be declared as global in the C/C++ sense via the `__global` modifier. For example
```
__global int myGlobal;
```
Doing so means
* `myGlobal` will not be defined in the constant buffer
* It can be used in functions that do not have access to the [constant buffer](#context-threading)
* It can be modified in the kernel
* Can only be used on CPU targets (currently `__global` is not supported on the GPU targets)
One disadvantage of using `__global` is in multi-threaded environments, with multiple launches on multiple CPU threads, there is only one global and will likely cause problems unless the global value is the same across all threads.
It may be useful to set a global directly via host code, without having to write a function to enable the access. This is possible by using [`public`](#visibility) and [`__extern_cpp`](#visibility) modifiers. For example
```
__global public __extern_cpp int myGlobal;
```
The global can now be set from host code via
```C++
slang::ICompileRequest* request = ...;
// Get the 'shared library' (note that this doesn't necessarily have to be implemented as a shared library
// it's just an interface to executable code).
ComPtr<ISlangSharedLibrary> sharedLibrary;
SLANG_RETURN_ON_FAIL(request->getTargetHostCallable(0, sharedLibrary.writeRef()));
// Set myGlobal to 20
{
auto myGlobalPtr = (int*)sharedLibrary->findSymbolAddressByName("myGlobal");
*myGlobalPtr = 20;
}
```
In terms of reflection `__global` variables are not visible.
## NativeString
Slang supports a rich 'String' type when using the [host style](#compile-style), which for C++ targets is implemented as the `Slang::String` C++ type. The type is only available on CPU targets that support `slang-rt`.
Some limited String-like support is available via `NativeString` type which for C/C++ CPU targets is equivalent to `const char*`. For GPU targets this will use the same hash mechanism as normally available.
`NativeString` is supported by all [shader compilation styles](#compile-style) including [slang-llvm](#slang-llvm).
TODO(JS): What happens with String with shader compile style on CPU? Shouldn't it be the same as GPU (and reflected as such in reflection)?
## Debugging
It is currently not possible to step into LLVM-JIT code when using [slang-llvm](#slang-llvm). Fortunately it is possible to step into code compiled via a [regular C/C++ compiler](#regular-cpp).
Below is a code snippet showing how to switch to a [regular C/C++ compiler](#regular-cpp) at runtime.
```C++
SlangPassThrough findRegularCppCompiler(slang::IGlobalSession* slangSession)
{
// Current list of 'regular' C/C++ compilers
const SlangPassThrough cppCompilers[] =
{
SLANG_PASS_THROUGH_VISUAL_STUDIO,
SLANG_PASS_THROUGH_GCC,
SLANG_PASS_THROUGH_CLANG,
};
// Do we have a C++ compiler
for (const auto compiler : cppCompilers)
{
if (SLANG_SUCCEEDED(slangSession->checkPassThroughSupport(compiler)))
{
return compiler;
}
}
return SLANG_PASS_THROUGH_NONE;
}
SlangResult useRegularCppCompiler(slang::IGlobalSession* session)
{
const auto regularCppCompiler = findRegularCppCompiler(session);
if (regularCppCompiler != SLANG_PASS_THROUGH_NONE)
{
session->setDownstreamCompilerForTransition(SLANG_CPP_SOURCE, SLANG_SHADER_HOST_CALLABLE, regularCppCompiler);
session->setDownstreamCompilerForTransition(SLANG_CPP_SOURCE, SLANG_HOST_HOST_CALLABLE, regularCppCompiler);
return SLANG_OK;
}
return SLANG_FAIL;
}
```
It is generally recommended to use [slang-llvm](#slang-llvm) if that is appropriate, but to switch to using a [regular C/C++ compiler](#regular-cpp) when debugging is needed. This should be largely transparent to most code.
Executing CPU Code
==================
In typical Slang operation when code is compiled it produces either source or a binary that can then be loaded by another API such as a rendering API. With CPU code the binary produced could be saved to a file and then executed as an exe or a shared library/dll. In practice though it is common to want to be able to execute compiled code immediately. Having to save off to a file and then load again can be awkward. It is also not necessarily the case that code needs to be saved to a file to be executed.
To handle being able call code directly, code can be compiled using the [host-callable](#host-callable).
For pass through compilation of C/C++ this mechanism allows any functions marked for export to be directly queried. Marking for export is a C/C++ compiler specific feature. Look at the definition of `SLANG_PRELUDE_EXPORT` in the [C++ prelude](#prelude).
For a complete example on how to execute CPU code using `spGetEntryPointHostCallable`/`getEntryPointHostCallable` look at code in `example/cpu-hello-world`.
<a id="abi"/>Application Binary Interface (ABI)
===
Say we have some Slang source like the following:
```
struct Thing { int a; int b; }
Texture2D<float> tex;
SamplerState sampler;
RWStructuredBuffer<int> outputBuffer;
ConstantBuffer<Thing> thing3;
[numthreads(4, 1, 1)]
void computeMain(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform Thing thing,
uniform Thing thing2)
{
// ...
}
```
When compiled into a [shader compile style](#compile-style) shared library/dll/host-callable - how is it invoked? An entry point in the Slang source code produces several exported functions. The 'default' exported function has the same name as the entry point in the original source. It has the signature
```
void computeMain(ComputeVaryingInput* varyingInput, UniformEntryPointParams* uniformParams, UniformState* uniformState);
```
NOTE! Using `main` as an entry point name should be avoided if CPU is a target because it typically causes compilation errors due to its normal C/C++ usage.
ComputeVaryingInput is defined in the prelude as
```
struct ComputeVaryingInput
{
uint3 startGroupID;
uint3 endGroupID;
};
```
`ComputeVaryingInput` allows specifying a range of groupIDs to execute - all the ids in a grid from startGroup to endGroup, but not including the endGroupIDs. Most compute APIs allow specifying an x,y,z extent on 'dispatch'. This would be equivalent to having startGroupID = { 0, 0, 0} and endGroupID = { x, y, z }. The exported function allows setting a range of groupIDs such that client code could dispatch different parts of the work to different cores. This group range mechanism was chosen as the 'default' mechanism as it is most likely to achieve the best performance.
There are two other functions that consist of the entry point name postfixed with `_Thread` and `_Group`. For the entry point 'computeMain' these functions would be accessible from the shared library interface as `computeMain_Group` and `computeMain_Thread`. `_Group` has the same signature as the listed for computeMain, but it doesn't execute a range, only the single group specified by startGroupID (endGroupID is ignored). That is all of the threads within the group (as specified by `[numthreads]`) will be executed in a single call.
It may be desirable to have even finer control of how execution takes place down to the level of individual 'thread's and this can be achieved with the `_Thread` style. The signature looks as follows
```
struct ComputeThreadVaryingInput
{
uint3 groupID;
uint3 groupThreadID;
};
void computeMain_Thread(ComputeThreadVaryingInput* varyingInput, UniformEntryPointParams* uniformParams, UniformState* uniformState);
```
When invoking the kernel at the `thread` level it is a question of updating the groupID/groupThreadID, to specify which thread of the computation to execute. For the example above we have `[numthreads(4, 1, 1)]`. This means groupThreadID.x can vary from 0-3 and .y and .z must be 0. That groupID.x indicates which 'group of 4' to execute. So groupID.x = 1, with groupThreadID.x=0,1,2,3 runs the 4th, 5th, 6th and 7th 'thread'. Being able to invoke each thread in this way is flexible - in that any specific thread can specified and executed. It is not necessarily very efficient because there is the call overhead and a small amount of extra work that is performed inside the kernel.
Note that the `_Thread` style signature is likely to change to support 'groupshared' variables in the near future.
In terms of performance the 'default' function is probably the most efficient for most common usages. The `_Group` style allows for slightly less loop overhead, but with many invocations this will likely be drowned out by the extra call/setup overhead. The `_Thread` style in most situations will be the slowest, with even more call overhead, and less options for the C/C++ compiler to use faster paths.
The UniformState and UniformEntryPointParams struct typically vary by shader. UniformState holds 'normal' bindings, whereas UniformEntryPointParams hold the uniform entry point parameters. Where specific bindings or parameters are located can be determined by reflection. The structures for the example above would be something like the following...
```
struct UniformEntryPointParams
{
Thing thing;
Thing thing2;
};
struct UniformState
{
Texture2D<float > tex;
SamplerState sampler;
RWStructuredBuffer<int32_t> outputBuffer;
Thing* thing3;
};
```
Notice that of the entry point parameters `dispatchThreadID` is not part of UniformEntryPointParams and this is because it is not uniform.
`ConstantBuffer` and `ParameterBlock` will become pointers to the type they hold (as `thing3` is in the above structure).
`StructuredBuffer<T>`,`RWStructuredBuffer<T>` become
```
T* data;
size_t count;
```
`ByteAddressBuffer`, `RWByteAddressBuffer` become
```
uint32_t* data;
size_t sizeInBytes;
```
Resource types become pointers to interfaces that implement their features. For example `Texture2D` become a pointer to a `ITexture2D` interface that has to be implemented in client side code. Similarly SamplerState and SamplerComparisonState become `ISamplerState` and `ISamplerComparisonState`.
The actual definitions for the interfaces for resource types, and types are specified in 'slang-cpp-types.h' in the `prelude` directory.
## Unsized arrays
Unsized arrays can be used, which are indicated by an array with no size as in `[]`. For example
```
RWStructuredBuffer<int> arrayOfArrays[];
```
With normal 'sized' arrays, the elements are just stored contiguously within wherever they are defined. With an unsized array they map to `Array<T>` which is...
```
T* data;
size_t count;
```
Note that there is no method in the shader source to get the `count`, even though on the CPU target it is stored and easily available. This is because of the behavior on GPU targets
* That the count has to be stored elsewhere (unlike with CPU)
* On some GPU targets there is no bounds checking - accessing outside the bound values can cause *undefined behavior*
* The elements may be laid out *contiguously* on GPU
In practice this means if you want to access the `count` in shader code it will need to be passed by another mechanism - such as within a constant buffer. It is possible in the future support may be added to allow direct access of `count` work across targets transparently.
It is perhaps worth noting that the CPU allows us to have an indirection (a pointer to the unsized arrays contents) which has the potential for more flexibility than is possible on GPU targets. GPU target typically require the elements to be placed 'contiguously' from their location in their `container` - be that registers or in memory. This means on GPU targets there may be other restrictions on where unsized arrays can be placed in a structure for example, such as only at the end. If code needs to work across targets this means these restrictions will need to be followed across targets.
## <a id="context-threading"/>Context Threading
The [shader compile style](#compile-style) brings some extra issues to bear. In the HLSL compute kernel launch model application visible variables and resources are bound. As described in the [ABI](#abi) section these bindings and additional information identifying a compute thread are passed into the launch as a context. Take for example the code snippet below
```
int myGlobal;
int myFunc(int v)
{
return myGlobal + v;
}
int anotherFunc(int a, int b)
{
return a + b;
}
[numthreads(4, 1, 1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
outputBuffer[dispatchThreadID.x] = myFunc(dispatchThreadID.x) + anotherFunc(1, dispatchThreadID.y);
}
```
The function `myFunc` accesses a variable `myGlobal` that is held within a constant buffer. The function cannot be meaningfully executed without access to the context, and the context is available as a parameter passed through `computeMain` entry point at launch. This means the *actual* signature of this function in output code will be something like
```
int32_t myFunc_0(KernelContext_0 * kernelContext_0)
{
return *(&(*(&kernelContext_0->globalParams_0))->myGlobal_0) + int(1);
}
```
The context parameter has been *threaded* into this function. This *threading* will happen to any function that accesses any state that is held in the context. This behavior also happens transitively - if a function *could* call *any* another function that requires the context, the context will be threaded through to it also.
If application code assumed `myFunc` could be called with no parameters a crash would likely ensue. Note that `anotherFunc` does not have the issue because it doesn't perform an access that needs the context, and so no context threading is added.
If a global is desired in a function that wants to be called from the application, the [`__global`](#actual-global) modifier can be used.
## <a id="prelude"/>Prelude
For C++ targets, there is code to support the Slang generated source defined within the 'prelude'. The prelude is inserted text placed before the Slang generated C++ source. For the Slang command line tools as well as the test infrastructure, the prelude functionality is achieved through a `#include` in the prelude text of the `prelude/slang-cpp-prelude.h` specified with an absolute path. Doing so means other files the `slang-cpp-prelude.h` might need can be specified relatively, and include paths for the backend C/C++ compiler do not need to be modified.
The prelude needs to define
* 'Built in' types (vector, matrix, 'object'-like Texture, SamplerState etc)
* Scalar intrinsic function implementations
* Compiler based definitions/tweaks
For the Slang prelude this is split into the following files...
* 'prelude/slang-cpp-prelude.h' - Header that includes all the other requirements & some compiler tweaks
* 'prelude/slang-cpp-scalar-intrinsics.h' - Scalar intrinsic implementations
* 'prelude/slang-cpp-types.h' - The 'built in types'
* 'slang.h' - Slang header is used for majority of compiler based definitions
For a client application - as long as the requirements of the generated code are met, the prelude can be implemented by whatever mechanism is appropriate for the client. For example the implementation could be replaced with another implementation, or the prelude could contain all of the required text for compilation. Setting the prelude text can be achieved with the method on the global session...
```
/** Set the 'prelude' for generated code for a 'downstream compiler'.
@param passThrough The downstream compiler for generated code that will have the prelude applied to it.
@param preludeText The text added pre-pended verbatim before the generated source
That for pass-through usage, prelude is not pre-pended, preludes are for code generation only.
*/
virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude(
SlangPassThrough passThrough,
const char* preludeText) = 0;
```
It may be useful to be able to include `slang-cpp-types.h` in C++ code to access the types that are used in the generated code. This introduces a problem in that the types used in the generated code might clash with types in client code. To work around this problem, you can wrap all of the types defined in the prelude with a namespace of your choosing. For example
```
#define SLANG_PRELUDE_NAMESPACE CPPPrelude
#include "../../prelude/slang-cpp-types.h"
```
Would wrap all the Slang prelude types in the namespace `CPPPrelude`, such that say a `StructuredBuffer<int32_t>` could be specified in C++ source code as `CPPPrelude::StructuredBuffer<int32_t>`.
The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path to `slang-cpp-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
The *default* prelude is set to the contents of the files for C++ held in the prelude directory and is held within the Slang shared library. It is therefore typically not necessary to distribute Slang with prelude files.
Language aspects
================
# Arrays passed by Value
Slang follows the HLSL convention that arrays are passed by value. This is in contrast to C/C++ where arrays are passed by reference. To make generated C/C++ follow this convention an array is turned into a 'FixedArray' struct type. Since classes by default in C/C++ are passed by value the wrapped array is also.
To get something similar to C/C++ operation the array can be marked `inout` to make it passed by reference.
Limitations
===========
# <a id="out-of-bounds"/>Out of bounds access
In HLSL code if an access is made out of bounds of a StructuredBuffer, execution proceeds. If an out of bounds read is performed, a zeroed value is returned. If an out of bounds write is performed it's effectively a noop, as the value is discarded. On the CPU target this behavior is *not* supported by default.
For a debug CPU build an out of bounds access will assert, for a release build the behaviour is by default undefined. A limited [zero index](#zero-index) out of bounds mechanism is supported, but must be enabled.
The reason for this is that such an access is difficult and/or slow to implement the identical GPU behavior on the CPU. The underlying problem is `operator[]` typically returns a reference to the contained value. If this is out of bounds - it's not clear what to return, in particular because the value may be read or written and moreover elements of the type might be written. In practice this means a global zeroed value cannot be returned.
This could be somewhat supported if code gen worked as followed for say
```
RWStructuredBuffer<float4> values;
values[3].x = 10;
```
Produces
```
template <typename T>
struct RWStructuredBuffer
{
T& at(size_t index, T& defValue) { return index < size ? values[index] : defValue; }
T* values;
size_t size;
};
RWStructuredBuffer<float4> values;
// ...
Vector<float, 3> defValue = {}; // Zero initialize such that read access returns default values
values.at(3, defValue).x = 10;
```
Note that `[]` would be turned into the `at` function, which takes the default value as a parameter provided by the caller. If this is then written to then only the defValue is corrupted. Even this mechanism may not be quite right, because if we write and then read again from the out of bounds reference in HLSL we may expect that 0 is returned, whereas here we get the value that was last written.
## <a id="zero-index"/>Zero index bound checking
If bounds checking is wanted in order to avoid undefined behavior and limit how memory is accessed `zero indexed` bounds checking might be appropriate. When enabled if an access is out of bounds the value at the zero index is returned. This is quite different behavior than the typical GPU behavior, but is fairly efficient and simple to implement. Importantly it means behavior is well defined and always 'in range' assuming there is an element.
To enable zero indexing bounds checking pass in the define `SLANG_ENABLE_BOUND_ZERO_INDEX` to a Slang compilation. This define is passed down to C++ and CUDA compilations, and the code in the CUDA and C++ preludes implement the feature. Note that zero indexed bounds checking will slow down accesses that are checked.
The C++ implementation of the feature can be seen by looking at the file "prelude/slang-cpp-types.h". For CUDA "prelude/slang-cuda-prelude.h".
The bounds checking macros are guarded such it is possible to replace the implementations, without directly altering the prelude.
TODO
====
# Main
* groupshared is not yet supported
* Output of header files
* Output multiple entry points
# Internal Slang compiler features
These issues are more internal Slang features/improvements
* Currently only generates C++ code, it would be fairly straight forward to support C (especially if we have 'intrinsic definitions')
* Have 'intrinsic definitions' in standard library - such that they can be generated where appropriate
+ This will simplify the C/C++ code generation as it means the Slang language will generate most of the appropriate code
* Currently 'construct' IR inst is supported as is, we may want to split out to separate instructions for specific scenarios
* Refactoring around swizzle. Currently in emit it has to check for a variety of scenarios - could be simplified with an IR pass and perhaps more specific instructions.

View file

@ -1,333 +0,0 @@
Slang CUDA Target Support
=========================
Slang has preliminary support for producing CUDA source, and PTX binaries using [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html).
NOTE! NVRTC is only available for 64-bit operating systems. On Windows Visual Studio make sure you are compiling for 'x64' and/or use 64 bit Slang binaries.
# Features
* Can compile Slang source into CUDA source code
* Supports compute style shaders
* Supports a 'bindless' CPU like model
* Can compile CUDA source to PTX through 'pass through' mechanism
# Limitations
These limitations apply to Slang transpiling to CUDA.
* Only supports the 'texture object' style binding (The texture object API is only supported on devices of compute capability 3.0 or higher. )
* Samplers are not separate objects in CUDA - they are combined into a single 'TextureObject'. So samplers are effectively ignored on CUDA targets.
* When using a TextureArray.Sample (layered texture in CUDA) - the index will be treated as an int, as this is all CUDA allows
* Care must be used in using `WaveGetLaneIndex` wave intrinsic - it will only give the right results for appropriate launches
* CUDA 'surfaces' are used for textures which are read/write (aka RWTexture).
The following are a work in progress or not implemented but are planned to be so in the future
* Some resource types remain unsupported, and not all methods on all types are supported
# How it works
For producing PTX binaries Slang uses [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html). NVRTC dll/shared library has to be available to Slang (for example in the appropriate PATH for example) for it to be able to produce PTX.
The NVRTC compiler can be accessed directly via the pass through mechanism and is identified by the enum value `SLANG_PASS_THROUGH_NVRTC`.
Much like other targets that use downstream compilers Slang can be used to compile CUDA source directly to PTX via the pass through mechanism. The Slang command line options will broadly be mapped down to the appropriate options for the NVRTC compilation. In the API the `SlangCompileTarget` for CUDA is `SLANG_CUDA_SOURCE` and for PTX is `SLANG_PTX`. These can also be specified on the Slang command line as `-target cuda` and `-target ptx`.
## Locating NVRTC
Finding NVRTC can require some nuance if a specific version is required. On the command line the `-nvrtc-path` option can be used to set the `path` to NVRTC. Also `spProcessCommandLineArguments`/`processCommandLineArguments` with `-nvrtc-path` or `setDownstreamCompilerPath` with `SLANG_PASS_THROUGH_NVRTC` can be used to set the location and/or name of NVRTC via the API.
Important points of note are
* The name of the shared library should *not* include any extension (such as `.dll`/`.so`/`.dynlib`) or prefix (such as `lib`).
* The path also *doesn't* have to be path, it can just be the shared library name. Doing so will mean it will be searched for by whatever the default mechanism is on the target.
* If a path and/or name is specified for NVRTC - this will be the *only* version searched for.
If a path/name is *not* specified for NVRTC, Slang will attempt to load a shared library called `nvrtc`. For non Windows targets this should be enough to find and load the latest version.
On Windows NVRTC dlls have a name the contains the version number, for example `nvrtc64_102_0.dll`. This will lead to the load of just `nvrtc` to fail. One approach to fix this is to place the NVRTC dll and associated files in the same directory as slang.dll, and rename the main dll to `nvrtc.dll`. Another approach is to specify directly on the command line the name including the version, as previously discussed. For example
`-nvrtc-path nvrtc64_102_0`
will load NVRTC 10.2 assuming that version of the dll can be found via the normal lookup mechanism.
On Windows if NVRTC is not loadable directly as 'nvrtc' Slang will attempt to search for the newest version of NVRTC on your system. The places searched are...
* The instance directory (where the slang.dll and/or program exe is)
* The CUDA_PATH environment variable (if set)
* Directories in PATH that look like a CUDA installation.
If a candidate is found via an earlier mechanism, subsequent searches are not performed. If multiple candidates are found, Slang tries the newest version first.
Binding
=======
Say we have some Slang source like the following:
```
struct Thing { int a; int b; }
Texture2D<float> tex;
SamplerState sampler;
RWStructuredBuffer<int> outputBuffer;
ConstantBuffer<Thing> thing3;
[numthreads(4, 1, 1)]
void computeMain(
uint3 dispatchThreadID : SV_DispatchThreadID,
uniform Thing thing,
uniform Thing thing2)
{
// ...
}
```
This will be turned into a CUDA entry point with
```
struct UniformEntryPointParams
{
Thing thing;
Thing thing2;
};
struct UniformState
{
CUtexObject tex; // This is the combination of a texture and a sampler(!)
SamplerState sampler; // This variable exists within the layout, but it's value is not used.
RWStructuredBuffer<int32_t> outputBuffer; // This is implemented as a template in the CUDA prelude. It's just a pointer, and a size
Thing* thing3; // Constant buffers map to pointers
};
// [numthreads(4, 1, 1)]
extern "C" __global__ void computeMain(UniformEntryPointParams* params, UniformState* uniformState)
```
With CUDA - the caller specifies how threading is broken up, so `[numthreads]` is available through reflection, and in a comment in output source code but does not produce varying code.
The UniformState and UniformEntryPointParams struct typically vary by shader. UniformState holds 'normal' bindings, whereas UniformEntryPointParams hold the uniform entry point parameters. Where specific bindings or parameters are located can be determined by reflection. The structures for the example above would be something like the following...
`StructuredBuffer<T>`,`RWStructuredBuffer<T>` become
```
T* data;
size_t count;
```
`ByteAddressBuffer`, `RWByteAddressBuffer` become
```
uint32_t* data;
size_t sizeInBytes;
```
## Texture
Read only textures will be bound as the opaque CUDA type CUtexObject. This type is the combination of both a texture AND a sampler. This is somewhat different from HLSL, where there can be separate `SamplerState` variables. This allows access of a single texture binding with different types of sampling.
If code relies on this behavior it will be necessary to bind multiple CtexObjects with different sampler settings, accessing the same texture data.
Slang has some preliminary support for TextureSampler type - a combined Texture and SamplerState. To write Slang code that can target CUDA and other platforms using this mechanism will expose the semantics appropriately within the source.
Load is only supported for Texture1D, and the mip map selection argument is ignored. This is because there is tex1Dfetch and no higher dimensional equivalents. CUDA also only allows such access if the backing array is linear memory - meaning the bound texture cannot have mip maps - thus making the mip map parameter superfluous anyway. RWTexture does allow Load on other texture types.
## RWTexture
RWTexture types are converted into CUsurfObject type.
In regular CUDA it is not possible to do a format conversion on an access to a CUsurfObject. Slang does add support for hardware write conversions where they are available. To enable the feature it is necessary to attribute your RWTexture with `format`. For example
```
[format("rg16f")]
RWTexture2D<float2> rwt2D_2;
```
The format names used are the same as for [GLSL layout format types](https://www.khronos.org/opengl/wiki/Layout_Qualifier_(GLSL)). If no format is specified Slang will *assume* that the format is the same as the type specified.
Note that the format attribution is on variables/parameters/fields and not part of the type system. This means that if you have a scenario like...
```
[format("rg16f")]
RWTexture2D<float2> g_texture;
float2 getValue(RWTexture2D<float2> t)
{
return t[int2(0, 0)];
}
void doThing()
{
float2 v = getValue(g_texture);
}
```
Even `getValue` will receive t *without* the format attribute, and so will access it, presumably erroneously. A workaround for this specific scenario would be to attribute the parameter
```
float2 getValue([format("rg16f")] RWTexture2D<float2> t)
{
return t[int2(0, 0)];
}
```
This will only work correctly if `getValue` is called with a `t` that has that format attribute. As it stands no checking is performed on this matching so no error or warning will be produced if there is a mismatch.
There is limited software support for doing a conversion on reading. Currently this only supports only 1D, 2D, 3D RWTexture, backed with half1, half2 or half4. For this path to work NVRTC must have the `cuda_fp16.h` and associated files available. Please check the section on `Half Support`.
If hardware read conversions are desired, this can be achieved by having a Texture<T> that uses the surface of a RWTexture<T>. Using the Texture<T> not only allows hardware conversion but also filtering.
It is also worth noting that CUsurfObjects in CUDA are NOT allowed to have mip maps.
By default surface access uses cudaBoundaryModeZero, this can be replaced using the macro SLANG_CUDA_BOUNDARY_MODE in the CUDA prelude. For HW format conversions the macro SLANG_PTX_BOUNDARY_MODE. These boundary settings are in effect global for the whole of the kernel.
`SLANG_CUDA_BOUNDARY_MODE` can be one of
* cudaBoundaryModeZero causes an execution trap on out-of-bounds addresses
* cudaBoundaryModeClamp stores data at the nearest surface location (sized appropriately)
* cudaBoundaryModeTrap drops stores to out-of-bounds addresses
`SLANG_PTX_BOUNDARY_MODE` can be one of `trap`, `clamp` or `zero`. In general it is recommended to have both set to the same type of value, for example `cudaBoundaryModeZero` and `zero`.
## Sampler
Samplers are in effect ignored in CUDA output. Currently we do output a variable `SamplerState`, but this value is never accessed within the kernel and so can be ignored. More discussion on this behavior is in `Texture` section.
## Unsized arrays
Unsized arrays can be used, which are indicated by an array with no size as in `[]`. For example
```
RWStructuredBuffer<int> arrayOfArrays[];
```
With normal 'sized' arrays, the elements are just stored contiguously within wherever they are defined. With an unsized array they map to `Array<T>` which is...
```
T* data;
size_t count;
```
Note that there is no method in the shader source to get the `count`, even though on the CUDA target it is stored and easily available. This is because of the behavior on GPU targets
* That the count has to be stored elsewhere (unlike with CUDA)
* On some GPU targets there is no bounds checking - accessing outside the bound values can cause *undefined behavior*
* The elements may be laid out *contiguously* on GPU
In practice this means if you want to access the `count` in shader code it will need to be passed by another mechanism - such as within a constant buffer. It is possible in the future support may be added to allow direct access of `count` work across targets transparently.
## Prelude
For CUDA the code to support the code generated by Slang is partly defined within the 'prelude'. The prelude is inserted text placed before the generated CUDA source code. For the Slang command line tools as well as the test infrastructure, the prelude functionality is achieved through a `#include` in the prelude text of the `prelude/slang-cuda-prelude.h` specified with an absolute path. Doing so means other files the `slang-cuda-prelude.h` might need can be specified relatively, and include paths for the backend compiler do not need to be modified.
The prelude needs to define
* 'Built in' types (vector, matrix, 'object'-like Texture, SamplerState etc)
* Scalar intrinsic function implementations
* Compiler based definitions/tweaks
For a client application - as long as the requirements of the generated code are met, the prelude can be implemented by whatever mechanism is appropriate for the client. For example the implementation could be replaced with another implementation, or the prelude could contain all of the required text for compilation. Setting the prelude text can be achieved with the method on the global session...
```
/** Set the 'prelude' for generated code for a 'downstream compiler'.
@param passThrough The downstream compiler for generated code that will have the prelude applied to it.
@param preludeText The text added pre-pended verbatim before the generated source
That for pass-through usage, prelude is not pre-pended, preludes are for code generation only.
*/
void setDownstreamCompilerPrelude(SlangPassThrough passThrough, const char* preludeText);
```
The code that sets up the prelude for the test infrastructure and command line usage can be found in ```TestToolUtil::setSessionDefaultPrelude```. Essentially this determines what the absolute path to `slang-cuda-prelude.h` is and then just makes the prelude `#include "the absolute path"`.
Half Support
============
Slang supports the half/float16 types on CUDA. To do so NVRTC must have access to the `cuda_fp16.h` and `cuda_fp16.hpp` files that are typically distributed as part of the CUDA SDK. When Slang detects the use of half in source, it will define `SLANG_CUDA_ENABLE_HALF` when `slang-cuda-prelude.h` is included. This will in turn try to include `cuda_fp16.h` and enable extra functionality within the prelude for half support.
Slang tries several mechanisms to locate `cuda_fp16.h` when NVRTC is initiated. The first mechanism is to look in the include paths that are passed to Slang. If `cuda_fp16.h` can be found in one of these paths, no more searching will be performed.
If this fails, the path where NVRTC is located will be searched. In that path "include" and "CUDA/include" paths will be searched. This is probably most suitable for Windows based targets, where NVRTC dll is placed along with other binaries. The "CUDA/include" path is used to try and make clear in this scenario what the contained files are for.
If this fails Slang will look for the CUDA_PATH environmental variable, as is typically set during a CUDA SDK installation.
If this fails - the prelude include of `cuda_fp16.h` will most likely fail on NVRTC invocation.
CUDA has the `__half` and `__half2` types defined in `cuda_fp16.h`. The `__half2` can produce results just as quickly as doing the same operation on `__half` - in essence for some operations `__half2` is [SIMD](https://en.wikipedia.org/wiki/SIMD) like. The half implementation in Slang tries to take advantage of this optimization.
Since Slang supports up to 4 wide vectors Slang has to build on CUDAs half support. The types `__half3` and `__half4` are implemented in `slang-cuda-prelude.h` for this reason. It is worth noting that `__half3` is made up of a `__half2` and a `__half`. As `__half2` is 4 byte aligned, this means `__half3` is actually 8 bytes, rather than 6 bytes that might be expected.
One area where this optimization isn't fully used is in comparisons - as in effect Slang treats all the vector/matrix half comparisons as if they are scalar. This could be perhaps be improved on in the future. Doing so would require using features that are not directly available in the CUDA headers.
Wave Intrinsics
===============
There is broad support for [HLSL Wave intrinsics](https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/hlsl-shader-model-6-0-features-for-direct3d-12), including support for [SM 6.5 intrinsics](https://microsoft.github.io/DirectX-Specs/d3d/HLSL_ShaderModel6_5.html).
Most Wave intrinsics will work with vector, matrix or scalar types of typical built in types - `uint`, `int`, `float`, `double`, `uint64_t`, `int64_t`.
The support is provided via both the Slang core module as well as the Slang CUDA prelude found in 'prelude/slang-cuda-prelude.h'. Many Wave intrinsics are not directly applicable within CUDA which supplies more low level mechanisms. The implementation of most Wave functions works most optimally when all lanes of a 'Wave' are used. If all lanes from index 0 to pow2(n) - 1 are used (which is also true if all lanes are used) a binary reduction is typically applied. If this is not the case the implementation falls back on a slow path which is linear in the number of active lanes, and so is typically significantly less performant.
For more a more concrete example take
```
int sum = WaveActiveSum(...);
```
When computing the sum, if all lanes are used (32 on CUDA), the computation will require 5 steps to complete (2^5 = 32). If say just one lane is not being used it will take 31 steps to complete (because it is now linear in the number of active lanes). So just having one lane disabled requires 6 times as many steps. If lanes 0 - 15 are active, it will take 4 steps to complete (2^4 = 16).
In the future it may be possible to improve on the performance of the 'slow' path, however it will always remain the most efficient generally for all of 0 to pow2(n) - 1 lanes to be active.
It is also worth noting that lane communicating intrinsics performance will be impacted by the 'size' of the data communicated. The size here is at a minimum the amount of built in scalar types used in the processing. The CUDA language only allows direct communication with built in scalar types.
Thus
```
int3 v = ...;
int3 sum = WaveActiveSum(v);
```
Will require 3 times as many steps as the earlier scalar example just using a single int.
## WaveGetLaneIndex
'WaveGetLaneIndex' defaults to `(threadIdx.x & SLANG_CUDA_WARP_MASK)`. Depending on how the kernel is launched this could be incorrect. There are other ways to get lane index, for example using inline assembly. This mechanism though is apparently slower than the simple method used here. There is support for using the asm mechanism in the CUDA prelude using the `SLANG_USE_ASM_LANE_ID` preprocessor define to enable the feature.
There is potential to calculate the lane id using the [numthreads] markup in Slang/HLSL, but that also requires some assumptions of how that maps to a lane index.
## Unsupported Intrinsics
* Intrinsics which only work in pixel shaders
+ QuadXXXX intrinsics
OptiX Support
=============
Slang supports OptiX for raytracing. To compile raytracing programs, NVRTC must have access to the `optix.h` and dependent files that are typically distributed as part of the OptiX SDK. When Slang detects the use of raytracing in source, it will define `SLANG_CUDA_ENABLE_OPTIX` when `slang-cuda-prelude.h` is included. This will in turn try to include `optix.h`.
Slang tries several mechanisms to locate `optix.h` when NVRTC is initiated. The first mechanism is to look in the include paths that are passed to Slang. If `optix.h` can be found in one of these paths, no more searching will be performed.
If this fails, the default OptiX SDK install locations are searched. On Windows this is `%{PROGRAMDATA}\NVIDIA Corporation\OptiX SDK X.X.X\include`. On Linux this is `${HOME}/NVIDIA-OptiX-SDK-X.X.X-suffix`.
If OptiX headers cannot be found, compilation will fail.
Limitations
===========
Some features are not available because they cannot be mapped with appropriate behavior to a target. Other features are unavailable because of resources to devote to more unusual features.
* Not all Wave intrinsics are supported
* There is not complete support for all methods on 'objects' like textures etc.
* Does not currently support combined 'TextureSampler'. A Texture behaves equivalently to a TextureSampler and Samplers are ignored.
* Half type is not currently supported
* GetDimensions is not available on any Texture type currently - as there doesn't appear to be a CUDA equivalent
Language aspects
================
# Arrays passed by Value
Slang follows the HLSL convention that arrays are passed by value. This is in contrast with CUDA where arrays follow C++ conventions and are passed by reference. To make generated CUDA follow this convention an array is turned into a 'FixedArray' struct type.
To get something more similar to CUDA/C++ operation the array can be marked in out or inout to make it passed by reference.

View file

@ -1,25 +0,0 @@
Slang Design and Implementation Notes
=====================================
This directory contains documents that are primarily intended for developers working on the Slang implementation.
They are not intended to be helpful to Slang users.
These documents can only be trusted to reflect the state of the codebase or the plans of their authors at the time they were written. Changes to the implementation are not expected to always come with matching changes to these documents, so some amount of drift is to be expected.
Developers interested in contributing to Slang might want to start with the [Overview](overview.md) document, which describes the overall compilation pipeline that Slang uses and the purpose of the various steps (both implemented and planned).
The [Coding Conventions](coding-conventions.md) document describes the conventions that should be followed in all code added to the Slang project.
The [Interfaces](interfaces.md) document describes the high-level design plan for Slang's interfaces and generics features.
The [Declaration References](decl-refs.md) document is intended to help out developers who are mystified by the heavily used `DeclRef` type in the compiler implementation.
The [Intermediate Representation (IR)](ir.md) document describes the design of Slang's internal IR.
The [Existential Types](existential-types.md) document goes into some detail about what "existential types" are in the context of the Slang language, and explains how we may go about supporting them.
The [Capabilities](capabilities.md) document explains the proposed model for how Slang will support general notions of profile- or capability-based overloading/dispatch.
The [Casting](casting.md) document explains how casting works in the slang C++ compiler code base.
The [Experimental API Interfaces](experimental.md) document explains how experimental Slang API changes are to be deployed.

View file

@ -1,333 +0,0 @@
Reverse Mode Autodiff (Out of Date)
==================================
This document serves as a design reference for reverse-mode auto-diff in the Slang compiler.
## Reverse-Mode Passes
Rather than implementing reverse-mode as a separate pass, Slang implements this as a series of independent passes:
If a function needs a reverse-mode version generated:
- *Linearize* the function, and all dependencies.
- *Propagate* differential types through the linearized code.
- *Unzip* by moving primal insts to before differential insts.
- *Transpose* the differential insts.
## Linearization (Forward-mode)
### Overview
(This is an incomplete section. More details coming soon)
Consider an arbitrary function `float f(float a, float b, float c, ..., z)` which takes in N inputs and generates one output `y`. Linearization aims to generate the first-order Taylor expansion of f about _all_ of its inputs.
Mathematically, the forward derivative `fwd_f` represents `df/da * (a_0 - a) + df/db * (b_0 - b) + ...`, where `a_0` is the value at which the Taylor expansion was produced. The quantity `a_0 - a` is known as the 'differential' (for brevity we'll denote them da, db, dc, etc..), and there is at-most one differential per input.
Thus, the new function's signature should be `fwd_f(float a, float da, float b, float db, float c, float dc, ...)`. For simplicity, we'll use *pairs* instead of interleaving the original and differential parameters. We use the intrinsic `DifferentialPair<T>` (or for short: `DP<T>`) to denote this.
The signature we use is then `fwd_f(DP<float> a, DP<float> b, DP<float> c)`
An example of linearization:
```C
float f(float a, float b)
{
if (a > 0)
{
return a + b + 2.0 * a * b;
}
else
{
return sqrt(a);
}
}
```
We'll write out the SSA form of this function.
```C
float f_SSA(float a, float b)
{
bool _b1 = a > 0;
if (_b1)
{
float _t1 = a + b;
float _t2 = 2.0 * a;
float _t3 = _t2 * b;
float _t4 = _t1 + _t3;
return _t4;
}
else
{
float _t1 = sqrt(a);
return _t1;
}
}
DP<float> f_SSA(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
if (_b1)
{
float _t1 = dpa.p + dpb.p;
float _t1_d = dpa.d + dpb.d;
float _t2 = 2.0 * dpa.p;
float _t2_d = 0.0 * dpa.p + 2.0 * dpa.d;
float _t3 = _t2 * dpb.p;
float _t3_d = _t2_d * dpb.p + _t2 * dpb.d;
float _t4 = _t1 + _t3;
float _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
return DP<float>(_t1_dp.p, _t1_dp.d);
}
}
```
In the result, the primal part of the pair holds the original computation, while the differential part computes the dot product of the differentials with the derivatives of the function's output w.r.t each input.
## Propagation
This step takes a linearized function and propagates information about which instructions are computing a differential and which ones are part of the primal (original) computation.
Assuming first-order differentiation only:
The approach will be to mark any instructions that extract the differential from the differential pair as a differential. Then any instruction that uses the differential is itself marked as a differential and so on. The only exception is the call instruction which is either non-differentiable (do nothing) or differentiable and returns a pair (follow the same process)
Here's the above example with propagated type information (we use float.D to denote intermediaries that have been marked as differential, and also expand everything so that each line has a single operation)
```C
DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
if (_b1)
{
float _t1 = dpa.p + dpb.p;
float.D _q1_d = dpa.d;
float.D _q2_d = dpb.d;
float.D _t1_d = _q1_d + _q2_d;
float _t2 = 2.0 * dpa.p;
float.D _q2_d = dpa.d;
float.D _q3_d = 2.0 * dpa.d;
float _q4 = dpa.p;
float.D _q4_d = 0.0 * dpa.p;
float.D _t2_d = _q4_d + _q3_d;
float _t3 = _t2 * dpb.p;
float _q5 = dpb.p;
float.D _q6_d = _q5 * _t2_d;
float.D _q7_d = dpb.d;
float.D _q8_d = _t2 * _q7_d
float _t3_d = _q6_d + _q8_d;
float _t4 = _t1 + _t3;
float.D _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
float _q1 = _t1_dp.p;
float.D _q1_d = _t1_dp.d;
return DP<float>(_q1, _q1_d);
}
}
```
## Unzipping
This is a fairly simple process when there is no control flow. We simply move all non-differential instructions to before the first differential instruction.
When there is control flow, we need to be a bit more careful: the key is to *replicate* the control flow graph once for primal and once for the differential.
Here's the previous example unzipped:
```C
DP<float> f_SSA_Proped(DP<float> dpa, DP<float> dpb)
{
bool _b1 = dpa.p > 0;
float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
if (_b1)
{
_t1 = dpa.p + dpb.p;
_t2 = 2.0 * dpa.p;
_q4 = dpa.p;
_t3 = _t2 * dpb.p;
_q5 = dpb.p;
_t4 = _t1 + _t3;
}
else
{
_q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
}
// Note here that we have to 'store' all the intermediaries
// _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
// the tradeoff between fwd_mode and rev_mode
if (_b1)
{
float.D _q1_d = dpa.d;
float.D _q2_d = dpb.d;
float.D _t1_d = _q1_d + _q2_d;
float.D _q2_d = dpa.d;
float.D _q3_d = 2.0 * dpa.d;
float.D _q4_d = 0.0 * dpa.p;
float.D _t2_d = _q4_d + _q3_d;
float.D _q6_d = _q5 * _t2_d;
float.D _q7_d = dpb.d;
float.D _q8_d = _t2 * _q7_d
float.D _t3_d = _q6_d + _q8_d;
float.D _t4_d = _t1_d + _t3_d;
return DP<float>(_t4, _t4_d);
}
else
{
DP<float> _t1_dp = sqrt_fwd(dpa);
float.D _q1_d = _t1_dp.d;
return DP<float>(_q1, _q1_d);
}
}
```
## Transposition
### Overview
This transposition pass _assumes_ that the provided function is linear in its differentials.
It is out of scope of this project to attempt to enforce that constraint for user-defined differential code.
For transposition we walk all differential instructions in reverse starting from the return statement, and apply the following rules:
We'll have an accumulator dictionary `Dictionary<IRInst, IRInst> accMap` holding assignments for
intermediaries which don't have concrete variables. When we add a pair (A, C) and (A, B) already exists, this will form the pair (A, ADD(C, B)) in the dictionary. (ADD will be replaced with a call to `T.dadd` for a generic type T)
- If `inst` is a `RETURN(A)`, add pair `(A, d_out)` to `accMap`
- If an instruction is `MUL(P, D)` where D is the differential, add pair `(D, MUL(P, accMap[this_inst]))` to `accMap`
- If an instruction is `ADD(D1, D2)`, where both D1 and D2 are differentials (this is the only config that should occur), then add pairs `(D1, accMap[this_inst])` and `(D2, accMap[this_inst])` to `accMap`
- If an instruction is `CALL(f_fwd, (P1, D1), (P2, D2), ...)`, create variables D1v, D2v, ... for D1, D2, ..., then replace with `CALL(f_rev, (P1, D1v), (P2, D2v), ..., accMap[this_inst])`, and finally add pairs `(D1, LOAD[D1v]), (D2, LOAD[D2v]), ...` to `accMap`
```C
void f_SSA_Rev(inout DP<float> dpa, inout DP<float> dpb, float d_out)
{
bool _b1 = dpa.p > 0;
float _t1, _t2, _q4, _t3, _q5, _t3_d, _t4, _q1;
if (_b1)
{
_t1 = dpa.p + dpb.p;
_t2 = 2.0 * dpa.p;
_q4 = dpa.p;
_t3 = _t2 * dpb.p;
_q5 = dpb.p;
_t4 = _t1 + _t3;
}
else
{
_q1 = sqrt_fwd(DP<float>(dpa.p, 0.0));
}
// Note here that we have to 'store' all the intermediaries
// _t1, _t2, _q4, _t3, _q5, _t3_d, _t4 and _q1. This is fundamentally
// the tradeoff between fwd_mode and rev_mode
if (_b1)
{
float.D _t4_rev = d_out;
float.D _t1_rev = _t4_rev;
float.D _t3_rev = _t4_rev;
float.D _q8_rev = _t3_rev;
float.D _q6_rev = _t3_rev;
float.D _q7_rev = _t2 * _q8_rev;
dpb.d += _q7_rev;
float.D _t2_rev = _q5 * _q6_rev;
float.D _q4_rev = _t2_rev;
float.D _q3_rev = _t2_rev;
dpa.d += 2.0 * _q3_rev;
float.D _q1_rev = _t1_rev;
float.D _q2_rev = _t1_rev;
dpb.d += _q2_rev;
dpa.d += _q1_rev;
}
else
{
_q1_rev = d_out;
DP<float> dpa_copy;
sqrt_rev(dpa_copy, _q1_rev);
dpa.d += dpa_copy.d;
}
}
```

View file

@ -1,396 +0,0 @@
<!--The goal of this set of documents is to describe the design of Slang's automatic differentiation passes, along with the mechanisms & passes used to support various features. -->
This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
## What is Automatic Differentiation?
Before diving into the design of the automatic differentiation (for brevity, we will call it 'auto-diff') passes, it is important to understand the end goal of what auto-diff tries to achieve.
The over-arching goal of Slang's auto-diff is to enable the user to compute derivatives of a given shader program or function's output w.r.t its input parameters. This critical compiler feature enables users to quickly use their shaders with gradient-based parameter optimization algorithms, which forms the backbone of modern machine learning systems. It enables users to train and deploy graphics systems that contain ML primitives (like multi-layer perceptrons or MLPs) or use their shader programs as differentiable primitives within larger ML pipelines.
### More Resources
Here are some links to resources that talk more about differentiable programming from a more mathematical perspective:
1. UCSD CSE 291 (Spring 2024): https://cseweb.ucsd.edu/~tzli/cse291/sp2024/
2. UW CSE 5990 (Winter 2024): https://sites.google.com/cs.washington.edu/cse-599o-dppl
## Definition of Derivatives
This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/03_forward_mode.pdf.
Here, we establish the mathematical definition of derivatives, starting with a simple 1D case (function with a single input and output), and extending to the general case of functions mapping multiple inputs to multiple outputs.
To avoid confusion, we will denote mathematical functions using LaTeX italic script ($f$, $g$, etc..) and programs that compute these functions with markdown code (`f`, `g`, etc..)
### Derivatives of scalar (1D) functions
Consider the simplest case: a smooth scalar mathematical function that maps a real number to another real number:
$$f : \mathbb{R} \to \mathbb{R}$$
There are several definitions for a derivative, but we will use the definition that a derivative is the *closest linear approximation* of the output function at a given input location.
Concretely, given a specific input $x$, we can create a linear approximation of the function $f$ around $x$ as follows:
$$ f(x + dx) \approx f(x) + Df(x) \cdot dx $$
<!--// TODO: Add image here.-->
This can also be understood as a geometric 'tangent' to the function at $x$. $Df(x)$ is the slope of $f$ at $x$, i.e. $\frac{\partial f}{\partial x}$, and $dx$ is the perturbation away from $x$. Our approximation is linear as a function of the perturbation $dx$. Note that no matter how non-linear or complex the underlying function $f(x)$ is, the approximation is always linear (this property becomes very important later).
### Forward-mode derivative functions
Now consider a concrete program `f` that computes some function.
```C
// Computes square of x
float f(float x)
{
return x * x;
}
```
What should its derivative program look like? We need the output $f(x)$ and the product of the derivative at $x$, $Df(x)$, with the differential $dx$.
In Slang, we put both of these together into a single function, called the *forward-mode derivative* function, which takes in a pair $(x, dx)$ and returns a pair $(f(x), Df(x)\cdot dx)$. Note that in auto-diff literature, this is also often referred to as the *total derivative* function.
```C
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx)
{
float x = dpx.getPrimal(); // Can also be accessed via property dpx.p
float dx = dpx.getDifferential(); // Can also be accessed via property dpx.d
return makePair(x * x, (2 * x) * dx);
}
```
Note that `(2 * x)` is the multiplier corresponding to $Df(x)$. We refer to $x$ and $f(x)$ as "*primal*" values and the perturbations $dx$ and $Df(x)\cdot dx$ as "*differential*" values. The reason for this separation is that the "*differential*" output values are always linear w.r.t their "*differential*" inputs.
As the name implies, `DifferentialPair<T>` is a special pair type used by Slang to hold values and their corresponding differentials.
### Forward-mode derivatives for higher-dimensional functions
In practice, most functions tend to have multiple inputs and multiple outputs, i.e. $f: \mathbb{R}^N \to \mathbb{R}^M$
The definition above can be extended to higher dimensions, using the closest-linear-approximation idea. The main difference is that the derivative function represents a hyperplane rather than a line.
Effectively, we want our forward-mode derivative to compute the following:
$$ f(\mathbf{x} + \mathbf{dx}) \approx f(\mathbf{x}) + \langle Df(\mathbf{x}),\mathbf{dx}\rangle $$
Here, the input and its differential can be represented as a vector quantity $\mathbf{x}, \mathbf{dx} \in \mathbb{R}^N$ and the multiplier $Df(\mathbf{x})$ (also known as the *Jacobian* matrix) is an MxN matrix, and $\left\< \cdot,\cdot \right\>$ denotes the inner product (i.e. matrix-vector multiplication)
Here's an example of a Slang function taking in two inputs (N=2) and generating one output (M=1)
```C
// Compute length of hypotenuse.
float f(float x, float y)
{
return sqrt(x * x + y * y);
}
```
and its forward-mode derivative:
```C
// Closest linear approximation at x, y
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
{
float x = dpx.p;
float y = dpy.p;
float dx = dpx.d;
float dy = dpy.d;
return DifferentialPair<float>(
sqrt(x * x + y * y), // f(x, y)
(x * dx + y * dy) / sqrt(x * x + y * y)); // <Df(x,y), dx>
}
```
Important note: the forward-mode function only needs to compute the inner product $\langle Df(\mathbf{x}),\mathbf{dx} \rangle$. The Jacobian matrix itself never needs to be fully materialized. This is a key design element of automatic differentiation, one which allows it to scale to huge input/output counts.
### Building Blocks: Forward-mode derivatives compose in forward order of execution.
In practice, we compute forward-mode derivatives of a complex function by decomposing them into constituent functions (or in compiler-speak: instructions) and composing the forward-mode derivative of each piece in the **same** order.
This is because each forward derivative is a 'right-side' product (or product of Jacobian matrix with a vector)
Here's an example of this in action (consider a complex function $h$ composed of $f$ and $g$):
$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
Its forward-mode derivative is then:
$$ \langle Dh(\mathbf{x}), \mathbf{dx}\rangle = \big\langle Df(\mathbf{x}), \langle Dg(\mathbf{x}), \mathbf{dx}\rangle\big\rangle $$
which is the forward-mode derivative of the outer function $f$ evaluated on the result of the forward-mode derivative of the inner function $g$.
An example of this in Slang code:
```C
// Compute square.
float sqr(float x)
{
return x * x;
}
// Compute length of hypotenuse.
float f(float x, float y)
{
float x_sqr = sqr(x);
float y_sqr = sqr(y);
return sqrt(x_sqr + y_sqr);
}
```
The resulting derivative of `f` can be computed by composition:
```C
// Forward-mode derivative of sqr()
DifferentialPair<float> fwd_sqr(DifferentialPair<float> dpx)
{
float x = dpx.getPrimal();
float dx = dpx.getDifferential();
return DifferentialPair<float>(x * x, 2 * x * dx);
}
// Forward-mode derivative of f()
DifferentialPair<float> fwd_f(DifferentialPair<float> dpx, DifferentialPair<float> dpy)
{
DifferentialPair<float> dp_x_sqr = fwd_sqr(dpx);
DifferentialPair<float> dp_y_sqr = fwd_sqr(dpy);
float x_sqr = dp_x_sqr.getPrimal();
float y_sqr = dp_y_sqr.getPrimal();
float x_sqr_d = dp_x_sqr.getDifferential();
float y_sqr_d = dp_y_sqr.getDifferential();
return DifferentialPair<float>(
sqrt(x_sqr + y_sqr),
(x_sqr_d + y_sqr_d) / (2.0 * sqrt(x_sqr + y_sqr)));
}
```
### Tip: Extracting partial derivatives from a forward-mode derivative (i.e. a 'total' derivative)
As we discussed above, forward-mode derivatives compute $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$ rather than what you may be used to seeing in a calculus course (e.g. partial derivatives like $\frac{\partial f}{\partial x}$).
In fact, the forward-mode derivative is simply the sum of the partial derivatives w.r.t each input parameter multiplied by their differential perturbations $\frac{\partial f}{\partial x} * dx + \frac{\partial f}{\partial y} * dy$. This is the reason for the alternative name: *total derivative*.
Thus, partial derivatives can be obtained by successively setting each input's differential to 1 (and 0 for everything else)
Example:
```C
// Compute partial derivative w.r.t x (pass dx=1.0)
float df_dx = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0)).d;
// Compute partial derivative w.r.t y (pass dy=1.0)
float df_dy = fwd_f(DifferentialPair<float>(x, 0.0), DifferentialPair<float>(y, 1.0)).d;
```
### Tip: Testing forward-mode derivatives using the first principles of calculus (i.e. the *finite difference* method)
In Calculus, partial derivatives of a function are often defined in a 'black box' manner using limits, by perturbing a single parameter by an infinitesimal amount:
$$ \frac{\partial f}{\partial x} = \lim_{dx\to 0} \frac{f(x + dx) - f(x - dx)}{2 * dx} $$
At the moment, we cannot leverage programming languages to compute true infinitesimal limits, but we can replace $dx \to 0$ with a sufficiently small $\epsilon$ leading to the following 'test' to check if derivatives produced by automatic differentiation match with their true mathematical expected values.
Here's an example of using this idea to test functions (many autodiff tests were written this way)
```C
// Compute partial derivative w.r.t x analytically
float df_dx_ad = fwd_f(DifferentialPair<float>(x, 1.0), DifferentialPair<float>(y, 0.0))
// Compute partial derivative w.r.t x through the finite difference (FD) method.
float eps = 1e-4
float df_dx_fd = (f(x + eps, y) - f(x - eps, y)) / (2 * eps);
// If computed correctly, df_dx_ad and df_dx_fd are very close.
```
**Caveats:**
Since the finite difference method only produces a biased estimate of the derivative, the result is only numerically *close* to the auto-diff-based result. Poorly behaved functions (those that rapidly change, or are discontinuous or otherwise non-differentiable) will result in an (expected) mismatch between FD and AD results.
## Reverse-mode derivative functions
This section is based off of these slides: https://cseweb.ucsd.edu/~tzli/cse291/sp2024/lectures/05_reverse_mode.pdf.
### Motivation: Challenges with scaling forward-mode derivatives
A big problem with forward-mode derivatives is their inability to scale to large parameter counts.
Machine learning pipelines often compute derivatives of a large complex pipeline with millions or even billions of input parameters, but a single output value, i.e. the *loss* or *objective* function, frequently denoted by $\mathcal{L}$.
Computing $\frac{\partial \mathcal{L}}{\partial x_i}$ for $N$ inputs $x_i$ using the one-hot vector approach will involve invoking the forward-mode derivative function $N$ times.
The reason for this limitation is that forward-mode derivatives pass derivatives from the inputs through to the outputs by computing the dot-product $\left\< Df(\mathbf{x}),\mathbf{dx}\right\>$.
Instead, we employ a different approach called the reverse-mode derivative, which propagates differentials *backwards* from outputs to inputs.
### Key Idea: Generate code to compute $\langle \frac{\partial \mathcal{L}}{\partial f}, Df(\mathbf{x})\rangle$ rather than $\langle Df(\mathbf{x}),\mathbf{dx}\rangle$
The fundamental building blocks of reverse-mode derivatives are the **left-side inner product**. That is, the product of a vector of derivatives w.r.t outputs $\frac{\partial \mathcal{L}}{\partial f}$ with the Jacobian matrix $Df(\mathbf{x})$.
An important thing to keep in mind is that it does not necessarily matter what the scalar quantity $\mathcal{L}$ is. The goal of this product is to propagate the derivatives of any scalar value $\mathcal{L}$ w.r.t output vector $f(\mathbf{x})$ (i.e., $\frac{\partial \mathcal{L}}{\partial f}$) into derivatives of that same scalar value $\mathcal{L}$ w.r.t the input vector $\mathbf{x}$ (i.e., $\frac{\partial \mathcal{L}}{\partial \mathbf{x}}$).
Here's an example of a Slang function computing the `reverse-mode derivative`.
```C
// Compute length of hypotenuse
float f(float x, float y)
{
return sqrt(x * x + y * y);
}
// Reverse-mode derivative of f. dOutput represents the derivative dL/dOutput of the output w.r.t scalar value.
void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOutput)
{
float x = dpx.getPrimal();
float y = dpy.getPrimal();
float t = 1.0 / (sqrt(x * x + y * y));
dpx = DifferentialPair<float>(
x, // The primal part of the return value is *always* copied in from the input as-is.
dOutput * x * t); // The differential part for x is the derivative dL/dx computed as
// (dL/dOutput) * (dOutput/dx), where dOutput/dx = x / sqrt(x*x+y*y).
dpy = DifferentialPair<float>(
y,
dOutput * y * t); // The differential part for y is the derivative dL/dy computed as
// (dL/dOutput) * (dOutput/dy), where dOutput/dy = y / sqrt(x*x+y*y).
}
```
Note that `rev_f` accepts derivatives w.r.t the output value as the input, and returns derivatives w.r.t inputs as its output (through `inout` parameters). `rev_f` still needs the primal values `x` and `y` to compute the derivatives, so those are still passed in as an input through the primal part of the differential pair.
Also note that the reverse-mode derivative function does not have to compute the primal result value (its return is void). The reason for this is a matter of convenience: reverse-mode derivatives are often invoked after all the primal functions, and there is typically no need for these values. We go into more detail on this topic in the checkpointing chapter.
The reverse mode function can be used to compute both `dOutput/dx` and `dOutput/dy` with a single invocation (unlike the forward-mode case where we had to invoke `fwd_f` once for each input)
```C
DifferentialPair<float> dpx = makePair<float>(x, 0.f); // Initialize diff-value to 0 (not necessary)
DifferentialPair<float> dpy = makePair<float>(y, 0.f); // Initialize diff-value to 0 (not necessary)
rev_f(dpx, dpy, 1.0); // Pass 1.0 for dL/dOutput so that the results are (1.0 * dOutput/dx) and (1.0 * dOutput/dy)
float doutput_dx = dpx.getDifferential();
float doutput_dy = dpy.getDifferential();
```
### Extension to multiple outputs
The extension to multiple outputs is fairly natural. Each output gets a separate input for its derivative.
Here is an example:
```C
// Computation involving multiple inputs and outputs.
float2 f_multi_output(float x, float y)
{
return float2(
x * x,
x + y);
}
// Reverse-mode derivative of 'f_multi_output'. The derivative of the outputs is also a vector quantity
// (type follows from return type of f_multi_output)
void rev_f_multi_output(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float2 dOut)
{
float x = dpx.getPrimal();
float y = dpy.getPrimal();
dpx = DifferentialPair<float>(x, dOut[0] * 2 * x + dOut[1]);
dpy = DifferentialPair<float>(y, dOut[1]);
}
```
### Jacobian method: Generate forward- and reverse-mode derivatives from first principles.
A simple way to figure out what the generated reverse (or forward) derivative function is supposed to compute is to write down the entire Jacobian function. That is, write down the partial derivative of each output w.r.t each input
$$
D\mathbf{f}(\mathbf{x}) = \begin{bmatrix}
\partial f_0 / \partial x & \partial f_0 / \partial y \\
\partial f_1 / \partial x & \partial f_1 / \partial y \\
\end{bmatrix} =
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix}
$$
The **reverse-mode derivative**'s outputs should match the left-product of this matrix with the vector of derivatives w.r.t outputs:
$$ \left\langle \frac{\partial \mathcal{L}}{\partial \mathbf{f}}, D\mathbf{f}(\mathbf{x})\right\rangle =
\begin{bmatrix}
\frac{\partial \mathcal{L}}{\partial f_0} & \frac{\partial \mathcal{L}}{\partial f_1}
\end{bmatrix}
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix} =
\begin{bmatrix} \left(\frac{\partial \mathcal{L}}{\partial f_0} \cdot 2x + \frac{\partial \mathcal{L}}{\partial f_1}\right) & \frac{\partial \mathcal{L}}{\partial f_1} \end{bmatrix}
$$
and the **forward-mode derivative**'s outputs should match the right-product of this matrix with the vector of differentials of the inputs:
$$ \langle D\mathbf{f}(\mathbf{x}), d\mathbf{x}\rangle =
\begin{bmatrix}
2x & 0.0 \\
1.0 & 1.0 \\
\end{bmatrix}
\begin{bmatrix}
dx \\ dy
\end{bmatrix} =
\begin{bmatrix} 2x \cdot dx & dx + dy \end{bmatrix}
$$
Note that when we generate derivative code in practice, we do not materialize the full Jacobian matrix, and instead use the composition property to chain together derivatives at the instruction level.
However, the resulting code is equivalent to the Jacobian method (mathematically), and it is a good, analytical way to confirm that the generated code is indeed correct (or when thinking about what the derivative of a particular instruction/set of instructions should be)
### Building Blocks: Reverse-mode derivatives compose in reverse order of execution.
A consequence of using the 'left-side inner product' is that derivatives of a composite function must be computed in the reverse of the order of primal computation.
Here's an example of a composite function $h$ (similar to the example used in forward-mode building blocks):
$$ h(\mathbf{x}) = f(g(\mathbf{x})) $$
where (for brevity):
$$ \mathbf{y} = g(\mathbf{x}) $$
The reverse-mode derivative function for $h$ can be written as the composition of the reverse-mode derivatives of $f$ and $g$
$$ \left\langle \frac{\partial L}{\partial h}, Dh(\mathbf{x})\right\rangle = \left\langle \left\langle \frac{\partial L}{\partial h}, Df(\mathbf{y})\right\rangle , Dg(\mathbf{x})\right\rangle $$
Note the 'backward' order here. We must first pass the derivatives through the outer function $f$, and then pass the result through the inner function $g$ to compute derivatives w.r.t inner-most inputs $\mathbf{x}$. This process of passing derivatives backwards is often referred to as *backpropagation*.
A more concrete Slang example of the same:
```C
// Compute square
float sqr(float x)
{
return x * x;
}
// Compute length of hypotenuse
float f(float x, float y)
{
return sqrt(sqr(x) + sqr(y));
}
```
The derivative functions are then:
```C
void rev_sqr(inout DifferentialPair<float> dpx, float dOutput)
{
    float x = dpx.getPrimal();
    dpx = DifferentialPair<float>(x, dOutput * 2 * x);
}
void rev_f(inout DifferentialPair<float> dpx, inout DifferentialPair<float> dpy, float dOut)
{
    float x = dpx.getPrimal();
    float y = dpy.getPrimal();
    float t = 0.5f / sqrt(x * x + y * y);
    float d_xsqr = t * dOut; // Calculate derivatives w.r.t output of sqr(x)
    float d_ysqr = t * dOut; // Calculate derivatives w.r.t output of sqr(y)
    rev_sqr(dpx, d_xsqr); // Propagate to x
    rev_sqr(dpy, d_ysqr); // Propagate to y
}
```
When comparing `rev_f`'s implementation to `fwd_f`, note the order of computing derivative w.r.t `sqr` (in `rev_f`, `rev_sqr` is called at the end, while in `fwd_f` it is called at the beginning)

View file

@ -1,92 +0,0 @@
This document details auto-diff-related decorations that are lowered in to the IR to help annotate methods with relevant information.
## `[Differentiable]`
The `[Differentiable]` attribute is used to mark functions as being differentiable. The auto-diff process will only touch functions that are marked explicitly as `[Differentiable]`. All other functions are considered non-differentiable and calls to such functions from a differentiable function are simply copied as-is with no transformation.
Further, only `[Differentiable]` methods are checked during the derivative data-flow pass. This decorator is translated into `BackwardDifferentiableAttribute` (which implies both forward and backward differentiability), and then lowered into the IR `OpBackwardDifferentiableDecoration`
**Note:** `[Differentiable]` was previously implemented as two separate decorators `[ForwardDifferentiable]` and `[BackwardDifferentiable]` to denote differentiability with each type of auto-diff transformation. However, these are now **deprecated**. The preferred approach is to use only `[Differentiable]`
`fwd_diff` and `bwd_diff` cannot be directly called on methods that don't have the `[Differentiable]` tag (will result in an error). If non-`[Differentiable]` methods are called from within a `[Differentiable]` method, they must be wrapped in `no_diff()` operation (enforced by the [derivative data-flow analysis pass](./types.md#derivative-data-flow-analysis) )
### `[Differentiable]` for `interface` Requirements
The `[Differentiable]` attribute can also be used to decorate interface requirements. In this case, the attribute is handled in a slightly different manner, since we do not have access to the concrete implementations.
The process is roughly as follows:
1. During the semantic checking step, when checking a method that is an interface requirement (in `checkCallableDeclCommon` in `slang-check-decl.cpp`), we check if the method has a `[Differentiable]` attribute
2. If yes, we construct a set of new method declarations, one for the forward-mode derivative (`ForwardDerivativeRequirementDecl`) and one for the reverse-mode derivative (`BackwardDerivativeRequirementDecl`), with the appropriate translated function types and insert them into the same interface.
3. Insert a new member into the original method to reference the new declarations (`DerivativeRequirementReferenceDecl`)
4. When lowering to IR, the `DerivativeRequirementReferenceDecl` member is converted into a custom derivative reference by adding the `OpBackwardDerivativeDecoration(deriv-fn-req-key)` and `OpForwardDerivativeDecoration(deriv-fn-req-key)` decorations on the primal method's requirement key.
Here is an example of what this would look like:
```C
interface IFoo
{
[Differentiable]
float bar(float);
};
// After checking & lowering
interface IFoo_after_checking_and_lowering
{
[BackwardDerivative(bar_bwd)]
[ForwardDerivative(bar_fwd)]
float bar(float);
void bar_bwd(inout DifferentialPair<float>, float);
DifferentialPair<float> bar_fwd(DifferentialPair<float>);
};
```
**Note:** All conforming types must _also_ declare their corresponding implementations as differentiable so that their derivative implementations are synthesized to match the interface signature. In this sense, the `[Differentiable]` attribute is part of the function's signature, so a `[Differentiable]` interface requirement can only be satisfied by a `[Differentiable]` function implementation
### `[TreatAsDifferentiable]`
In large codebases where some interfaces may have several possible implementations, it may not be reasonable to have to mark all possible implementations with `[Differentiable]`, especially if certain implementations use hacks or workarounds that need additional consideration before they can be marked `[Differentiable]`
In such cases, we provide the `[TreatAsDifferentiable]` decoration (AST node: `TreatAsDifferentiableAttribute`, IR: `OpTreatAsDifferentiableDecoration`), which instructs the auto-diff passes to construct an 'empty' function that returns a 0 (or 0-equivalent) for the derivative values. This allows the signature of a `[TreatAsDifferentiable]` function to match a `[Differentiable]` requirement without actually having to produce a derivative.
## Custom derivative decorators
In many cases, it is desirable to manually specify the derivative code for a method rather than let the auto-diff pass synthesize it from the method body. This is usually desirable if:
1. The body of the method is too complex, and there is a simpler, mathematically equivalent way to compute the same value (often the case for intrinsics like `sin(x)`, `arccos(x)`, etc..)
2. The method involves global/shared memory accesses, and synthesized derivative code may cause race conditions or be very slow due to overuse of synchronization. For this reason Slang assumes global memory accesses are non-differentiable by default, and requires that the user (or the core module) define separate accessors with different derivative semantics.
The Slang front-end provides two sets of decorators to facilitate this:
1. To reference a custom derivative function from a primal function: `[ForwardDerivative(fn)]` and `[BackwardDerivative(fn)]` (AST Nodes: `ForwardDerivativeAttribute`/`BackwardDerivativeAttribute`, IR: `OpForwardDerivativeDecoration`/`OpBackwardDerivativeDecoration`), and
2. To reference a primal function from its custom derivative function: `[ForwardDerivativeOf(fn)]` and `[BackwardDerivativeOf(fn)]` (AST Nodes: `ForwardDerivativeAttributeOf`/`BackwardDerivativeAttributeOf`). These attributes are useful to provide custom derivatives for existing methods in a different file without having to edit/change that module. For instance, we use `diff.meta.slang` to provide derivatives for the core module functions in `hlsl.meta.slang`. When lowering to IR, these references are placed on the target (primal function). That way both sets of decorations are lowered on the primal function.
These decorators also work on generically defined methods, as well as struct methods. Similar to how function calls work, these decorators also work on overloaded methods (and reuse the `ResolveInvoke` infrastructure to perform resolution)
### Checking custom derivative signatures
To ensure that the user-provided derivatives agree with the expected signature, as well as resolve the appropriate method when multiple overloads are available, we check the signature of the custom derivative function against the translated version of the primal function. This currently occurs in `checkDerivativeAttribute()`/`checkDerivativeOfAttribute()`.
The checking process re-uses existing infrastructure from `ResolveInvoke`, by constructing a temporary invoke expr to call the user-provided derivative using a set of 'imaginary' arguments according to the translated type of the primal method. If `ResolveInvoke` is successful, the provided derivative signature is considered to be a match. This approach also automatically allows us to resolve overloaded methods, account for generic types and type coercion.
## `[PrimalSubstitute(fn)]` and `[PrimalSubstituteOf(fn)]`
In some cases, we face the opposite problem that inspired custom derivatives. That is, we want the compiler to auto-synthesize the derivative from the function body, but there _is_ no function body to translate.
This frequently occurs with hardware intrinsic operations that are lowered into special op-codes that map to hardware units, such as texture sampling & interpolation operations.
However, these operations do have reference 'software' implementations which can be used to produce the derivative.
To allow user code to use the fast hardware intrinsics for the primal pass, but use synthesized derivatives for the derivative pass, we provide decorators `[PrimalSubstitute(ref-fn)]` and `[PrimalSubstituteOf(orig-fn)]` (AST Node: `PrimalSubstituteAttribute`/`PrimalSubstituteOfAttribute`, IR: `OpPrimalSubstituteDecoration`), that can be used to provide a reference implementation for the auto-diff pass.
Example:
```C
[PrimalSubstitute(sampleTexture_ref)]
float sampleTexture(TexHandle2D tex, float2 uv)
{
// Hardware intrinsics
}
float sampleTexture_ref(TexHandle2D tex, float2 uv)
{
// Reference SW implementation.
}
void sampleTexture_bwd(TexHandle2D tex, inout DifferentialPair<float2> dp_uv, float dOut)
{
    // Backward derivative code synthesized using the reference implementation.
}
```
The implementation of `[PrimalSubstitute(fn)]` is relatively straightforward. When the transcribers are asked to synthesize a derivative of a function, they check for a `OpPrimalSubstituteDecoration`, and swap the current function out for the substitute function before proceeding with derivative synthesis.

View file

@ -1,290 +0,0 @@
This documentation is intended for Slang contributors and is written from a compiler engineering point of view. For Slang users, see the user-guide at this link: [https://shader-slang.com/slang/user-guide/autodiff.html](https://shader-slang.com/slang/user-guide/autodiff.html)
Before diving into this document, please review the document on [Basics](./basics.md) for the fundamentals of automatic differentiation.
# Components of the Type System
Here we detail the main components of the type system: the `IDifferentiable` interface to define differentiable types, the `DifferentialPair<T>` type to carry a primal and corresponding differential in a single type.
We also detail how auto-diff operators are type-checked (the higher-order function checking system), how the `no_diff` decoration can be used to avoid differentiation through attributed types, and the derivative data flow analysis that warns the user of unintentionally stopping derivatives.
## `interface IDifferentiable`
Defined in core.meta.slang, `IDifferentiable` forms the basis for denoting differentiable types, both within the core module, and otherwise.
The definition of `IDifferentiable` is designed to encapsulate the following 4 items:
1. `Differential`: The type of the differential value of the conforming type. This allows custom data-structures to be defined to carry the differential values, which may be optimized for space instead of relying solely on compiler synthesis.
Since the computation of derivatives is inherently linear, we only need access to a few operations. These are:
2. `dadd(Differential, Differential) -> Differential`: Addition of two values of the differential type. Its implementation must be associative and commutative, or the resulting derivative code may be incorrect.
3. `dzero() -> Differential`: Additive identity (i.e. the zero or empty value) that can be used to initialize variables during gradient aggregation
4. `dmul<S:__BuiltinRealType>(S, Differential)`: Scalar multiplication of a real number with the differential type. Its implementation must be distributive over differential addition (`dadd`).
Points 2, 3 & 4 are derived from the concept of vector spaces. The derivative values of any Slang function always form a vector space (https://en.wikipedia.org/wiki/Vector_space).
### Derivative member associations
In certain scenarios, the compiler needs information on how the fields in the original type map to the differential type. Particularly, this is a problem when differentiating the implicit construction of a struct through braces (i.e. `{}`), represented by `kIROp_MakeStruct`. We provide the decorator `[DerivativeMember(DifferentialTypeName.fieldName)]` (ASTNode: DerivativeMemberAttribute, IR: kIROp_DerivativeMemberDecoration) to explicitly mark these associations.
Example
```C
struct MyType : IDifferentiable
{
typealias Differential = MyDiffType;
float a;
[DerivativeMember(MyDiffType.db)]
float b;
/* ... */
};
struct MyDiffType
{
float db;
};
```
### Automatic Synthesis of `IDifferentiable` Conformances for Aggregate Types
It can be tedious to expect users to hand-write the associated `Differential` type, the corresponding mappings and interface methods for every user-defined `struct` type. For aggregate types, these are trivial to construct by analysing which of their components conform to `IDifferentiable`.
The synthesis proceeds in roughly the following fashion:
1. `IDifferentiable`'s components are tagged with special decorator `__builtin_requirement(unique_integer_id)` which carries an enum value from `BuiltinRequirementKind`.
2. When checking that types conform to their interfaces, if a user-provided definition does not satisfy a requirement with a built-in tag, we perform synthesis by dispatching to `trySynthesizeRequirementWitness`.
3. For _user-defined types_, Differential **types** are synthesized during conformance-checking through `trySynthesizeDifferentialAssociatedTypeRequirementWitness` by checking if each constituent type conforms to `IDifferentiable`, looking up the corresponding `Differential` type, and constructing a new aggregate type from these differential types. Note that since it is possible that a `Differential` type of a constituent member has not yet been synthesized, we have additional logic in the lookup system (`trySynthesizeRequirementWitness`) that synthesizes a temporary empty type with a `ToBeSynthesizedModifier`, so that the fields can be filled in later, when the member type undergoes conformance checking.
4. For _user-defined types_, Differential methods (`dadd`, `dzero` and `dmul`) are synthesized in `trySynthesizeDifferentialMethodRequirementWitness` by utilizing the `Differential` member and its `[DifferentialMember]` decorations to determine which fields need to be considered and the base type to use for each field. There are two synthesis patterns. The fully-inductive pattern is used for `dadd` and `dzero` which works by calling `dadd` and `dzero` respectively on the individual fields of the `Differential` type under consideration.
Example:
```C
// Synthesized from "struct T {FT1 field1; FT2 field2;}"
T.Differential dadd(T.Differential a, T.Differential b)
{
return Differential(
FT1.dadd(a.field1, b.field1),
FT2.dadd(a.field2, b.field2),
)
}
```
On the other hand, `dmul` uses the fixed-first arg pattern since the first argument is a common scalar, and proceeds inductively on all the other args.
Example:
```C
// Synthesized from "struct T {FT1 field1; FT2 field2;}"
T.Differential dmul<S:__BuiltinRealType>(S s, T.Differential a)
{
return Differential(
FT1<S>.dmul(s, a.field1),
FT2<S>.dmul(s, a.field2),
)
}
```
5. During auto-diff, the compiler can sometimes synthesize new aggregate types. The most common case is the intermediate context type (`kIROp_BackwardDerivativeIntermediateContextType`), which is lowered into a standard struct once the auto-diff pass is complete. It is important to synthesize the `IDifferentiable` conformance for such types since they may be further differentiated (through higher-order differentiation). This implementation is contained in `fillDifferentialTypeImplementationForStruct(...)` and is roughly analogous to the AST-side synthesis.
### Differentiable Type Dictionaries
During auto-diff, the IR passes frequently need to perform lookups to check if an `IRType` is differentiable, and retrieve references to the corresponding `IDifferentiable` methods. These lookups also need to work on generic parameters (that are defined inside generic containers), and existential types that are interface-typed parameters.
To accommodate this range of different type systems, Slang uses a type dictionary system that associates a dictionary of relevant types with each function. This works in the following way:
1. When `CheckTerm()` is called on an expression within a function that is marked differentiable (`[Differentiable]`), we check if the resolved type conforms to `IDifferentiable`. If so, we add this type to the dictionary along with the witness to its differentiability. The dictionary is currently located on `DifferentiableAttribute` that corresponds to the `[Differentiable]` modifier.
2. When lowering to IR, we create a `DifferentiableTypeDictionaryDecoration` which holds the IR versions of all the types in the dictionary as well as a reference to their `IDifferentiable` witness tables.
3. When synthesizing the derivative code, all the transcriber passes use `DifferentiableTypeConformanceContext::setFunc()` to load the type dictionary. `DifferentiableTypeConformanceContext` then provides convenience functions to lookup differentiable types, appropriate `IDifferentiable` methods, and construct appropriate `DifferentialPair<T>`s.
### Looking up Differential Info on _Generic_ types
Generically defined types are also lowered into the differentiable type dictionary, but rather than having a concrete witness table, the witness table is itself a parameter. When auto-diff passes need to find the differential type or place a call to the IDifferentiable methods, this is turned into a lookup on the witness table parameter (i.e. `Lookup(<InterfaceRequirementKey>, <WitnessTableParameter>)`). Note that these lookups instructions are inserted into the generic parent container rather than the inner most function.
Example:
```C
T myFunc<T:IDifferentiable>(T a)
{
return a * a;
}
// Reverse-mode differentiated version
void bwd_myFunc<T:IDifferentiable>(
inout DifferentialPair<T> dpa,
T.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
{
T.Differential da = T.dzero(); // T.dzero is Lookup('dzero', T_Witness_Table)
da = T.dadd(dpa.p * dOut, da); // T.dadd is Lookup('dadd', T_Witness_Table)
da = T.dadd(dpa.p * dOut, da);
dpa = diffPair(dpa.p, da);
}
```
### Looking up Differential Info on _Existential_ types
Existential types are interface-typed values, where there are multiple possible implementations at run-time. The existential type carries information about the concrete type at run-time and is effectively a 'tagged union' of all possible types.
#### Differential type of an Existential
The differential type of an existential type is tricky to define since our type system's only restriction on the `.Differential` type is that it also conforms to `IDifferentiable`. The differential type of any interface `IInterface : IDifferentiable` is therefore the interface type `IDifferentiable`. This is problematic since Slang generally requires a static `anyValueSize` that must be a strict upper bound on the sizes of all conforming types (since this size is used to allocate space for the union). Since `IDifferentiable` is defined in the core module `core.meta.slang` and can be used by the user, it is impossible to define a reliable bound.
We instead provide a new **any-value-size inference** pass (`slang-ir-any-value-inference.h`/`slang-ir-any-value-inference.cpp`) that assembles a list of types that conform to each interface in the final linked IR and determines a relevant upper bound. This allows us to ignore types that conform to `IDifferentiable` but aren't used in the final IR, and generate a tighter upper bound.
**Future work:**
This approach, while functional, creates a locality problem since the size of `IDifferentiable` is the max of _all_ types that conform to `IDifferentiable` in visible modules, even though we only care about the subset of types that appear as `T.Differential` for `T : IInterface`. The reason for this problem is that upon performing an associated type lookup, the Slang IR drops all information about the base interface that the lookup starts from and only considers the constraint interface (in this case `Differential : IDifferentiable`).
There are several ways to resolve this issue, including (i) a static analysis pass that determines the possible set of types at each use location and propagates them to determine a narrower set of types, or (ii) generic (or 'parameterized') interfaces, such as `IDifferentiable<T>` where each version can have a different set of conforming types.
<!--#### IDifferentiable Method lookups on an Existential
All other method lookups are performed using existential-type lookups on the existential parameter. The idea is that existential-typed parameters come with a witness-table component that can be accessed by invoking `kIROp_ExtractExistentialWitnessTable` on them. This allows us to look up the `dadd`/`dzero` methods on this witness table in the same way as we did for generic types.-->
Example:
```C
interface IInterface : IDifferentiable
{
[Differentiable]
This foo(float val);
[Differentiable]
float bar();
};
float myFunc(IInterface obj, float a)
{
IInterface k = obj.foo(a);
return k.bar();
}
// Reverse-mode differentiated version (in pseudo-code corresponding to IR, some of these will get lowered further)
void bwd_myFunc(
inout DifferentialPair<IInterface> dpobj,
inout DifferentialPair<float> dpa,
float.Differential dOut) // T.Differential is Lookup('Differential', T_Witness_Table)
{
// Primal pass..
IInterface obj = dpobj.p;
IInterface k = obj.foo(a);
// .....
// Backward pass
DifferentialPair<IInterface> dpk = diffPair(k);
bwd_bar(dpk, dOut);
IDifferentiable dk = dpk.d; // Differential of `IInterface` is `IDifferentiable`
DifferentialPair<IInterface> dp = diffPair(dpobj.p);
bwd_foo(dpobj, dpa, dk);
}
```
#### Looking up `dadd()` and `dzero()` on Existential Types
There are two distinct cases for lookup on an existential type. The more common case is the closed-box existential type represented simply by an interface. Every value of this type contains a type identifier & a witness table identifier along with the value itself. The less common case is when the function calls are performed directly on the value after being cast to the concrete type.
**`dzero()` for "closed" Existential type: The `NullDifferential` Type**
For concrete and even generic types, we can initialize a derivative accumulator variable by calling the appropriate `Type.dzero()` method. This is unfortunately not possible when initializing an existential differential (which is currently of type `IDifferentiable`), since we must also initialize the type-id of this existential to one of the implementations, but we do not know which one yet since that is a run-time value that only becomes known after the first differential value is generated.
To get around this issue, we declare a special type called `NullDifferential` that acts as a "none type" for any `IDifferentiable` existential object.
**`dadd()` for "closed" Existential types: `__existential_dadd`**
We cannot directly use `dadd()` on two existential differentials of type `IDifferentiable` because we must handle the case where one of them is of type `NullDifferential` and `dadd()` is only defined for differentials of the same type.
We handle this currently by synthesizing a special method called `__existential_dadd` (`getOrCreateExistentialDAddMethod` in `slang-ir-autodiff.cpp`) that performs a run-time type-id check to see if one of the operands is of type `NullDifferential` and returns the other operand if so. If both are non-null, we dispatch to the appropriate `dadd` for the concrete type.
**`dadd()` and `dzero()` for "open" Existential types**
If we are dealing with values of the concrete type (i.e. the opened value obtained through `ExtractExistentialValue(ExistentialParam)`). Then we can perform lookups in the same way we do for generic type. All existential parameters come with a witness table. We insert instructions to extract this witness table and perform lookups accordingly. That is, for `dadd()`, we use `Lookup('dadd', ExtractExistentialWitnessTable(ExistentialParam))` and place a call to the result.
## `struct DifferentialPair<T:IDifferentiable>`
The second major component is `DifferentialPair<T:IDifferentiable>` that represents a pair of a primal value and its corresponding differential value.
The differential pair is primarily used for passing & receiving derivatives from the synthesized derivative methods, as well as for block parameters on the IR-side.
Both `fwd_diff(fn)` and `bwd_diff(fn)` act as function-to-function transformations, and so the Slang front-end translates the type of `fn` to its derivative version so the arguments can be type checked.
### Pair type lowering.
The differential pair type is a special type throughout the AST and IR passes (AST Node: `DifferentialPairType`, IR: `kIROp_DifferentialPairType`) because of its use in front-end semantic checking and when synthesizing the derivative code for the functions. Once the auto-diff passes are complete, the pair types are lowered into simple `struct`s so they can be easily emitted (`DiffPairLoweringPass` in `slang-ir-autodiff-pairs.cpp`).
We also define additional instructions for pair construction (`kIROp_MakeDifferentialPair`) and extraction (`kIROp_DifferentialPairGetDifferential` & `kIROp_DifferentialPairGetPrimal`) which are lowered into struct construction and field accessors, respectively.
### "User-code" Differential Pairs
Just as we use special IR codes for differential pairs because they have special handling in the IR passes, sometimes differential pairs should be _treated as_ regular struct types during the auto-diff passes.
This happens primarily during higher-order differentiation when the user wishes to differentiate the same code multiple times.
Slang's auto-diff approaches this by rewriting all the relevant differential pairs into 'irrelevant' differential pairs (`kIROp_DifferentialPairUserCode`) and 'irrelevant' accessors (`kIROp_DifferentialPairGetDifferentialUserCode`, `kIROp_DifferentialPairGetPrimalUserCode`) at the end of **each auto-diff iteration** so that the next iteration treats these as regular differentiable types.
The user-code versions are also lowered into `struct`s in the same way.
## Type Checking of Auto-Diff Calls (and other _higher-order_ functions)
Since `fwd_diff` and `bwd_diff` are represented as higher order functions that take a function as an input and return the derivative function, the front-end semantic checking needs some notion of higher-order functions to be able to check and lower the calls into appropriate IR.
### Higher-order Invocation Base: `HigherOrderInvokeExpr`
All higher order transformations derive from `HigherOrderInvokeExpr`. For auto-diff there are two possible expression classes `ForwardDifferentiateExpr` and `BackwardDifferentiateExpr`, both of which derive from this parent expression.
### Higher-order Function Call Checking: `HigherOrderInvokeExprCheckingActions`
Resolving the concrete method is not a trivial issue in Slang, given its support for overloading, type coercion and more. This becomes more complex with the presence of a function transformation in the chain.
For example, if we have `fwd_diff(f)(DiffPair<float>(...), DiffPair<double>(...))`, we would need to find the correct match for `f` based on its post-transform argument types.
To facilitate this we use the following workflow:
1. The `HigherOrderInvokeExprCheckingActions` base class provides a mechanism for different higher-order expressions to implement their type translation (i.e. what is the type of the transformed function).
2. The checking mechanism passes all detected overloads for `f` through the type translation and assembles a new group out of the results (the new functions are 'temporary')
3. This new group is used by `ResolveInvoke` when performing overload resolution and type coercion using the user-provided argument list.
4. The resolved signature (if there is one) is then replaced with the corresponding function reference and wrapped in the appropriate higher-order invoke.
**Example:**
Let's say we have two functions with the same name `f`: (`int -> float`, `double, double -> float`)
and we want to resolve `fwd_diff(f)(DiffPair<float>(1.0, 0.0), DiffPair<float>(0.0, 1.0))`.
The higher-order checking actions will synthesize the 'temporary' group of translated signatures (`int -> DiffPair<float>`, `DiffPair<double>, DiffPair<double> -> DiffPair<float>`).
Invoke resolution will then narrow this down to a single match (`DiffPair<double>, DiffPair<double> -> DiffPair<float>`) by automatically casting the `float`s to `double`s. Once the resolution is complete,
we return `InvokeExpr(ForwardDifferentiateExpr(f : double, double -> float), casted_args)` by wrapping the corresponding function in the corresponding higher-order expr
## Attributed Types (`no_diff` parameters)
Often, it will be necessary to prevent gradients from propagating through certain parameters, for correctness reasons. For example, values representing random samples are often not differentiated since the result may be mathematically incorrect.
Slang provides the `no_diff` operator to mark parameters as non-differentiable, even if they use a type that conforms to `IDifferentiable`
```C
float myFunc(float a, no_diff float b)
{
return a * b;
}
// Resulting fwd-mode derivative:
DiffPair<float> myFunc(DiffPair<float> dpa, float b)
{
return diffPair(dpa.p * b, dpa.d * b);
}
```
Slang uses _OpAttributedType_ to denote the IR type of such parameters. For example, the lowered type of `b` in the above example is `OpAttributedType(OpFloat, OpNoDiffAttr)`. In the front-end, this is represented through the `ModifiedType` AST node.
Sometimes, this additional layer can get in the way of things like type equality checks and other mechanisms where the `no_diff` is irrelevant. Thus, we provide the `unwrapAttributedType` helper to remove attributed type layers for such cases.
## Derivative Data-Flow Analysis
Slang has a derivative data-flow analysis pass that is performed on a per-function basis immediately after lowering to IR and before the linking step (`slang-ir-check-differentiability.h`/`slang-ir-check-differentiability.cpp`).
The job of this pass is to enforce that instructions that are of a differentiable type will propagate derivatives, unless explicitly dropped by the user through `detach()` or `no_diff`. The reason for this is that Slang requires functions to be decorated with `[Differentiable]` to allow it to propagate derivatives. Otherwise, the function is considered non-differentiable, and effectively produces a 0 derivative. This can lead to frustrating situations where derivatives are silently dropped because a function was unintentionally left non-differentiable. Example:
```C
float nonDiffFunc(float x)
{
/* ... */
}
float differentiableFunc(float x) // Forgot to annotate with [Differentiable]
{
/* ... */
}
float main(float x)
{
// User doesn't realise that the function that is supposed to be differentiable is not
// getting differentiated, because the types here are all 'float'.
//
return nonDiffFunc(x) * differentiableFunc(x);
}
```
The data-flow analysis step enforces that non-differentiable functions used in a differentiable context should get their derivative dropped explicitly. That way, it is clear to the user whether a call is getting differentiated or dropped.
Same example with `no_diff` enforcement:
```C
float nonDiffFunc(float x)
{
/* ... */
}
[Differentiable]
float differentiableFunc(float x)
{
/* ... */
}
float main(float x)
{
return no_diff(nonDiffFunc(x)) * differentiableFunc(x);
}
```
A `no_diff` can only be used directly on a function call, and turns into a `TreatAsDifferentiableDecoration` that indicates that the function will not produce a derivative.
The derivative data-flow analysis pass works similar to a standard data-flow pass:
1. We start by assembling a set of instructions that 'produce' derivatives by starting with the parameters of differentiable types (and without an explicit `no_diff`), and propagating them through each instruction in the block. An inst carries a derivative if one of its operands carries a derivative, and the result type is differentiable.
2. We then assemble a set of instructions that expect a derivative. These are differentiable operands of differentiable functions (unless they have been marked by `no_diff`). We then reverse-propagate this set by adding in all differentiable operands (and repeating this process).
3. During this reverse-propagation, if there is any `OpCall` in the 'expect' set that is not also in the 'produce' set, then we have a situation where the gradient hasn't been explicitly dropped, and we create a user diagnostic.

View file

@ -1,271 +0,0 @@
Capabilities (Out of Date)
============
Slang aims to be a portable language for shader programming, which introduces two complementary problems:
1. We need a way to indicate that certain constructs (types, functions, etc.) are only allowed on certain targets, so that a user gets a meaningful error if they try to do something that won't work on one or more of the APIs or platforms they want to target. Similarly, the user expects to get an error if they call a fragment-shader-specific function inside of, say, compute shader code, or vice versa.
2. If the same feature can be implemented across multiple platforms, but the best (or only) implementation path differs across platforms, then we need a way to express the platform specific code and pick the right implementation per-target.
Item (2) is traditionally handled with preprocessor techniques (e.g., `#ifdef`ing the body of a function based on target platform), but that of course requires that the user invoke the Slang front end once for each target platform, and target-specific coding in a library will then "infect" code that uses that library, forcing them to invoke the front-end once per target as well.
We are especially sensitive to this problem in the compiler itself, because we have to author and maintain the Slang standard modules, which needs to (1) expose the capabilities of many platforms and (2) work across all those platforms. It would be very unfortunate if we had to build different copies of our standard modules per-target.
The intention in Slang is to solve both of these problems with a system of *capabilities*.
What is a capability?
---------------------
For our purposes a capability is a discrete feature that a compilation target either does or does not support.
We could imagine defining a capability for the presence of texture sampling operations with implicit gradients; this capability would be supported when generating fragment shader kernel code, but not when generating code for other stages.
Let's imagine a language syntax that the standard modules could use to define some *atomic* capabilities:
```
capability implicit_gradient_texture_fetches;
```
We can then imagine using attributes to indicate that a function requires a certain capability:
```
struct Texture2D
{
...
// Implicit-gradient sampling operation.
[availableFor(implicit_gradient_texture_fetches)]
float4 Sample(SamplerState s, float2 uv);
}
```
(Note that the `[availableFor(...)]` syntax is just a straw-man to write up examples, and a better name would be desirable if/when we implement this stuff.)
Given those declarations, we could then check when compiling code if the user is trying to call `Texture2D.Sample` in code compiled for a target that *doesn't* support implicit-gradient texture fetches, and issue an appropriate error.
The details on how to sequence this all in the compiler will be covered later.
Derived Capabilities
--------------------
Once we can define atomic capabilities, the next step is to be able to define *derived* capabilities.
Let's imagine that we extend our `capability` syntax so that we can define a new capability that automatically implies one or more other capabilities:
```
capability fragment : implicit_gradient_texture_fetches;
```
Here we've said that whenever the `fragment` capability is available, we can safely assume that the `implicit_gradient_texture_fetches` capability is available (but not vice versa).
Given even a rudimentary tool like that, we can start to build up capabilities that relate closely to the "profiles" in things like D3D:
```
capability d3d;
capability sm_5_0 : d3d;
capability sm_5_1 : sm_5_0;
capability sm_6_0 : sm_5_1;
...
capability d3d11 : d3d, sm_5_0;
capability d3d12 : d3d, sm_6_0;
capability khronos;
capability glsl_400 : khronos;
capability glsl_410 : glsl_400;
...
capability vulkan : khronos, glsl_450;
capability opengl : khronos;
```
Here we are saying that `sm_5_1` supports everything `sm_5_0` supports, and potentially more. We are saying that `d3d12` supports `sm_6_0` but maybe not, e.g., `sm_6_3`.
We are expressing that fact that having a `glsl_*` capability means you are on some Khronos API target, but that it doesn't specify which one.
(The exact details of these declarations obviously aren't the point; getting a good hierarchy of capabilities will take time.)
Capability Composition
----------------------
Sometimes we'll want to give a distinct name to a specific combination of capabilities, but not say that it supports anything new:
```
capability ps_5_1 = sm_5_1 & fragment;
```
Here we are saying that the `ps_5_1` capability is *equivalent* to the combination of `sm_5_1` and `fragment` (that is, if you support both `sm_5_1` and `fragment` then you support `ps_5_1` and vice versa).
Compositions should be allowed in `[availableFor(...)]` attributes (e.g., `[availableFor(vulkan & glsl_450)]`), but pre-defined compositions should be favored when possible.
When composing things with `&` it is safe for the compiler to filter out redundancies based on what it knows so that, e.g., `ps_5_0 & fragment` resolves to just `ps_5_0`.
Once we have an `&` operator for capabilities, it is easy to see that "derived" capabilities are really syntax sugar, so that a derived capability like:
```
capability A : B, C
```
could have been written instead as :
```
capability A_atomic
capability A = A_atomic & B & C
```
Where the `A_atomic` capability guarantees that `A` implies `B` and `C` but not vice versa.
It is also useful to think of an `|` operator on capabilities.
In particular if a function has multiple `[availableFor(...)]` attributes:
```
[availableFor(vulkan & fragment)]
[availableFor(d3d12 & fragment)]
void myFunc();
```
This function should be equivalent to one with just a single `[availableFor((vulkan & fragment) | (d3d12 & fragment))]` which is equivalent to `[availableFor((vulkan | d3d12) & fragment)]`.
Simplification should generally push toward "disjunctive normal form," though, rather than pursue simplifications like that.
Note that we do *not* include negation, so that capabilities are not general Boolean expressions.
Validation
----------
For a given function definition `F`, the front end will scan its body and see what it calls, and compose the capabilities required by the called functions using `&` (simplifying along the way). Call the resulting capability (in disjunctive normal form) `R`.
If `F` doesn't have an `[availableFor(...)]` attribute, then we can derive its *effective* `[availableFor(...)]` capability as `R` (this probably needs to be expressed as an iterative dataflow problem over the call graph, to handle cycles).
If `F` *does* have one or more `[availableFor(...)]` clauses that amount to a declared capability `C` (again in disjunctive normal form), then we can check that `C` implies `R` and error out if it is not the case.
A reasonable implementation would track which calls introduced which requirements, and be able to explain *why* `C` does not capture the stated requirements.
For a shader entry point, we should check it as if it had an `[availableFor(...)]` that is the OR of all the specified target profiles (e.g., `sm_5_0 | glsl_450 | ...`) ANDed with the specified stage (e.g., `fragment`).
Any error here should be reported to the user.
If an entry point has an explicit `[availableFor(...)]` then we should AND that onto the profile computed above, so that the user can restrict certain entry points to certain profiles.
In order to support separate compilation, the functions that are exported from a module should probably either have explicit availability attributes, or else they will be compiled against a kind of "default capability" used for the whole module.
Downstream code that consumes such a module would see declarations with explicit capabilities only.
Picking an appropriate "default capability" to use when compiling modules is an important challenge; it would in practice define the "min spec" to use when compiling.
Capability Overriding
---------------------
It should be possible to define multiple versions of a function, having different `[availableFor(...)]` attributes:
```
[availableFor(vulkan)] void myFunc() { ... }
[availableFor(d3d12)] void myFunc() { ... }
```
For front-end checking, these should be treated as if they were a single definition of `myFunc` with an ORed capability (e.g., `vulkan | d3d12`).
Overload resolution will pick the "best" candidate at a call site based *only* on the signatures of the function (note that this differs greatly from how profile-specific function overloading works in Cg).
The front-end will then generate initial IR code for each definition of `myFunc`.
Each of the IR functions will have the *same* mangled name, but different bodies, and each will have appropriate IR decorations to indicate the capabilities it requires.
The choice of which definition to use is then put off until IR linking for a particular target.
At that point we can look at all the IR functions matching a given mangled name, filter them according to the capabilities of the target, and then select the "best" one.
In general a definition `A` of an IR symbol is better than another definition `B` if the capabilities on `A` imply those on `B` but not vice versa.
(In practice this probably needs to be "the capabilities on `A` intersected with those of the target," and similarly for `B`)
This approach allows us to defer profile-based choices of functions to very late in the process. The one big "gotcha" to be aware of is when functions are overloaded based on pipeline stage, where we would then have to be careful when generating DXIL or SPIR-V modules with multiple entry points (as a single function `f` might need to be specialized twice if it calls a stage-overloaded function `g`).
Capabilities in Other Places
----------------------------
So far I've talked about capabilities on functions, but they should also be allowed on other declarations including:
- Types, to indicate that code using that type needs the given capability
- Interface conformances, to indicate that a type only conforms to the interface when the capabilities are available
- Struct fields, to indicate that the field is only present in the type when the capabilities are present
- Extension declarations, to indicate that everything in them requires the specified capabilities
We should also provide a way to specify that a `register` or other layout modifier is only applicable for specific targets/stages. Such a capability nominally exists in HLSL today, but it would be much more useful if it could be applied to specify target-API-specific bindings.
Only functions should support overloading based on capability. In all other cases there can only be one definition of an entity, and capabilities just decide when it is available.
API Extensions as Capabilities
------------------------------
One clear use case for capabilities is to represent optional extensions, including cases where a feature is "built-in" in D3D but requires an extension in Vulkan:
```
capability KHR_secret_sauce : vulkan;
[available_for(sm_7_0)] // always available for D3D Shader Model 7.0
[available_for(KHR_secret_sauce)] // Need the "secret sauce" extension for Vulkan
void improveShadows();
```
When generating code for Vulkan, we should be able to tell the user that the `improveShadows()` function requires the given extension. The user should be able to express compositions of capabilities in their `-profile` option (and similarly for the API):
```
slangc code.slang -profile vulkan+KHR_secret_sauce
```
(Note that for the command line, it is beneficial to use `+` instead of `&` to avoid conflicts with shell interpreters)
An important question is whether the compiler should automatically infer required extensions without them being specified, so that it produces SPIR-V that requires extensions the user didn't ask for.
The argument against such inference is that users should opt in to non-standard capabilities they are using, but it would be unfortunate if this in turn requires verbose command lines when invoking the compiler.
It should be possible to indicate the capabilities that a module or entry point should be compiled to use without command-line complications.
(A related challenge is when a capability can be provided by two different extensions: how should the compiler select the "right" one to use?)
Disjoint Capabilities
---------------------
Certain compositions of capabilities make no sense. If a user declared a function as needing `vulkan & d3d12` they should probably get an error message.
Knowing that certain capabilities are disjoint can also help improve the overall user experience.
If a function requires `(vulkan & extensionA) | (d3d12 & featureB)` and we know we are compiling for `vulkan` we should be able to give the user a pointed error message saying they need to ask for `extensionA`, because adding `featureB` isn't going to do any good.
As a first-pass model we could have a notion of `abstract` capabilities that are used to model the root of hierarchies of disjoint capabilities:
```
abstract capability api;
abstract capability d3d : api;
capability d3d11 : d3d;
capability d3d12 : d3d;
abstract capability khronos : api;
capability vulkan : khronos;
capability opengl : khronos;
```
As a straw man: we could have a rule that to decide if non-abstract capabilities `A` and `B` are disjoint, we look for their common ancestor in the tree of capabilities.
If the common ancestor is abstract, they are disjoint, and if not, they are not disjoint.
We'd also know that if the user tries to compile for a profile that includes an abstract capability but *not* some concrete capability derived from it, then that is an error (we can't generate code for just `d3d`).
The above is an over-simplification because we don't have a *tree* of capabilities, but a full *graph*, so we'd need an approach that works for the full case.
Interaction with Generics/Interfaces
------------------------------------
It should be possible for an interface requirement to have a capability requirement attached to it.
This would mean that users of the interface can only use the method/type/whatever when the capability is present (just like for any other function):
```
interface ITexture
{
float4 sampleLevel(float2 uv, float lod);
[availableFor(fragment)]
float4 sample(float2 uv); // can only call this from fragment code
}
```
When implementing an interface, any capability constraints we put on a member that satisfies an interface requirement would need to guarantee that either:
- the capabilities on our method are implied by those on the requirement (we don't require more), or
- the capabilities on the method are implied by those on the type itself, or its conformance to the interface (you can't use the conformance without the capabilities), or
- the capabilities are already implied by those the whole module is being compiled for
In each case, you need to be sure that `YourType` can't be passed as a generic argument to some function that uses just the `ITexture` interface above and have them call a method on your type from a profile that doesn't have the required capabilities.
Interaction with Heterogeneity
------------------------------
If Slang eventually supports generating CPU code as well as shaders, it should use capabilities to handle the CPU/GPU split similar to how they can be used to separate out vertex- and fragment-shader functionality.
Something like a `cpu` profile that works as a catch-all for typical host CPU capabilities would be nice, and could be used as a convenient way to mark "host" functions in a file that is otherwise compiled for a "default profile" that assumes GPU capabilities.
Conclusion
----------
Overall, the hope is that in many cases developers will be able to use capability-based partitioning and overloading of APIs to build code that only has to pass through the Slang front-end once, but that can then go through back-end code generation for each target.
In cases where this can't be achieved, the way that capability-based overloading is built into the Slang IR design means that we should be able to merge multiple target-specific definitions into one IR module, so that a module can employ target-specific specializations while still presenting a single API to consumers.

View file

@ -1,150 +0,0 @@
Casting in the Slang Compiler
=============================
The following discussion is about casting within the C++ implementation of the slang compiler.
C++'s built-in mechanisms for casting (principally dynamic_cast) are problematic within the slang compiler codebase. Code using 'dynamic_cast' requires that RTTI information is available, and that a type that uses it has a vtbl (has at least one virtual member). Some problems with this...
* There are types which we want to 'dynamic_cast' that do not have, and we do not want to have a Vtbl (for example Slang::IRInst).
* There are types which a 'dynamic_cast' doesn't do quite what we want (for example casting on Type* derived types typically wants to work on their canonical type)
* We may want to replace use of dynamic_cast in the future for speed/space or other reasons
* It is common in the code base when using a 'smart pointer' type to cast it, but still return a smart pointer
To deal with these issues we need casting within Slang to follow its own methodology. In summary it is as follows...
* Use 'as' free function to do a typical 'dynamic like' cast.
* 'as' doesn't guarantee the returned pointer points to the same object.
* For example with Type* it *actually* does the cast on the canonical type which is often a different object.
* If you want to *literally* do a dynamic cast use 'dynamicCast' free function.
* This guarantees the returned pointer points to the same object (like normal dynamic_cast)
* If you want to return a smart pointer from a cast from a smart pointer use the .as or .dynamicCast *methods*
* If you want to determine if an 'as' cast is possible on a smart pointer use the .is method
* Doing so will produce more efficient code because a new smart pointer does not need to be constructed
These functions will also work with types that do not have Vtbl - like IRInst derived types.
Both 'as' and 'dynamicCast' handle the case if the pointer is a nullptr, by returning a nullptr. If the cast succeeds the cast pointer is returned otherwise nullptr is returned. If a cast is performed with a free function it always returns a raw pointer.
So why have 'as' and 'dynamicCast' - they seem sort of similar? The primary difference is dynamicCast *must* always return a pointer to the same object, whilst 'as' *can* return a pointer to a different object if that is the desired 'normal' casting behavior for the type. This is the case for Type* when using 'as' it may return a different object - the 'canonical type' for the Type*. For a concrete example take 'NamedExpressionType', its canonical type is the type the name relates to. If you use 'as' on it - it will produce a pointer to a different object, an object that will not be castable back into a NamedExpressionType.
Also keep in mind that 'as' behavior is based on the pointer type being cast from. For any pointer to a type derived from Type it will cast the canonical type. **BUT** if the pointer is pointing to a Type derived *object*, but the pointer type is *not* derived from Type (like say RefObject*), then 'as' will behave like dynamicCast.
All this being said, 'as' in usage is seen as the 'default' way to do a 'dynamic like' cast, with the special behaviour appropriate for the type applied when necessary.
By having the free function and method versions of 'as' and 'dynamicCast', you can choose if you want a 'raw' or 'smart' pointer type returned from the cast. If you just want to test if something is a certain type, then using as/dynamicCast free functions is the faster way to do it. If you *know* that a raw pointer is ok, because the object will remain in scope, then again using the free function is better because it does less work. But as the examples following show, care is needed because if you get it wrong the object might go out of scope and leave the raw pointer pointing to a deleted object. When in doubt the safe choice is to typically use .as (or .dynamicCast if appropriate) methods.
Following example shows the different types of casting...
```C++
void someFunction(Decl* decl, Type* type)
{
RefPtr<Decl> declRefPtr(decl);
RefPtr<Type> typeRefPtr(type);
// Use of as
{
// Casting with as on a free function returns a raw pointer
GenericDecl* genericDeclRaw0 = as<GenericDecl>(decl);
// Free function again returns a raw pointer
GenericDecl* genericDeclRaw1 = as<GenericDecl>(declRefPtr);
// Using the as *method* returns a smart pointer holding the cast result
RefPtr<GenericDecl> genericDeclRefPtr0 = declRefPtr.as<GenericDecl>();
// Of course you can use auto with either
auto genericDeclRefPtr1 = declRefPtr.as<GenericDecl>();
auto genericDeclRaw2 = as<GenericDecl>(declRefPtr);
}
// Currently using as on anything not cast *from* Type is the same as dynamicCast.
// But on Type* sometimes you may want to control the cast
{
// With a NamedExpressionType sometimes you don't want 'as' behaviour - if we want to see the information about the name (not the thing
// it relates to, i.e. the canonical type)
NamedExpressionType* namedExpressionRawPtr = dynamicCast<NamedExpressionType>(type);
// Returns the smart pointer
auto namedExpressionRefPtr = typeRefPtr.as<NamedExpressionType>();
}
```
It is important to be aware of what style of cast you use where. Take for example the following function ...
```C++
RefPtr<Expr> substitute(RefPtr<Expr> expr) const
{
return DeclRefBase::Substitute(expr);
}
```
If you want to do a cast on it, you need to be careful especially about scope, for example...
```C++
RefPtr<Expr> expr = ...;
{
// Whoops! This is a problem. When using the free function, the cast is to a *raw* pointer, so obj
// receives a raw pointer. When the RefPtr returned from Substitute goes out of scope (when the statement is left)
// the ref will be removed and if the ref count was 1 destroyed. Now obj points to a freed object and so a crash is
// likely to follow in the future!
auto obj = as<RefObject>(substitute(expr));
}
// So how do we avoid this? Well it depends what the function is returning and the scope. If it's returning a smart pointer,
// you could use the .as method
{
// This can only compile if it is a smart pointer (raw pointers don't have an as method)
auto obj = substitute(expr).as<RefObject>();
}
// Another option is to put the created thing in a smart pointer so you know it's in scope
{
RefPtr<Expr> sub = substitute(expr);
// Ok as long as sub is in scope
auto obj = as<RefObject>(sub);
}
// More awkwardly you could use free function, but assign to a smart pointer, thus maintaining scope
{
RefPtr<RefObject> obj = as<RefObject>(substitute(expr));
}
```
The following code shows the change in behavior of 'as' is based on the source *pointer* type **NOT** the *object* type..
```C++
// Derives from Type
NamedExpressionType* exprType = ...;
// Will be the Type* of the *canonical* type, because the pointer is Type derived and we are using as!
Type* type0 = as<Type>(exprType);
// It's going to be pointing to a different object, because type0 is the cast of the *canonical* type, because exprType derives from Type
SLANG_ASSERT(type0 != exprType);
// If I do a dynamicCast the result is either nullptr or a pointer that *must* point to the same object
Type* type1 = dynamicCast<Type>(exprType);
SLANG_ASSERT(type1 == exprType);
// Here, the pointer is pointing to a NamedExpressionType derived object, which derives from Type. BUT our pointer here does *not* derive from Type.
RefObject* refObj = exprType;
// 'as' just looks at the from type, and it doesn't derive from Type (it's just RefObject), so it does regular as, which is dynamicCast
Type* type2 = as<Type>(refObj);
SLANG_ASSERT(type2 == exprType);
// Finally...
// Is true even though exprType is a NamedExpressionType, because the cast is on the canonical type
SLANG_ASSERT(as<NamedExpressionType>(exprType) == nullptr);
// dynamicCast is always the same object returned, so must match
SLANG_ASSERT(dynamicCast<NamedExpressionType>(exprType) == exprType);
```

View file

@ -1,282 +0,0 @@
Slang Project Coding Conventions
================================
Principles
----------
This document attempts to establish conventions to be used in the Slang codebase.
We have two goals for this convention.
The first goal is to make the code look relatively consistent so that it is easy to navigate and understand for contributors.
Having varying styles across different modules, files, functions, or lines of code makes the overall design and intention of the codebase harder to follow.
The second goal is to minimize the scope complexity of diffs when multiple maintainers work together on the codebase.
In the absence of an enforced style, developers tend to "clean up" code they encounter to match their personal preferences, and in so doing create additional diffs that increase the chances of merge conflicts and pain down the line.
Because the Slang codebase has passed through many hands and evolved without a pre-existing convention, these two goals can come into conflict.
We encourage developers to err on the side of leaving well enough alone (favoring the second goal).
Don't rewrite or refactor code to match these conventions unless you were already going to have to touch all of those lines of code anyway.
Note that external code that is incorporated into the project is excluded from all of these conventions.
Languages
---------
### C++
Most code in the Slang project is implemented in C++.
We currently assume support for some C++11 idioms, but have explicitly avoided adding dependencies on later versions.
As a general rule, be skeptical of "modern C++" ideas unless they are clearly better than simpler alternatives.
We are not quite in the realm of "Orthodox C++", but some of the same guidelines apply:
* Don't use exceptions for non-fatal errors (and even then support a build flag to opt out of exceptions)
* Don't use the built-in C++ RTTI system (home-grown is okay)
* Don't use the C++ variants of C headers (e.g., `<cstdio>` instead of `<stdio.h>`)
* Don't use the STL containers
* Don't use iostreams
The compiler implementation does not follow some of these guidelines at present; that should not be taken as an excuse to further the proliferation of stuff like `dynamic_cast`.
Do as we say, not as we do.
Some relatively recent C++ features that are okay to use:
* Rvalue references for "move semantics," but only if you are implementing performance-critical containers or other code where this really matters.
* `auto` on local variables, if the expected type is clear in context
* Lambdas are allowed, but think carefully about whether just declaring a subroutine would also work.
* Using `>>` to close multiple levels of templates, instead of `> >` (but did you really need all those templates?)
* `nullptr`
* `enum class`
* Range-based `for` loops
* `override`
* Default member initializers in `class`/`struct` bodies
Templates are suitable in cases where they improve clarity and type safety.
As a general rule, it is best when templated code is kept minimal, and forwards to a non-templated function that does the real work, to avoid code bloat.
Any use of template metaprogramming would need to prove itself exceptionally useful to pay for the increase in cognitive complexity.
We don't want to be in the business of maintaining "clever" code.
As a general rule, `const` should be used sparingly and only with things that are logically "value types."
If you find yourself having to `const`-qualify a lot of member functions in a type that you expect to be used as a heap-allocated object, then something has probably gone wrong.
As a general rule, default to making the implementation of a type `public`, and only encapsulate state or operations with `private` when you find that there are complex semantics or invariants that can't be provided without a heavier hand.
### Slang
The Slang project codebase also includes `.slang` files implementing the Slang core module, as well as various test cases and examples.
The conventions described here are thus the "official" recommendations for how users should format Slang code.
To the extent possible, we will try to apply the same basic conventions to both C++ and Slang.
In places where we decide that the two languages merit different rules, we will point it out.
Files and Includes
------------------
### File Names
All files and directories that are added to codebase should have names that contain only ASCII lower-case letters, digits, dots (`.`) and dashes (`-`).
Operating systems still vary greatly in their handling of case sensitivity for file names, and non-ASCII code points are handled with even less consistency; sticking to a restricted subset of ASCII helps avoid some messy interactions between case-insensitive file systems and case-sensitive source-control systems like Git.
As with all these conventions, files from external projects are exempted from these restrictions.
### Naming of Source and Header Files
In general the C++ codebase should be organized around logical features/modules/subsystem, each of which has a single `.h` file and zero or more `.cpp` files to implement it.
If there is a single `.cpp` file, its name should match the header: e.g., `parser.h` and `parser.cpp`.
If there is more than one `.cpp` file, their names should start with the header name: e.g., `parser.h` and `parser-decls.cpp` and `parser-exprs.cpp`.
If there are declarations that need to be shared by the `.cpp` files, but shouldn't appear in the public interface, they can go in a `*-impl.h` header (e.g., `parser-impl.h`).
Use best judgement when deciding what counts as a "feature." One class per file is almost always overkill, but the codebase currently leans too far in the other direction, with some oversized source files.
### Headers
Every header file should have an include guard.
Within the implementation we can use `#pragma once`, but exported API headers (`slang.h`) should use traditional `#ifdef` style guards (and they should be consumable as both C and C++).
A header should include or forward-declare everything it needs in order to compile.
It is *not* up to the programmer who `#include`s a header to sort out the dependencies.
Avoid umbrella or "catch-all" headers.
### Source Files
Every source file should start by including the header for its feature/module, before any other includes (this helps ensure that the header correctly includes its dependencies).
Functions that are only needed within that one source file can be marked `static`, but we should avoid using the same name for functions in different files (in order to support lumped/unified builds).
### Includes
In general, includes should be grouped as follows:
* First, the corresponding feature/module header, if we are in a source file
* Next, any `<>`-enclosed includes for system/OS headers
* Next, any `""`-enclosed includes for external/third-party code that is stored in the project repository
* Finally, any includes for other features in the project
Within each group, includes should be sorted alphabetically.
If this breaks because of ordering issues for system/OS/third-party headers (e.g., `<windows.h>` must be included before `<GL/GL.h>`), then ideally those includes should be mediated by a Slang-project-internal header that features can include.
Namespaces
----------
Favor fewer namespaces when possible.
Small programs may not need any.
All standard module code that a Slang user might link against should go in the `Slang` namespace for now, to avoid any possibility of clashes in a static linking scenario.
The public C API is obviously an exception to this.
Code Formatting
------------------------------
- For C++ files, please format using `clang-format`; `.clang-format` files in
the source tree define the style.
- For CMake files, please format using `gersemi`
- For shell scripts, please format using `shfmt`
- For YAML files, please use `prettier`
The formatting for the codebase is overall specified by the
[`extras/formatting.sh`](./extras/formatting.sh) script.
If you open a pull request and the formatting is incorrect, you can comment
`/format` and a bot will format your code for you.
Naming
------
### Casing
Types should in general use `UpperCamelCase`. This includes `struct`s, `class`es, `enum`s and `typedef`s.
Values should in general use `lowerCamelCase`. This includes functions, methods, local variables, global variables, parameters, fields, etc.
Macros should in general use `SCREAMING_SNAKE_CASE`.
It is important to prefix all macros (e.g., with `SLANG_`) to avoid collisions, since `namespace`s don't affect macros.
In names using camel case, acronyms and initialisms should appear entirely in either upper or lower case (e.g., `D3DThing d3dThing`) and not be capitalized as if they were ordinary words (e.g., `D3dThing d3dThing`).
Note that this also applies to uses of "ID" as an abbreviation for "identifier" (e.g., use `nodeID` instead of `nodeId`).
### Prefixes
Prefixes based on types (e.g., `p` for pointers) should never be used.
Global variables should have a `g` prefix, e.g. `gCounter`.
Non-`const` `static` class members can have an `s` prefix if that suits your fancy.
Of course, both of these should be avoided, so this shouldn't come up often.
Constant data (in the sense of `static const`) should have a `k` prefix.
In contexts where "information hiding" is relevant/important, such as when a type has both `public` and `private` members, or just has certain operations/fields that are considered "implementation details" that most clients should not be using, an `m_` prefix on member variables and a `_` prefix on member functions is allowed (but not required).
In function parameter lists, an `in`, `out`, or `io` prefix can be added to a parameter name to indicate whether a pointer/reference/buffer is intended to be used for input, output, or both input and output.
For example:
```c++
void copyData(void* outBuffer, void const* inBuffer, size_t size);
Result lookupThing(Key k, Thing& outThing);
void maybeAppendExtraNames(std::vector<Name>& ioNames);
```
Public C APIs will prefix all symbol names while following the casing convention (e.g. `SlangModule`, `slangLoadModule`, etc.).
### Enums
C-style `enum` should use the following convention:
```c++
enum Color
{
kColor_Red,
kColor_Green,
kColor_Blue,
kColorCount,
};
```
When using `enum class`, drop the `k` and type name as prefix, but retain the `UpperCamelCase` tag names:
```c++
enum class Color
{
Red,
Green,
Blue,
Count,
};
```
When defining a set of flags, separate the type definition from the `enum`:
```c++
typedef unsigned int Axes;
enum
{
kAxes_None = 0,
kAxis_X = 1 << 0,
kAxis_Y = 1 << 1,
kAxis_Z = 1 << 2,
kAxes_All = kAxis_X | kAxis_Y | kAxis_Z,
};
```
Note that the type name reflects the plural case, while the cases that represent individual bits are named with a singular prefix.
In public APIs, all `enum`s should use the style of separating the type definition from the `enum`, and all cases should use `SCREAMING_SNAKE_CASE`:
```c++
typedef unsigned int SlangAxes;
enum
{
SLANG_AXES_NONE = 0,
SLANG_AXIS_X = 1 << 0,
SLANG_AXIS_Y = 1 << 1,
SLANG_AXIS_Z = 1 << 2,
SLANG_AXES_ALL = SLANG_AXIS_X | SLANG_AXIS_Y | SLANG_AXIS_Z,
};
```
### General
Names should default to the English language and US spellings, to match the dominant conventions of contemporary open-source projects.
Function names should either be named with action verbs (`get`, `set`, `create`, `emit`, `parse`, etc.) or read as questions (`isEnabled`, `shouldEmit`, etc.).
Whenever possible, compiler concepts should be named using the most widely-understood term available: e.g., we use `Token` over `Lexeme`, and `Lexer` over `Scanner` simply because they appear to be the more common names.
Avoid abbreviations and initialisms unless they are already widely established across the codebase; a longer name may be cumbersome to write in the moment, but the code will probably be read many more times than it is written, so clarity should be preferred.
An important exception to this is common compiler concepts or techniques which may have laboriously long names: e.g., Static Single Assignment (SSA), Sparse Conditional Copy Propagation (SCCP), etc.
One gotcha particular to compiler front-ends is that almost every synonym for "type" has some kind of established technical meaning; most notably the term "kind" has a precise meaning that is relevant in our domain.
It is common practice in C and C++ to define tagged union types with a selector field called a "type" or "kind," which does not usually match this technical definition.
If a developer wants to avoid confusion, they are encouraged to use the term "flavor" instead of "type" or "kind" since this term (while a bit silly) is less commonly used in the literature.
Comments and Documentation
--------------------------
You probably know the drill: comments are good, but an out-of-date comment can be worse than no comment at all.
Try to write comments that explain the "why" of your code more than the "what."
When implementing a textbook algorithm or technique, it may help to imagine giving the reviewer of your code a brief tutorial on the topic.
In cases where comments would benefit from formatting, use Markdown syntax.
We do not currently have a setup for extracting documentation from comments, but if we add one we will ensure that it works with Markdown.
When writing comments, please be aware that your words could be read by many people, from a variety of cultures and backgrounds.
Default to a plain-spoken and professional tone and avoid using slang, idiom, profanity, etc.

View file

@ -1,166 +0,0 @@
Understanding Declaration References (Out of Date)
====================================
This document is intended as a reference for developers working on the Slang compiler implementation.
As you work on the code, you'll probably notice a lot of places where we use the `DeclRef<T>` type:
* Expressions like `VarExpr` and `MemberExpr` are subclasses of `DeclRefExpr`, which holds a `DeclRef<Decl>`.
* The most common subclass of `Type` is `DeclRefType`, which holds a `DeclRef<Decl>` for the type declaration.
* Named types (references to `typedef`s) hold a `DeclRef<TypedefDecl>`
* The name lookup process relies a lot on `DeclRef<ContainerDecl>`
So what in the world is a `DeclRef`?
The short answer is that a `DeclRef` packages up two things:
1. A pointer to a `Decl` in the parsed program AST
2. A set of "substitutions" to be applied to that decl
Why do we need `DeclRef`s?
--------------------------
In a compiler for a simple language, we might represent a reference to a declaration as simply a pointer to the AST node for the declaration, or some kind of handle/ID that references that AST node.
A representation like that will work in simple cases, for example:
```hlsl
struct Cell { int value };
Cell a = { 3 };
int b = a.value + 4;
```
In this case, the expression node for `a.value` can directly reference the declaration of the field `Cell::value`, and from that we can conclude that the type of the field (and hence the expression) is `int`.
In contrast, things get more complicated as soon as we have a language with generics:
```hlsl
struct Cell<T> { T value; };
// ...
Cell<int> a = { 3 };
int b = a.value + 4;
```
In this case, if we try to have the expression `a.value` only reference `Cell::value`, then the best we can do is conclude that the field has type `T`.
In order to correctly type the `a.value` expression, we need enough additional context to know that it references `Cell<int>::value`, and from that to be able to conclude that a reference to `T` in that context is equivalent to `int`.
We can represent that information as a substitution which maps `T` to `int`:
```
[ Cell::T => int ]
```
Then we can encode a reference to `Cell<int>::value` as a reference to the single declaration `Cell::value` with such a substitution applied:
```
Cell::value [Cell::T => int]
```
If we then want to query the type of this field, we can first look up the type stored on the AST (which will be a reference to `Cell::T`) and apply the substitutions from our field reference to get:
```
Cell::T [Cell::T => int]
```
Of course, we can then simplify the reference by applying the substitutions, to get:
```
int
```
How is this implemented?
------------------------
At the highest level, a `DeclRef` consists of a pointer to a declaration (a `Decl*`) plus a singly-linked list of `Substitution`s.
These substitutions fill in the missing information for any declarations on the ancestor chain for the declaration.
Each ancestor of a declaration can introduce an expected substitution along the chain:
* Most declarations don't introduce any substitutions: e.g., when referencing a non-generic `struct` we don't need any additional information.
* A surrounding generic declaration requires a `GenericSubstitution` which specifies the type argument to be plugged in for each type parameter of the declaration.
* A surrounding `interface` declaration usually requires a `ThisTypeSubstitution` that identifies the specific type on which an interface member has been looked up.
All of the expected substitutions should be in place in the general case, even when we might not have additional information. E.g., within a generic declaration like this:
```hlsl
struct Cell<T>
{
void a();
void b() { a(); }
}
```
The reference to `a` in the body of `b` will be represented as a declaration reference to `Cell::a` with a substitution that maps `[Cell::T => Cell::T]`. This might seem superfluous, but it makes it clear that we are "applying" the generic to arguments (even if they are in some sense placeholder arguments), and not trying to refer to an unspecialized generic.
There are a few places in the compiler where we might currently bend these rules, but experience has shown that failing to include appropriate substitutions is more often than not a source of bugs.
What in the world is a "this type" substitution?
------------------------------------------------
When using interface-constrained generics, we need a way to invoke methods of the interface on instances of a generic parameter type.
For example, consider this code:
```hlsl
interface IVehicle
{
associatedtype Driver;
Driver getDriver();
}
void ticketDriver<V : IVehicle>(V vehicle)
{
V.Driver driver = vehicle.getDriver();
    sendTicketTo(driver);
}
```
In the expression `vehicle.getDriver`, we are referencing the declaration of `IVehicle::getDriver`, and so a naive reading tells us that the return type of the call is `IVehicle.Driver`, but that is an associated type and not a concrete type. It is clear in context that the expression `vehicle.getDriver()` should result in a `V.Driver`.
The way the compiler encodes that is that we treat the expression `v.getDriver` as first "up-casting" the value `v` (of type `V`) to the interface `IVehicle`. We know this is valid because of the generic constraint `V : IVehicle`. The result of the up-cast operation is an expression with a type that references `IVehicle`, but with a substitution to track the fact that the underlying implementation type is `V`. This amounts to something like:
```
IVehicle [IVehicle.This => V]
```
where `IVehicle.This` is a way to refer to "the concrete type that is implementing `IVehicle`".
Looking up the `getDriver` method on this up-cast expression yields a reference to:
```
IVehicle::getDriver [IVehicle.This => V]
```
And extracting the return type of that method gives us a reference to the type:
```
IVehicle::Driver [IVehicle.This => V]
```
which turns out to be exactly what the front end produces when it evaluates the type reference `V.Driver`.
As this example shows, a "this type" substitution allows us to refer to interface members while retaining knowledge of the specific type on which those members were looked up, so that we can compute correct references to things like associated types.
What does any of this mean for me?
----------------------------------
When working in the Slang compiler code, try to be aware of whether you should be working with a plain `Decl*` or a full `DeclRef`.
There are many queries like "what is the return type of this function?" that typically only make sense if you are applying them to a `DeclRef`.
The `syntax.h` file defines helpers for most of the existing declaration AST nodes for querying properties that should represent substitutions (the type of a variable, the return type of a function, etc.).
If you are writing code that is working with a `DeclRef`, try to use these accessors and avoid being tempted to extract the bare declaration and start querying it.
Some things like `Modifier`s aren't (currently) affected by substitutions, so it can make sense to query them on a bare declaration instead of a `DeclRef`.
Conclusion
----------
Working with `DeclRef`s can be a bit obtuse at first, but they are the most elegant solution we've found to the problems that arise when dealing with generics and interfaces in the compiler front-end. Hopefully this document gives you enough context to see why they are important, and hints at how their representation in the compiler helps us implement some cases that would be tricky otherwise.

View file

@ -1,252 +0,0 @@
Existential Types
=================
This document attempts to provide some background on "existential types" as they pertain to the design and implementation of Slang.
The features described here are *not* reflected in the current implementation, so this is mostly a sketch of where we can go with the language and compiler.
Background: Generics and Universal Quantification
-------------------------------------------------
Currently Slang supports using interfaces as generic constraints. Let's use a contrived example:
```hlsl
interface IImage { float4 getValue(float2 uv); }
float4 offsetImage<T : IImage>(T image, float2 uv)
{
float2 offset = ...;
    return image.getValue(uv + offset);
}
```
Generics like this are a form of "universal quantification" in the terminology of type theory.
This makes sense, because *for all* types `T` that satisfy the constraints, `offsetImage` provides an implementation of its functionality.
When we think of translating `offsetImage` to code, we might at first only think about how we can specialize it once we have a particular type `T` in mind.
However, we can also imagine trying to generate one body of code that can implement `offsetImage` for *any* type `T`, given some kind of runtime representation of types.
For example, we might generate C++ code like:
```c++
struct IImageWitnessTable { float4 (*getValue)(void* obj, float2 uv); };
float4 offsetImage(Type* T, IImageWitnessTable* W, void* image, float2 uv)
{
float2 offset = ...;
    return W->getValue(image, uv + offset);
}
```
This translation takes the generic parameters and turns them into ordinary runtime parameters: the type `T` becomes a pointer to a run-time type representation, while the constraint that `T : IImage` becomes a "witness table" of function pointers that, we assume, implements the `IImage` interface for `T`. So, the syntax of generics is *not* tied to static specialization, and can admit a purely runtime implementation as well.
Readers who are familiar with how languages like C++ are implemented might see the "witness table" above and realize that it is kind of like a virtual function table, just being passed alongside the object, rather than stored in its first word.
Using Interfaces Like Types
---------------------------
It is natural for a user to want to write code like the following:
```hlsl
float4 modulateImage(IImage image, float2 uv)
{
float4 factor = ...;
return factor * image.getValue(uv);
}
```
Unlike `offsetImage`, `modulateImage` is trying to use the `IImage` interface as a *type* and not just a constraint.
This code appears to be asking for a dynamic implementation rather than specialization (we'll get back to that...) and so we should be able to implement it similarly to our translation of `offsetImage` to C++.
Something like the following makes a lot of sense:
```c++
struct IImage { Type* T; IImageWitnessTable* W; void* obj; };
float4 modulateImage(IImage image, float2 uv)
{
float4 factor = ...;
    return factor * image.W->getValue(image.obj, uv);
}
```
Similar to the earlier example, there is a one-to-one mapping of the parameters of the Slang function the user wrote to the parameters of the generated C++ function.
To make this work, we had to bundle up the information that used to be separate parameters to the generic as a single value of type `IImage`.
Existential Types
-----------------
It turns out that when we use `IImage` as a type, it is what we'd call an *existential* type.
That is because if I give you a value `img` of type `IImage` in our C++ model, then you know that *there exists* some type `img.T`, a witness table `img.W` proving the type implements `IImage`, and a value `img.obj` of that type.
Existential types are the bread and butter of object-oriented programming.
If I give you an `ID3D11Texture2D*` you don't know what its concrete type is, and you just trust me that some concrete type *exists* and that it implements the interface.
A C++ class or COM component can implement an existential type, with the constraint that the interfaces that a given type can support are limited by the way that virtual function tables are intrusively included inside the memory of the object, rather than externalized.
Many modern languages (e.g., Go) support adapting existing types to new interfaces, so that a "pointer" of interface type is actually a fat pointer: one for the object, and one for the interface dispatch table.
Our examples so far have assumed that the type `T` needs to be passed around separately from the witness table `W`, but that isn't strictly required in some implementations.
In type theory, the most important operation you can do with an existential type is to "open" it, which means to have a limited scope in which you can refer to the constituent pieces of a "bundled up" value of a type like `IImage`.
We could imagine "opening" an existential as something like:
```
void doSomethingCool<T : IImage>(T val);
void myFunc(IImage img)
{
open img as obj:T in
{
// In this scope we know that `T` is a type conforming to `IImage`,
// and `obj` is a value of type `T`.
//
doSomethingCool<T>(obj);
}
}
```
Self-Conformance
----------------
The above code with `doSomethingCool` and `myFunc` invites a much simpler solution:
```
void doSomethingCool<T : IImage>(T val);
void myFunc(IImage img)
{
doSomethingCool(img);
}
```
This seems like an appealing thing for a language to support, but there are some subtle reasons why this isn't possible to support in general.
If we think about what `doSomethingCool(img)` is asking for, it seems to be trying to invoke the function `doSomethingCool<IImage>`.
That function only accepts type parameters that implement the `IImage` interface, so we have to ask ourselves:
Does the (existential) type `IImage` implement the `IImage` interface?
Knowing the implementation strategy outlined above, we can re-phrase this question to: can we construct a witness table that implements the `IImage` interface for values of type `IImage`?
For simple interfaces this is sometimes possible, but in the general case there are other desirable language features that get in the way:
* When an interface has associated types, there is no type that can be chosen as the associated type for the interface's existential type. The "obvious" approach of using the constraints on the associated type can lead to unsound logic when interface methods take associated types as parameters.
* When an interface uses the "this type" (e.g., an `IComparable` interface with a `compareTo(ThisType other)` method), it isn't correct to simplify the this type to the interface type (just because you have two `IComparable` values doesn't mean you can compare them - they have to be of the same concrete type!)
* If we allow for `static` method on interfaces, then what implementation would we use for these methods on the interface's existential type?
Encoding Existentials in the IR
-------------------------------
Existentials are encoded in the Slang IR quite simply. We have an operation `makeExistential(T, obj, W)` that takes a type `T`, a value `obj` that must have type `T`, and a witness table `W` that shows how `T` conforms to some interface `I`. The result of the `makeExistential` operation is then a value of the type `I`.
Rather than include an IR operation to "open" an existential, we can instead just provide accessors for the pieces of information in an existential: one to extract the type field, one to extract the value, and one to extract the witness table. These would idiomatically be used like:
```
let e : ISomeInterface = /* some existential */
let T : Type = extractExistentialType(e);
let W : WitnessTable = extractExistentialWitnessTable(e);
let obj : T = extractExistentialValue(e);
```
Note how the operation to extract `obj` gets its result type from the previously-executed extraction of the type.
Simplifying Code Using Existentials
-----------------------------------
It might seem like IR code generated using existentials can only be implemented using dynamic dispatch.
However, within a local scope it is clear that we can simplify expressions whenever `makeExistential` and `extractExistential*` operations are paired.
For example:
```
let e : ISomeInterface = makeExistential(A, a, X);
...
let B = extractExistentialType(e);
let b : B = extractExistentialValue(e);
let Y = extractExistentialWitnessTable(e);
```
It should be clear in context that we can replace `B` with `A`, `b` with `a`, and `Y` with `X`, after which all of the `extract*` operations and the `makeExistential` operation are dead and can be eliminated.
This kind of simplification works within a single function, as long as there is no conditional logic involving existentials.
We require further transformation passes to allow specialization in more general cases:
* Copy propagation, redundancy elimination and other dataflow optimizations are needed to simplify use of existentials within functions
* Type legalization passes, including some amount of scalarization, are needed to "expose" existential-type fields that are otherwise buried in a type
* Function specialization, is needed so that a function with existential parameters is specialized based on the actual types used at call sites
Transformations just like these are already required when working with resource types (textures/samplers) on targets that don't support first-class computation on resources, so it is possible to share some of the same logic.
Similarly, any effort we put into validation (to ensure that code is written in a way that *can* be simplified) can hopefully be shared between existentials and resources.
Compositions
------------
So far I've only talked about existential types based on a single interface, but if you look at the encoding as a tuple `(obj, T, W)` there is no real reason that can't be generalized to hold multiple witness tables: `(obj, T, W0, ... WN)`. Interface compositions could be expressed at the language level using the `&` operator on interface (or existential) types.
The IR encoding doesn't need to change much to support compositions: we just need to allow multiple witness tables on `makeExistential` and have an index operand on `extractExistentialWitnessTable` to get at the right one.
The hardest part of supporting composition of interfaces is actually in how to linearize the set of interfaces in a way that is stable, so that changing a function from using `IA & IB` to `IB & IA` doesn't change the order in which witness tables get packed into an existential value.
Why are we passing along the type?
----------------------------------
I'm glossing over something pretty significant here, which is why anybody would pass around the type as part of the existential value, when none of our examples so far have made use of it.
This sort of thing isn't very important for languages where interface polymorphism is limited to heap-allocated "reference" types (or values that have been "boxed" into reference types), because the dynamic type of an object can almost always be read out of the object itself.
When dealing with a value type, though, we have to deal with things like making *copies*:
```
interface IWritable { [mutating] void write(int val); }
struct Cell : IWritable { int data; void write(int val) { data = val; } }
T copyAndClobber<T : IWritable>(T obj)
{
T copy = obj;
obj.write(9999);
return copy;
}
void test()
{
Cell cell = { 0 };
Cell result = copyAndClobber(cell);
// what is in `result.data`?
}
```
If we call `copyAndClobber` on a `Cell` value, then does the line `obj.write` overwrite the data in the explicit `copy` that was made?
It seems clear that a user would expect `copy` to be unaffected in the case where `T` is a value type.
How does that get implemented in our runtime version of things? Let's imagine some C++ translation:
```
void copyAndClobber(Type* T, IWritableWitnessTable* W, void* obj, void* _returnVal)
{
void* copy = alloca(T->sizeInBytes);
T->copyConstruct(copy, obj);
W->write(obj, 9999);
T->moveConstruct(_returnVal, copy);
}
```
Because this function returns a value of type `T` and we don't know how big that is, let's assume the caller is passing in a pointer to the storage where we should write the result.
Now, in order to have a local `copy` of the `obj` value that was passed in, we need to allocate some scratch storage, and only the type `T` can know how many bytes we need.
Furthermore, when copying `obj` into that storage, or subsequently copying the `copy` variable into the function result, we need the copy/move semantics of type `T` to be provided by somebody.
This is the reason for passing through the type `T` as part of an existential value.
If we only wanted to deal with reference types, this would all be greatly simplified, because the `sizeInBytes` and the copy/move semantics would be fixed: everything is a single pointer.
All of the same issues arise if we're making copies of existential values:
```
IWritable copyAndClobberExistential(IWritable obj)
{
IWritable copy = obj;
obj.write(9999);
return copy;
}
```
If we want to stay consistent and say that `copy` is an actual copy of `obj` when the underlying type is a value rather than a reference type, then we need the copy/move operations for `IWritable` to handle invoking the copy/move operations of the underlying encapsulated type.
Aside: it should be clear from these examples that implementing generics and existential types with dynamic dispatch has a lot of complexity when we have to deal with value types (because copying requires memory allocation).
It is likely that a first implementation of dynamic dispatch support for Slang would restrict it to reference types (and would thus add a `class` keyword for defining reference types).

View file

@ -1,74 +0,0 @@
Deploying Experimental API Additions
====================================
This page intends to provide guidance to Slang developers when extending the Slang API, particularly when working on experimental features.
It applies to the "COM-lite" Slang API, rather than the deprecated C Slang API (sp* functions).
* Note: This guidance relates to Slang API changes, not to language changes. That is, what Slang does with shader source code across releases is not discussed here.
The goal is to maintain binary compatibility as much as possible between Slang releases, and to aid applications in dealing with changes to Slang.
Slang is distributed as a dynamic library, and there is an expectation from Slang API users that upgrading by installing an updated slang.dll or slang.so will not break their application unnecessarily.
ABI compatibility within the Slang API can be preserved between releases if some rules are followed by developers.
Slang API uses a "COM-lite" structure wherein functionality is exposed through interfaces on objects. If the interfaces never change, ABI compatibility is preserved, but changes happen. When adding or changing interfaces, please observe the following:
1. It is preferred to create *new* COM interfaces when adding new functionality.
* This maintains ABI compatibility.
* Applications must acquire access to the new functionality using QueryInterface(), which will gracefully fail if the slang.dll/slang.so does not implement the functionality.
2. Changes to existing virtual methods in COM interfaces should be avoided, as that is an ABI breakage.
* If a change is required though, change the interface's UUID.
3. New virtual methods _may_ be added (only) to the end of existing COM interface structs.
* This does not disturb the ABI compatibility of the associated vtable. Old apps can remain unaware of the new function pointers appended to the end of the vtable.
* A UUID change is not necessary.
* Note that in the event that a Slang application which uses the added feature is run with an old slang.dll/slang.so, the experience for the user is not as clean as if the added method belongs to a new interface.
Adding Experimental Interfaces
==============================
When the above recommendations cannot be followed, as with features that are expected to be iterated on or are regarded as temporary, there are additional recommendations.
Interfaces that are expected to change must be marked `_Experimental` in their class name and in their UUID name.
For example,
```csharp
/* Experimental interface for doing something cool. This interface is susceptible to ABI breakage. */
struct ICoolNewFeature_Experimental : public ISlangUnknown
{
SLANG_COM_INTERFACE(0x8e12e8e3, 0x5fcd, 0x433e, { 0xaf, 0xcb, 0x13, 0xa0, 0x88, 0xbc, 0x5e, 0xe5 })
virtual SLANG_NO_THROW SlangResult SLANG_MCALL coolMethod() = 0;
};
#define SLANG_UUID_ICoolNewFeature_Experimental ICoolNewFeature_Experimental::getTypeGuid()
```
Note: Use uuidgen to generate IIDs new interfaces.
Removing Experimental Interfaces
================================
By the nature of being marked "Experimental", users have been warned that the interfaces are not officially supported and may be removed. You may simply delete the class and UUID, e.g. "ICoolNewFeature_Experimental" struct may be deleted from slang.h along with the definition of SLANG_UUID_ICoolNewFeature_Experimental.
This will show up in applications as QueryInterface failures.
It is nice, but not required, to retain the interface declarations for some time after removing internal support before deleting them from slang.h, so that applications have time to remove their dependence on the unsupported feature while still being able to compile in the interim.
Changing Experimental Interfaces
================================
Backwards incompatible changes to Slang COM interfaces should be accompanied with a UUID change.
In the event that an old application runs with a new slang library, applications are more capable of gracefully handling an unavailable interface than a changed one. The former may be still be functional, or include a helpful error message, whereas the latter is most likely a crash of some sort.
Promoting Experimental Interfaces
=================================
The class name and the UUID name should be changed in slang.h and in the slang source code, e.g. Rename "ICoolNewFeature_Experimental" to just "ICoolFeature".
The SLANG_UUID for the interface should be renamed to omit "EXPERIMENTAL" but its value should remain the same. This is because, if there are no backwards incompatible changes that accompany the promotion from experimental to permanent, applications written against the experimental version can continue working against Slang libraries where the interface was promoted to permanent.

View file

@ -1,486 +0,0 @@
Interfaces Design
=================
This document intends to lay out the proposed design for a few inter-related features in Slang:
- Interfaces
- Associated Types
- Generics
Introduction
------------
The basic problem here is not unique to shader programming: you want to write code that accomplished one task, while abstracting over how to accomplish another task.
As an example, we might want to write code to integrate incident radiance over a list of lights, while not concerning ourself with how to evaluate a reflectance function at each of those lights.
If we were doing this task on a CPU, and performance wasn't critical, we could probably handle this with higher-order functions or an equivalent mechanism like function pointers:
float4 integrateLighting(
Light[] lights,
        float4 (*brdf)(float3 wi, float3 wo, void* userData),
void const* brdfUserData)
{
float4 result = 0;
for(/* ... */) {
// ...
            result += brdf(wi, wo, brdfUserData);
}
return result;
}
Depending on the scenario, we might be able to generate statically specialized code by using templates instead:
template<typename BRDF>
float4 integrateLighting(Light[] lights, BRDF const& brdf)
{
// ...
result += brdf(wi, wo);
// ...
}
Current shading languages support neither higher-order functions nor templates/generics, so neither of these options is viable.
Instead practitioners typically use preprocessor techniques to either stitch together the final code, or to substitute in different function/type definitions to make a definition like `integrateLighting` reusable.
These ad hoc approaches actually work well in practice; we aren't proposing to replace them *just* to make code abstractly "cleaner."
Rather, we've found that the ad hoc approaches end up interacting poorly with the resource binding model in modern APIs, so that *something* less ad hoc is required to achieve our performance goals.
At that point, we might as well ensure that the mechanism we introduce is also a good fit for the problem.
Overview
--------
The basic idea for our approach is as follows:
- Start with the general *semantics* of a generic-based ("template") approach
- Use the accumulated experience of the programming language community to ensure that our generics are humane (in other words: not like C++)
- Explore the possibility of syntax sugar to let people use more traditional OOP-style syntax when it can reduce verbosity without harming understanding
In general, our conceptual model is being ripped off wholesale from Rust and Swift.
The basic design principle is "when in doubt, do what Swift does."
Interfaces
----------
An **interface** in Slang is akin to a `protocol` in Swift or a `trait` in Rust.
The choice of the `interface` keyword is to highlight the overlap with the conceptually similar construct that appeared in Cg, and then later in HLSL.
### Declaring an interface
An interface is a named collection of **requirements**; any type that **implements** the interface must provide definitions that satisfy those requirements.
Here is a simple interface, with one requirement:
interface Light
{
float3 illuminate(float3 P_world);
}
The `Light` interface requires a (member) function called `illuminate` with the given signature.
### Declaring that a type implements an interface
A user-defined `struct` type can declare that it implements an interface, by using conventional "inheritance" syntax:
struct PointLight : Light
{
float3 P_light;
float3 illuminate(float3 P_world)
{
float distance = length(P_light - P_world);
// ...
}
}
It is a static error if a type declares that it implements an interface, but it does not provide all of the requirements:
struct BadLight : Light
{
// ERROR: type 'BadLight' cannot implement 'Light'
// because it does not provide the required 'illuminate' function
}
### Interface Inheritance
While this document does not propose general notions of inheritance be added to Slang, it does make sense to allow an interface to inherit from zero or more other interfaces:
interface InfinitessimalLight : Light
{
float3 getDirection(float3 P_world);
}
In this case the `InfinitessimalLight` interface inherits from `Light`, and declares one new requirement.
In order to check that a type implements `InfinitessimalLight`, the compiler will need to check both that it implements `Light` and that it provides the new "direct" requirements in `InfinitessimalLight`.
Declaring that a type implements an interface also implicitly declares that it implements all the interfaces that interface transitively inherits from:
struct DirectionalLight : InfinitessimalLight
{
float3 L;
float3 dir;
float3 getDirection(float3 P_world) { return dir; }
float3 illuminate(float3 P_world)
{
// Okay, this is the point where I recognize
// that this function definition is not
// actually reasonable for a light...
}
}
### Interfaces and Extensions
It probably needs its own design document, but Slang currently has very basic support for `extension` declarations that can add members to an existing type.
These blocks correspond to `extension` blocks in Swift, or `impl` blocks in Rust.
This can be used to declare that a type implements an interface retroactively:
extension PointLight : InfinitessimalLight
{
float3 getDirection(float3 P_world)
{
return normalize(P_light - P_world);
}
}
In this case we've used an extension to declare that `PointLight` also implements `InfinitessimalLight`. For the extension to type-check we need to provide the new required function (the compiler must recognize that the implementation of `Light` was already provided by the original type definition).
There are some subtleties around using extensions to add interface implementations:
- If the type already provides a method that matches a requirement, can the extension "see" it to satisfy new requirements?
- When can one extension "see" members (or interface implementations) added by another?
A first implementation can probably ignore the issue of interface implementations added by extensions, and only support them directly on type definitions.
Generics
--------
All of the above discussion around interfaces neglected to show how to actually *use* the fact that, e.g., `PointLight` implements the `Light` interface.
That is intentional, because at the most basic level, interfaces are designed to be used in the context of **generics**.
### Generic Declarations
The Slang compiler currently has some ad hoc support for generic declarations that it uses to implement the HLSL standard module (which has a few generic types).
The syntax for those is currently very bad, and it makes sense to converge on the style for generic declarations used by C# and Swift:
float myGenericFunc<T>(T someValue);
Types can also be generic:
struct MyStruct<T> { float a; T b; }
Ideally we should also allow interfaces and interface requirements to be generic, but there will probably be some limits due to implementation complexity.
### Type Constraints
Unlike C++, Slang needs to be able to type-check the body of a generic function ahead of time, so it can't rely on `T` having particular members:
// This generic is okay, because it doesn't assume anything about `T`
// (other than the fact that it can be passed as input/output)
T okayGeneric<T>(T a) { return a; }
// This generic is not okay, because it assumes that `T` supports
// certain operators, and we have no way of knowing if this is true:
T notOkayGeneric<T>(T a) { return a + a; }
In order to rely on non-trivial operations in a generic parameter type like `T`, the user must **constrain** the type parameter using an interface:
float3 mySurfaceShader<L : Light>(L aLight)
{
return aLight.illuminate(...);
}
In this example, we have constrained the type parameter `L` so that it must implement the interface `Light`.
As a result, in the body of the function, the compiler can recognize that `aLight`, which is of type `L`, must implement `Light` and thus have a member `illuminate`.
When calling a function with a constrained type parameter, the compiler must check that the actual type argument (whether provided explicitly or inferred) implements the interface given in the constraint:
mySurfaceShader<PointLight>(myPointLight); // OK
mySurfaceShader(myPointLight); // equivalent to previous
mySurfaceShader(3.0f); // ERROR: `float` does not implement `Light`
Note that in the erroneous case, the error is reported at the call site, rather than in the body of the callee (as it would be for C++ templates).
For cases where we must constrain a type parameter to implement multiple interfaces, we can join the interface types with `&`:
interface Foo { void foo(); }
interface Bar { void bar(); }
void myFunc<T : Foo & Bar>(T val)
{
val.foo();
val.bar();
}
If we end up with very complicated type constraints, then it makes sense to support a "`where` clause" that allows requirements to be stated outside of the generic parameter list:
void myFunc<T>(T val)
where T : Foo,
T : Bar
{}
Both the use of `&` and `where` are advanced features that we might cut due to implementation complexity.
### Value Parameters
Because HLSL has generics like `vector<float,3>` that already take non-type parameters, the language will need *some* degree of support for generic parameters that aren't types (at least integers need to be supported).
We need syntax for this that doesn't bloat the common case.
In this case, I think that what I've used in the current Slang implementation is reasonable, where a value parameter needs a `let` prefix:
void someFunc<
T, // type parameter
T : X, // type parameter with constraint
T = Y, // type parameter with default
T : X = Y, // type parameter with constraint and default
let N : int, // value parameter (type must be explicit)
let N : int = 3> // value parameter with default
()
{ ... }
We should also extend the `where` clauses to support inequality constraints on (integer) value parameters to enforce rules about what ranges of integers are valid.
The front-end should issue error messages if it can statically determine these constraints are violated, but it should probably defer full checking until the IR (maybe... we need to think about how much of a dependent type system we are willing to have).
Associated Types
----------------
While the syntax is a bit different, the above mechanisms have approximately the same capabilities as Cg interfaces.
What the above approach can't handle (and neither can Cg) is a reusable definition of a surface material "pattern" that might blend multiple material layers to derive parameters for a specific BRDF.
That is, suppose we have two BRDFs: one with two parameters, and one with six.
Different surface patterns may want to target different BRDFs.
So if we write a `Material` interface like:
interface Material
{
BRDFParams evaluatePattern(float2 uv);
}
Then what should `BRDFParams` be? The two-parameter or six-parameter case?
An **associated type** is a concept that solves exactly this problem.
We don't care *what* the concrete type of `BRDFParams` is, so long as *every* implementation of `Material` has one.
The exact `BRDFParams` type can be different for each implementation of `Material`; the type is *associated* with a particular implementation.
We will crib our syntax for this entirely from Swift, where it is verbose but explicit:
interface Material
{
associatedtype BRDFParams;
BRDFParams evaluatePattern(float2 uv);
float3 evaluateBRDF(BRDFParams param, float3 wi, float3 wo);
}
In this example we've added an associated type requirement so that every implementation of `Material` must supply a type named `BRDFParams` as a member.
We've also added a requirement that is a function to evaluate the BRDF given its parameters and incoming/outgoing directions.
Using this declaration one can now define a generic function that works on any material:
float3 evaluateSurface<M : Material, L : Light>(
M material,
L[] lights,
float3 P_world,
float2 uv)
{
M.BRDFParams brdfParams = material.evaluatePattern(uv);
for(...)
{
L light = lights[i];
// ...
float3 reflectance = material.evaluateBRDF(brdfParams, ...);
}
}
Some quick notes:
- The use of `associatedtype` (for associated types) and `typealias` (for `typedef`-like definitions) as distinct keywords in Swift was well motivated by their experience (they used to use `typealias` for both). I would avoid having the two cases be syntactically identical.
- Swift has a pretty involved inference system where a type doesn't actually need to explicitly provide a type member with the chosen name. Instead, if you have a required method that takes or returns the associated type, then the compiler can infer what the type is by looking at the signature of the methods that meet other requirements. This is a complex and magical feature, and we shouldn't try to duplicate it.
- Both Rust and Swift call this an "associated type." They are related to "virtual types" in things like Scala (which are in turn related to virtual classes in beta/gbeta). There are similar ideas that arise in Haskell-like languages with type classes (IIRC, the term "functional dependencies" is relevant).
### Alternatives
I want to point out a few alternatives to the `Material` design above, just to show that associated types seem to be an elegant solution compared to the alternatives.
First, note that we could break `Material` into two interfaces, so long as we are allowed to place type constraints on associated types:
interface BRDF
{
float3 evaluate(float3 wi, float3 wo);
}
interface Material
{
associatedtype B : BRDF;
B evaluatePattern(float2 uv);
}
This refactoring might be cleaner if we imagine that a shader library would have family of reflectance functions (implementing `BRDF`) and then a large library of material patterns (implementing `Material`) - we wouldn't want each and every material to have to implement a dummy `evaluateBRDF` that just forwards to a BRDF instance nested in it.
Looking at that type `B` there, we might start to wonder if we could just replace this with a generic type parameter on the interface:
interface Material< B : BRDF >
{
B evaluatePattern(float2 uv);
}
This would change any type that implements `Material`:
// old:
struct MyMaterial : Material
{
typealias B = GGX;
GGX evaluatePattern(...) { ... }
}
// new:
struct MyMaterial : Material<GGX>
{
GGX evaluatePattern(...) { ... }
}
That doesn't seem so bad, but it ignores the complexity that arises at any use sites, e.g.:
float3 evaluateSurface<B : BRDF, M : Material<B>, L : Light>(
M material,
L[] lights,
float3 P_world,
float2 uv)
{ ... }
The type `B` which is logically an implementation detail of `M` now surfaces to the generic parameter list of any function that wants to traffic in materials.
This reduces the signal/noise ratio for anybody reading the code, and also means that any top-level code that is supposed to be specializing this function (suppose this was a fragment entry point) now needs to understand how to pick apart the `Material` it has on the host side to get the right type parameters.
This kind of issue has existed in the PL community at least as far back as the ML module system (it is tough to name search, but the concepts of "parameterization" vs. "fibration" is relevant here), and the Scala researchers made a clear argument (I think it was in the paper on "un-types") that there is a categorical distinction between the types that are logically the *inputs* to an abstraction, and the types that are logically the *outputs*. Generic type parameters and associated types handle these two distinct roles.
Returning an Interface
----------------------
The revised `Material` definition:
interface BRDF
{
float3 evaluate(float3 wi, float3 wo);
}
interface Material
{
associatedtype B : BRDF;
B evaluatePattern(float2 uv);
}
has a function `evaluatePattern` that returns a type that implements an interface.
In the case where the return type is concrete, this isn't a problem (and the nature of associated types means that `B` will be concrete in any actual concrete implementation of `Material`).
There is an open question of whether it is ever necessary (or even helpful) to have a function that returns a value of *some* type known to implement an interface, without having to state that type in the function signature.
This is a point that has [come up](https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md) in the Rust world, where they have discussed using a keyword like `some` to indicate the existential nature of the result type:
// A function that returns *some* implementation of `Light`
func foo<T>() -> some Light;
The Rust proposal linked above has them trying to work toward `impl` as the keyword, and allowing it in both argument and result positions (to cover both universal and existential quantification).
In general, such a feature would need to have many constraints:
- The concrete return type must be fixed (even if clients of the function should be insulated from the choice), given the actual generic arguments provided.
- If the existential is really going to be sealed, then the caller shouldn't be allowed to assume anything *except* that two calls to the same function with identical generic arguments should yield results of identical type.
Under those constraints, it is pretty easy to see that an existential-returning method like:
interface Foo<T>
{
func foo<U>() -> some Bar;
}
can in principle be desugared into:
interface Foo<T>
{
associatedtype B<U> : Bar;
func foo<U>() -> B<U>;
}
without particular loss in what can be expressed.
The same desugaring approach should apply to global-scope functions that want to return an existential type (just with a global `typealias` instead of an `associatedtype`).
It might be inconvenient for the user to have to explicitly write the type-level expression that yields the result type (consider cases where C++ template metaprogrammers would use `auto` as a result type), but there is really no added power.
Object-Oriented Sugar
---------------------
Having to explicitly write out generic parameter lists is tedious, especially in the (common) case where we will have exactly one parameter corresponding to each generic type parameter:
// Why am I repeating myself?!
//
void foo<L : Light, M : Material, C : Camera)(
L light, M material, C camera);
The intent seems to be clear if we instead write:
void foo(Light light, Material material, Camera camera);
We could consider the latter to be sugar for the former, and allow users to write in familiar syntax akin to what was already supported in Cg.
We'd have to be careful with such sugar, though, because there is a real and meaningful difference between saying:
- "`material` has type `Material` which is an interface type"
- "`material` has type `M` where `M` implements `Material`"
In particular, if we start to work with associated types:
let b = material.evaluatePattern(...);
It makes sense to say that `b` has type `M.BRDF`.
It does **not** make sense to say that `b` has type `Material.BRDF`, because there is no such concrete type.
(A third option is to say that `b` has type `material.BRDF`, which is basically the point where you have "virtual types" because we are now saying the type is a member of the *instance* and not of an enclosing *type*)
Note that the issue of having or not having object-oriented sugar is technically orthogonal from whether we allow "existential return types."
However, allowing the user to think of interfaces in traditional OOP terms leads to it being more likely that they will try to declare:
- functions that return an interface type
- local variables of interface type (which they might even assign to!)
- fields of interface type in their `struct`s
All of these complicate the desugaring step, because we would de facto have types/functions that mix up two stages of evaluation: a compile-time type-level step and a run-time value-level step.
Ultimately, we'd probably need to express these by having a multi-stage IR (with two stages) which we optimize in the staged setting before stage-splitting to get separate type-level and value-level operations (akin to the desugaring for existential return types I described above).
My sense is that a certain amount of multi-stage programming may already be needed to deal with certain HLSL/GLSL idioms. In particular:
- GLSL supports passing unsized arrays (e.g., `int[] a`) to a function, and then having the function use the size of the array (`a.length`) to do loops, etc. These would need to be lowered to distinct SPIR-V code for every array size used (if I understand the restrictions correctly), and so the feature is perhaps best thought of as passing both a compile-time integer parameter and a run-time array parameter (where the size comes from that parameter)
- HLSL and GLSL both have built-in functions where certain parameters are required to be compile-time constants. A feature-complete front-end must detect when calls to these functions are valid, and report errors to the user. In order to make the errors easier to explain to the user, it would be helpful to have an explicit notion of constant-rate computation, and require that the user express explicit constant-rate parameters/expressions.
All of this ties into the question of whether we need/want to support more general kinds of compile-time evaluation for specialization (e.g., statically-determine `if` statements or loops).
Other Languages
---------------
It is worth double-checking whether implementing all of this from scratch in Slang is a good idea, or if there is somewhere else we can achieve similar results more quickly:
- The Metal shading language has much of what we'd want. It is based on C++ templates, which are maybe not the ideal mechanism, and the compiler is closed-source so we can't easily add functionality. Still, it should be possible to prototype a lot of what we want on top of Metal 2.
- The open-source HLSL compiler doesn't support any of the new ideas here, but it may be that adding them to `dxc` would be faster than adding them to the Slang project code. Using `dxc` is a no-go for some of the other Slang requirements (that come from our users on the Falcor project).
- Swift already supports almost every thing on our list of requirements, but as it stands today there is no easy path to using it for low-level GPU code generation. It also fails to meet our goals for incremental adoption, high-level source output, etc.
In the long run, however, the Swift compiler seems like an attractive intercept for this work, because their long-term roadmap seems like it will close a lot of the gap with what we've done so far.
Conclusion
----------
This document has described the basic syntax and semantics for three related features -- interfaces, generics, and associated types -- along with some commentary on longer-term directions.
My expectation is that we will use the syntax as laid down here, unless we have a very good reason to depart from it, and we will prioritize implementation work as needed to get interesting shader library functionality up and running.

View file

@ -1,275 +0,0 @@
The Design of Slang's Intermediate Representation (IR)
======================================================
This document details some of the important design choices for Slang's IR.
Goals and Non-Goals
-------------------
The IR needs to balance many goals which can sometimes come into conflict.
We will start by enumerating these goals (and related non-goals) explicitly so that we can better motivate specific design choices.
* Obviously it must be simple to lower any source code in Slang code to the IR. It is however a non-goal for the lowering process to be lossless; we do not need to recover source-level program structure from the IR.
* The IR must be amenable to standard dataflow analyses and optimizations. It should be possible to read a paper on a compiler algorithm or technique and apply it to our IR in a straightforward manner, and with the expected asymptotic efficiency.
* As a particular case of analysis and optimization, it should be possible to validate flow-dependent properties of an input function/program (e.g., whether an `[unroll]` loop is actually unrollable) using the IR, and emit meaningful error messages that reference the AST-level names/locations of constructs involved in an error.
* It should be possible to compile modules to the IR separately and then "link" them in a way that depends only on IR-level (not AST-level) constructs. We want to allow changing implementation details of a module without forcing a re-compile of IR code using that module (what counts as "implementation details" is negotiable).
* There should be a way to serialize IR modules in a round-trip fashion preserving all of the structure. As a long-term goal, the serialized format should provide stability across compiler versions (working more as an IL than an IR).
* The IR must be able to encode "generic" (type-parameterized) constructs explicitly, and to express transformations from generic to specialized (or dynamic-dispatch) code in the IR. In particular, it must be possible for a module to make use of a generic defined in another (separately-compiled) module, with validation performed before linking, and specialization performed after.
* The IR must be able to express code that is close to the level of abstraction of shader intermediate languages (ILs) like SPIR-V and DXIL, so that we can minimize the amount of work required (and the number of issues that can arise) when translating the IR to these targets. This can involve lowering and legalization passes to match the constraints of those ILs, but it should not require too much work to be done outside of the IR.
* It should be possible to translate code in the IR back into high-level-language code, including things like structured control-flow constructs.
* Whenever possible, invariants required by the IR should be built into its structure so that they are easier to maintain.
* We should strive to make the IR encoding, both in memory and when serialized, as compact as is practically possible.
Inspirations
------------
The IR design we currently use takes inspiration from three main sources:
* The LLVM project provides the basic inspiration for the approach to SSA, such as using a typed IR, the decision to use the same object to represent an instruction and the SSA value it produces, and the push to have an extremely simple `replaceAllUsesWith` primitive. It is easy to forget that it is possible to design a compiler with different design decisions; the LLVM ones just happen to both be well-motivated and well-known.
* The Swift IL (SIL) provides the inspiration for our approach for encoding SSA "phi nodes" (blocks with arguments), and also informs some of how we have approached encoding generics and related features like existential types.
* The SPIR-V IL provides the inspiration for the choice to uniformly represent types as instructions, for how to encode "join points" for structured control flow, and for the concept of "decorations" for encoding additional metadata on instructions.
Key Design Decisions
--------------------
### Everything is an Instruction
The Slang IR strives for an extremely high degree of uniformity, so almost every concept in the IR is ultimately just an instruction:
* Ordinary add/sub/mul/etc. operations are instructions, as are function calls, branches, function parameters, etc.
* Basic blocks in functions, as well as functions themselves are "parent instructions" that can have other instructions as children
* Constant values (e.g., even `true` and `false`) are instructions
* Types are instructions too, and can have operands (e.g., a vector type is the `VectorType` instruction applied to operands for the element type and count)
* Generics are encoded entirely using ordinary instructions: a generic is encoded like a function that just happens to do computation at the type level
* It isn't true right now, but eventually decorations will also be instructions, so that they can have operands like any other instruction
* An overall IR module is itself an instruction so that there is a single tree that owns everything
This uniformity greatly simplifies the task of supporting generics, and also means that operations that need to work over all instructions, such as cloning and serialization, can work with a single uniform representation and avoid special-casing particular opcodes.
The decision to use an extremely uniform design, even going as far to treat types as "ordinary" instructions, is similar to SPIR-V, although we do not enforce many of the constraints SPIR-V does on how type and value instructions can be mixed.
### Instructions Have a Uniform Structure
Every instruction has:
* An opcode
* A type (the top-level module is the only place where this can be null)
* Zero or more operands
* Zero or more decorations
* Zero or more children
Instructions are not allowed to have any semantically-relevant information that is not in the above list.
The only exception to this rule is instructions that represent literal constants, which store additional data to represent their value.
The in-memory encoding places a few more restrictions on top of this so that, e.g., currently an instruction can either have operands or children, but not both.
Because everything that could be used as an operand is also an instruction, the operands of an instruction are stored in a highly uniform way as a contiguous array of `IRUse` values (even the type is contiguous with this array, so that it can be treated as an additional operand when required).
The `IRUse` type maintains explicit links for use-def information, currently in a slightly bloated fashion (there are well-known techniques for reducing the size of this information).
### A Class Hierarchy Mirrored in Opcodes
There is a logical "class hierarchy" for instructions, and we support (but do not mandate) declaring a C++ `struct` type to expose an instruction or group of instructions.
These `struct` types can be helpful to encode the fact that the program knows an instruction must/should have a particular type (e.g., having a function parameter of type `IRFunction*` prevents users from accidentally passing in an arbitrary `IRInst*` without checking that it is a function first), and can also provide convenience accessors for operands/children.
To make "dynamic cast" operations on this class hierarchy efficient, we arrange for the instruction opcodes for the in-memory IR to guarantee that all the descendants of a particular "base class" will occupy a contiguous range of opcodes. Checking that an instruction is in that range is then a constant-time operation that only looks at its opcode field.
There are some subtleties to how the opcodes are ordered to deal with the fact that some opcodes have a kind of "multiple inheritance" thing going on, but that is a design wart that we should probably remove over time, rather than something we are proud of.
### A Simpler Encoding of SSA
The traditional encoding of SSA form involves placing "phi" instructions at the start of blocks that represent control-flow join points where a variable will take on different values depending on the incoming edge that is taken.
There are of course benefits to sticking with tradition, but phi instructions also have a few downsides:
- The operands to phi instructions are the one case where the "def dominates use" constraint of SSA appears to be violated. I say "appears" because officially the action of a phi occurs on the incoming edge (not in the target block) and that edge will of course be dominated by the predecessor block. It still creates a special case that programmers need to be careful about. This also complicates serialization in that there is no order in which the blocks/instructions of a function can be emitted that guarantees that every instruction always precedes all of its uses in the stream.
- All of the phi instructions at the start of the block must effectively operate in parallel, so that they all "read" from the correct operand before "writing" to the target variable. Like the above special case, this is only a problem for a phi related to a loop back-edge. It is of course possible to always remember the special interpretation of phi instructions (that they don't actually execute sequentially like every other instruction in a block), but it's another special case.
- The order of operands to a phi instruction needs to be related back to the predecessor blocks, so that one can determine which value is to be used for which incoming edge. Any transformation that modifies the CFG of a function needs to be careful to rewrite phi instructions to match the order in which predecessors are listed, or else the compiler must maintain a side data structure that remembers the mapping (and update it instead).
- Directly interpreting/executing code in an SSA IR with phi instructions is made more difficult because when branching to a block we need to immediately execute any phi instructions based on the block from which we just came. The above issues around phis needing to be executed in parallel, and needing to track how phi operands relate to predecessor blocks also add complexity to an interpreter.
Slang ditches traditional phi functions in favor of an alternative that matches the Swift IL (SIL).
The idea doesn't really start in Swift, but rather in the existing observation that SSA form IR and a continuation-passing style (CPS) IR are semantically equivalent; one can encode SSA blocks as continuation functions, where the arguments of the continuation stand in for the phi instructions, and a branch to the block becomes just a call.
Like Swift, we do not use an explicit CPS representation, but instead find a middle ground of a traditional SSA IR where instead of phi instructions basic blocks have parameters.
The first N instructions in a Slang basic block are its parameters, each of which is an `IRParam` instruction.
A block that would have had N phi instructions now has N parameters, but the parameters do not have operands.
Instead, a branch instruction that targets that block will have N *arguments* to match the parameters, representing the values to be assigned to the parameters when this control-flow edge is taken.
This encoding is equivalent in what it represents to traditional phi instructions, but nicely solves the problems outlined above:
- The phi operands in the successor block are now arguments in the *predecessor* block, so that the "def dominates use" property can be enforced without any special cases.
- The "assignment" of the argument values to parameters is now encoded with a single instruction, so that the simultaneity of all the assignments is more clear. We still need to be careful when leaving SSA form to obey those semantics, but there are no tricky issues when looking at the IR itself.
- There is no special work required to track which phi operands come from which predecessor block, since the operands are attached to the terminator instruction of the predecessor block itself. There is no need to update phi instructions after a CFG change that might affect the predecessor list of a block. The trade-off is that any change in the *number* of parameters of a block now requires changes to the terminator of each predecessor, but that is a less common change (isolated to passes that can introduce or eliminate block parameters/phis).
- It is much more clear how to give an operational semantics to a "branch with arguments" instead of phi instructions: compute the target block, copy the arguments to temporary storage (because of the simultaneity requirement), and then copy the temporaries over the parameters of the target block.
The main caveat of this representation is that it requires branch instructions to have room for arguments to the target block. For an ordinary unconditional branch this is pretty easy: we just put a variable number of arguments after the operand for the target block. For branch instructions like a two-way conditional, we might need to encode two argument lists - one for each target block - and an N-way `switch` branch only gets more complicated.
The Slang IR avoids the problem of needing to store arguments on every branch instruction by banning *critical edges* in IR functions that are using SSA phis/parameters. A critical edge is any edge from a block with multiple successors (meaning it ends in a conditional branch) to one with multiple predecessors (meaning it is a "join point" in the CFG).
Phi instructions/parameters are only ever needed at join points, and so block arguments are only needed on branches to a join point.
By ruling out conditional branches that target join points, we avoid the need to encode arguments on conditional branch instructions.
This constraint could be lifted at some point, but it is important to note that there are no programs that cannot be represented as a CFG without critical edges.
### A Simple Encoding of the CFG
A traditional SSA IR represents a function as a bunch of basic blocks of instructions, where each block ends in a *terminator* instruction.
Terminators are instructions that can branch to another block, and are only allowed at the end of a block.
The potential targets of a terminator determine the *successors* of the block where it appears, and contribute to the *predecessors* of any target block.
The successor-to-predecessor edges form a graph over the basic blocks called the control-flow graph (CFG).
A simple representation of a function would store the CFG explicitly as a graph data structure, but in that case the data structure would need to be updated whenever a change is made to the terminator instruction of a branch in a way that might change the successor/predecessor relationship.
The Slang IR avoids this maintenance problem by noting an important property.
If block `P`, with terminator `t`, is a predecessor of `S`, then `t` must have an operand that references `S`.
In turn, that means that the list of uses of `S` must include `t`.
We can thus scan through the list of predecessors or successors of a block with a reasonably simple algorithm:
* To find the successors of `P`, find its terminator `t`, identify the operands of `t` that represent successor blocks, and iterate over them. This is O(N) in the number of outgoing CFG edges.
* To find the predecessors of `S`, scan through its uses and identify users that are terminator instructions. For each such user if this use is at an operand position that represents a successor, then include the block containing the terminator in the output. This is O(N) in the number of *uses* of a block, but we expect that to be on the same order as the number of predecessors in practice.
Each of these actually iterates over the outgoing/incoming CFG *edges* of a block (which might contain duplicates if one block jumps to another in, e.g, multiple cases of a `switch`).
Sometimes you actually want the edges, or don't care about repeats, but in the case where you want to avoid duplicates the user needs to build a set to deduplicate the lists.
The clear benefit of this approach is that the predecessor/successor lists arise naturally from the existing encoding of control-flow instructions. It creates a bit of subtle logic when walking the predecessor/successor lists, but that code only needs to be revisited if we make changes to the terminator instructions that have successors.
### Explicit Encoding of Control-Flow Join Points
In order to allow reconstruction of high-level-language source code from a lower-level CFG, we need to encode something about the expected "join point" for a structured branch.
This is the logical place where control flow is said to "reconverge" after a branch, e.g.:
```hlsl
if(someCondition) // join point is "D"
{
A;
}
else
{
B;
if(C) return;
}
D;
```
Note that (unlike what some programming models would say) a join point is *not* necessarily a postdominator of the conditional branch. In the example above the block with `D` does not postdominate the block with `someCondition` nor the one with `B`. It is even possible to construct cases where the high-level join point of a control-flow construct is unreachable (e.g., the block after an infinite loop).
The Slang IR encodes structured control flow by making the join point be an explicit operand of a structured conditional branch operation. Note that a join-point operand is *not* used when computing the successor list of a block, since it does not represent a control-flow edge.
This is slightly different from SPIR-V where join points ("merge points" in SPIR-V) are encoded using a metadata instruction that precedes a branch. Keeping the information on the instruction itself avoids cases where we move one but not the other of the instructions, or where we might accidentally insert code between the metadata instruction and the terminator it modifies.
In the future we might consider using a decoration to represent join points.
When using a loop instruction, the join point is also the `break` label. The SPIR-V `OpLoopMerge` includes not only the join point (`break` target) but also a `continue` target. We do not currently represent structured information for `continue` blocks.
The reason for this is that while we could keep structured information about `continue` blocks, we might not be able to leverage it when generating high-level code, because the syntactic form of a `for` loop (the only construct in C-like languages where `continue` can go somewhere other than the top of the loop body) only allows an *expression* for the continue clause and not a general *statement*, but we cannot guarantee that after optimization the code in an IR-level "continue clause" would constitute a single expression.
The approach we use today means that the code in "continue clause" might end up being emitted more than once in final code; this is deemed acceptable because it is what `fxc` already does.
When it comes time to re-form higher-level structured control flow from Slang IR, we use the structuring information in the IR to form single-entry "regions" of code that map to existing high-level control-flow constructs (things like `if` statements, loops, `break` or `continue` statements, etc.).
The current approach we use requires the structuring information to be maintained by all IR transformations, and also currently relies on some invariants about what optimizations are allowed to do (e.g., we had better not introduce multi-level `break`s into the IR).
In the future, it would be good to investigate adapting the "Relooper" algorithm used in Emscripten so that we can recover valid structured control flow from an arbitrary CFG; for now we put off that work.
If we had a more powerful restructuring algorithm at hand, we could start to support things like multi-level `break`, and also ensure that `continue` clauses don't lead to code duplication any more.
## IR Global and Hoistable Value Deduplication
Types, constants and certain operations on constants are considered "global values" in the Slang IR. Some other insts like `Specialize()` and `Ptr(x)` are considered as "hoistable" insts, in that they will be defined at the outermost scope where their operands are available. For example, `Ptr(int)` will always be defined at global scope (as a direct child of `IRModuleInst`) because its only operand, `int`, is defined at global scope. However if we have `Ptr(T)` where `T` is a generic parameter, then this `Ptr(T)` inst will always be defined in the block of the generic. Global and hoistable values are always deduplicated and we can always assume two hoistable values with different pointer addresses are distinct values.
The `IRBuilder` class is responsible for ensuring the uniqueness of global/hoistable values. If you call any `IRBuilder` methods that create a new hoistable instruction, e.g. `IRBuilder::createIntrinsicInst`, `IRBuilder::emitXXX` or `IRBuilder::getType`, `IRBuilder` will check if an equivalent value already exists, and if so it returns the existing inst instead of creating a new one.
The trickier part here is to always maintain the uniqueness when we modify the IR. When we update the operand of an inst from a non-hoistable-value to a hoistable-value, we may need to hoist `inst` itself as a result. For example, consider the following code:
```
%1 = IntType
%p = Ptr(%1)
%2 = func {
%x = ...;
%3 = Ptr(%x);
%4 = ArrayType(%3);
%5 = Var (type: %4);
...
}
```
Now consider the scenario where we need to replace the operand in `Ptr(x)` to `int` (where `x` is some non-constant value), we will get a `Ptr(int)` which is now a global value and should be deduplicated:
```
%1 = IntType
%p = Ptr(%1)
%2 = func {
%x = ...;
//%3 now becomes %p.
%4 = ArrayType(%p);
%5 = Var (type: %4);
...
}
```
Note this code is now breaking the invariant that hoistable insts are always defined at the top-most scope, because `%4` is no longer dependent on any local insts in the function, and should be hoisted to the global scope after replacing `%3` with `%p`. This means that we need to continue to perform hoisting of `%4`, resulting in this final code:
```
%1 = IntType
%p = Ptr(%1)
%4 = ArrayType(%p); // hoisted to global scope
%2 = func {
%x = ...;
%5 = Var (type: %4);
...
}
```
As illustrated above, because we need to maintain the invariants of global/hoistable values, replacing an operand of an inst can have wide-spread effect on the IR.
To help ensure these invariants, we introduce the `IRBuilder.replaceOperand(inst, operandIndex, newOperand)` method to perform all the cascading modifications after replacing an operand. However the `IRInst.setOperand(idx, newOperand)` will not perform the cascading modifications, and using `setOperand` to modify the operand of a hoistable inst will trigger a runtime assertion error.
Similarly, `inst->replaceUsesWith` will also perform any cascading modifications to ensure the uniqueness of hoistable values. Because of this, we need to be particularly careful when using a loop to iterate the IR linked list or def-use linked list and call `replaceUsesWith` or `replaceOperand` inside the loop.
Consider the following code:
```
IRInst* nextInst = nullptr;
for (auto inst = func->getFirstChild(); inst; inst = nextInst)
{
nextInst = inst->getNextInst(); // save a copy of nextInst
// ...
inst->replaceUsesWith(someNewInst); // Warning: this may be unsafe, because nextInst could have been moved to parent->parent!
}
```
Now imagine this code is running on the `func` defined above, and imagine we are now at `inst == %3` and we want to replace `inst` with `Ptr(int)`. Before calling `replaceUsesWith`, we have stored `inst->nextInst` to `nextInst`, so `nextInst` is now `%4` (the array type). Now after we call `replaceUsesWith`, `%4` is hoisted to global scope, so in the next iteration, we will start to process `%4` and follow its `next` pointer to `%2` and we will be processing `func` instead of continuing to walk the child list!
Because of this, we should never be calling `replaceOperand` or `replaceUsesWith` when we are walking the IR linked list. If we want to do so, we must create a temporary workList and add all the insts to the work list before we make any modifications. The `IRInst::getModifiableChildren` utility function will return a temporary work list for safe iteration on the children. The same can be said of the def-use linked list. There are `traverseUses` and `traverseUsers` utility functions defined in `slang-ir.h` to help with walking the def-use list safely.
Another detail to keep in mind is that any local references to an inst may become out-of-date after a call to `replaceOperand` or `replaceUsesWith`. Consider the following code:
```
IRBuilder builder;
auto x = builder.emitXXX(); // x is some non-hoistable value.
auto ptr = builder.getPtrType(x); // create ptr(x).
x->replaceUsesWith(intType); // this renders `ptr` obsolete!!
auto var = builder.emitVar(ptr); // use the obsolete inst to create another inst.
```
In this example, calling `replaceUsesWith` will cause `ptr` to represent `Ptr(int)`, which may already exist in the global scope. After this call, all uses of `ptr` should be replaced with the global `Ptr(int)` inst instead. `IRBuilder` has provided the mechanism to track all the insts that are removed due to deduplication, and map those removed but not yet deleted insts to the existing inst. When using `ptr` to create a new inst, `IRBuilder` will first check if `ptr` should map to some existing hoistable inst in the global deduplication map and replace it if possible. This means that after the call to `builder.emitVar`, `var->type` is not equal to `ptr`.
### Best Practices
In summary, the best practices when modifying the IR is:
- Never call `replaceUsesWith` or `replaceOperand` when walking raw linked lists in the IR. Always create a work list and iterate on the work list instead. Use `IRInst::getModifiableChildren` and `traverseUses` when you need to modify the IR while iterating.
- Never assume any local references to an `inst` are up-to-date after a call to `replaceUsesWith` or `replaceOperand`. It is OK to continue using them as operands/types to create a new inst, but do not assume the created inst will reference the same inst passed in as argument.

View file

@ -1,259 +0,0 @@
An overview of the Slang Compiler
=================================
This document will attempt to walk through the overall flow of the Slang compiler, as an aid to developers who are trying to get familiar with the codebase and its design.
More emphasis will be given to places where the compiler design is nontraditional, or might surprise newcomers; things that are straightforward won't get much detail.
High-Level Concepts
-------------------
Compilation is always performed in the context of a *compile request*, which bundles together the options, input files, and request for code generation.
Inside the code, there is a type `CompileRequest` to represent this.
The user specifies some number of *translation units* (represented in the code as a `TranslationUnitRequest`) which comprise some number of *sources* (files or strings).
HLSL follows the traditional C model where a "translation unit" is more or less synonymous with a source file, so when compiling HLSL code the command-line `slangc` will treat each source file as its own translation unit.
For Slang code, the command-line tool will by default put all source files into a single translation unit (so that they represent a shared namespace).
The user can also specify some number of *entry points* in each translation unit (`EntryPointRequest`), which combines the name of a function to compile with the pipeline stage to compile for.
In a single compile request, we can generate code for zero or more *targets* (represented with `TargetRequest`); a target defines both the format for output code (e.g., DXIL or SPIR-V) and a *profile* that specifies the capability level to assume (e.g., "Shader Model 5.1").
It might not be immediately clear why we have such fine-grained concepts as this, but it ends up being quite important to decide which pieces of the compiler are allowed to depend on which pieces of information (e.g., whether or not a phase of compilation gets to depend on the chosen target).
The "Front End"
---------------
The job of the Slang front-end is to turn textual source code into a combination of code in our custom intermediate representation (IR) plus layout and binding information for shader parameters.
### Lexing
The first step in the compiler (after a source file has been loaded into memory) is to *lex* it.
The `Lexer` type is implemented in `lexer.{h,cpp}` and produces `Token`s that represent the contents of the file on-demand as requested by the next phase of compilation.
Each token stores a `TokenCode` that indicates the kind of token, the raw text of the token, and the location in the source code where it is located.
Source locations use a somewhat clever encoding to avoid being bloated (they are a single integer rather than separate file, line, and column fields).
We don't make any attempt in the lexer to extract the actual value of integer and floating-point literals; we just store the raw text.
We also don't try to distinguish keywords from identifiers; keywords show up as ordinary identifier tokens.
Much of the complexity (and inefficiency) in the current lexer is derived from the need to support C-isms like backspace line continuation, and special case rules like allowing `<>` to delimit a file name string after a `#include`.
### Preprocessing
The preprocessor (`Preprocessor`) in `preprocessor.{h,cpp}` deals with `#include` constructs, macro expansions, etc.
It pulls tokens from the lexer as needed (making sure to set flags to control the lexer behavior when required) and uses a limited lookahead to decide what to do with each token.
The preprocessor maintains a stack of input streams, with the original source file at the bottom, and pushes entries for `#include`d files, macros to expand etc.
Macro definitions store a sequence of already-lexed tokens, and expansion simply "replays" these tokens.
Expansion keeps a notion of an "environment" for looking up identifiers and mapping them to macro definitions.
Calling through to a function-style macro creates a fresh environment that maps the macro parameter names to pseudo-macros for the arguments.
We still tokenize code in inactive preprocessor conditionals, but don't evaluate preprocessor directives inside inactive blocks (except those that may change the active/inactive state).
Preprocessor directives are each handled as a callback on the preprocessor state and are looked up by name; adding a new directive (if we ever had a reason to) is a fairly simple task.
One important detail of the preprocessor is that it runs over a full source file at once and produces a flat array of `Token`s, so that there is no direct interaction between the parser and preprocessor.
### Parsing
The parser (`Parser` in `parser.{h,cpp}`) is mostly a straightforward recursive-descent parser.
Because the input is already tokenized before we start, we can use arbitrary lookahead, although we seldom look ahead more than one token.
Traditionally, parsing of C-like languages requires context-sensitive parsing techniques to distinguish types from values, and deal with stuff like the C++ "most vexing parse."
Slang instead uses heuristic approaches: for example, when we encounter an `<` after an identifier, we first try parsing a generic argument list with a closing `>` and then look at the next token to determine if this looks like a generic application (in which case we continue from there) or not (in which case we backtrack).
There are still some cases where we use lookup in the current environment to see if something is a type or a value, but officially we strive to support out-of-order declarations like most modern languages.
In order to achieve that goal we will eventually move to a model where we parse the bodies of declarations and functions in a later pass, after we have resolved names in the global scope.
One important choice in the parser is that we strive to avoid hard-coding keywords as much as possible.
We already track an environment for C-like parsing, and we simply extend that so that we also look up declaration and statement keywords in the environment.
This means that most of the language "keywords" in Slang aren't keywords at all, and instead are just identifiers that happen to be bound to syntax in the default environment.
Syntax declarations are associated with a callback that is invoked to parse the construct they name.
The design of treating syntax as ordinary declarations has a long-term motivation (we'd like to support a flexible macro system) but it also has short-term practical benefits.
It is easy for us to add new modifier keywords to the language without touching the lexer or parser (just adding them to the core module), and we also don't have to worry about any of Slang's extended construct (e.g., `import`) breaking existing HLSL code that just happens to use one of those new keywords as a local variable name.
What the parser produces is an abstract syntax tree (AST).
The AST currently uses a strongly-typed C++ class hierarchy with a "visitor" API generated via some ugly macro magic.
Dynamic casting using C++ RTTI is used in many places to check the class of an AST node; we aren't happy with this but also haven't had time to implement a better/faster solution.
In the parsed AST, both types and expressions use the same representation (because in an expression like `A(B)` it is possible that `A` will resolve to a type, or to a function, and we don't know which yet).
One slightly odd design choice in the parser is that it attaches lexical scoping information to the syntax nodes for identifiers, and any other AST node that needs access to the scope/environment where it was defined. This is a choice we will probably change at some point, but it is deeply ingrained right now.
### Semantic Checking
The semantic checking step (`check.{h,cpp}`) is, not surprisingly, the most complicated and messiest bit of the compiler today.
The basic premise is simple: recursively walk the entire AST and apply semantic checking to each construct.
Semantic checking applies to one translation unit at a time.
It has access to the list of entry points for the translation unit (so it can validate them), but it is *not* allowed to depend on the compilation target(s) the user might have selected.
Semantic checking of an expression or type term can yield the same AST node, with type information added, or it can return newly constructed AST nodes (e.g., when an implicit cast needs to be inserted).
Unchecked identifiers or member references are always resolved to have a pointer to the exact declaration node they are referencing.
Types are represented with a distinct class hierarchy from AST nodes, which is also used for a general notion of compile-time values which can be used to instantiate generic types/functions/etc.
An expression that ends up referring to a type will have a `TypeType` as its type, which will hold the actual type that the expression represents.
The most complicated thing about semantic checking is that we strive to support out-of-order declarations, which means we may need to check a function declaration later in the file before checking a function body early in the file.
In turn, that function declaration might depend on a reference to a nested type declared somewhere else, etc.
We currently solve this issue by doing some amount of on-demand checking; when we have a reference to a function declaration and we need to know its type, we will first check if the function has been through semantic checking yet, and if not we will go ahead and recursively type check that function before we proceed.
This kind of unbounded recursion can lead to real problems (especially when the user might write code with circular dependencies), so we have made some attempts to more strictly "phase" the semantic checking, but those efforts have not yet been done systematically.
When code involves generics and/or interfaces, the semantic checking phase is responsible for ensuring that when a type claims to implement an interface it provides all of the requirements of that interface, and it records the mapping from requirements to their implementations for later use. Similarly, the body of a generic is checked to make sure it uses type parameters in ways that are consistent with their constraints, and the AST is amended to make it explicit when an interface requirement is being employed.
### Lowering and Mandatory Optimizations
The lowering step (`lower-to-ir.{h,cpp}`) is responsible for converting semantically valid ASTs into an intermediate representation that is more suitable for specialization, optimization, and code generation.
The main thing that happens at this step is that a lot of the "sugar" in a high-level language gets baked out. For example:
- A "member function" in a type will turn into an ordinary function that takes an initial `this` parameter
- A `struct` type nested in another `struct` will turn into an ordinary top-level `struct`
- Compound expressions will turn into sequences of instructions that bake the order of evaluation
- High-level control-flow statements will get resolved to a control-flow graph (CFG) of basic blocks
The lowering step is done once for each translation unit, and like semantic checking it does *not* depend on any particular compilation target.
During this step we attach "mangled" names to any imported or exported symbols, so that each function overload, etc. has a unique name.
After IR code has been generated for a translation unit (now called a "module") we next perform a set of "mandatory" optimizations, including SSA promotion and simple copy propagation and elimination of dead control-flow paths.
These optimizations are not primarily motivated by a desire to speed up code, but rather to ensure that certain "obvious" simplifications have been performed before the next step of validation.
After the IR has been "optimized" we perform certain validation/checking tasks that would have been difficult or impossible to perform on the AST.
For example, we can validate that control flow never reached the end of a non-`void` function, and issue an error otherwise.
There are other validation tasks that can/should be performed at this step, although not all of them are currently implemented:
- We should check that any `[unroll]` loops can actually be unrolled, by ensuring that their termination conditions can be resolved to a compile-time constant (even if we don't know the constant yet)
- We should check that any resource types are being used in ways that can be statically resolved (e.g., that the code never conditionally computes a resource to reference), since this is a requirement for all our current targets
- We should check that the operands to any operation that requires a compile-time constant (e.g., the texel offset argument to certain `Sample()` calls) are passed values that end up being compile-time constants
The goal is to eliminate any possible sources of failure in low-level code generation, without needing to have a global view of all the code in a program.
Any error conditions we have to push off until later starts to limit the value of our separate compilation support.
### Parameter Binding and Type Layout
The next phase of parameter binding (`parameter-binding.{h,cpp}`) is independent of IR generation, and proceeds based on the AST that came out of semantic checking.
Parameter binding is the task of figuring out what locations/bindings/offsets should be given to all shader parameters referenced by the user's code.
Parameter binding is done once for each target (because, e.g., Vulkan may bind parameters differently than D3D12), and it is done for the whole compile request (all translation units) rather than one at a time.
This is because when users compile something like HLSL vertex and fragment shaders in distinct translation units, they will often share the "same" parameter via a header, and we need to ensure that it gets just one location.
At a high level, parameter binding starts by computing the *type layout* of each shader parameter.
A type layout describes the amount of registers/bindings/bytes/etc. that a type consumes, and also encodes the information needed to compute offsets/registers for individual `struct` fields or array elements.
Once we know how much space each parameter consumes, we then inspect any explicit binding information (e.g., `register` modifiers) that is relevant for the target, and build a data structure to record what binding ranges are already consumed.
Finally, we go through any parameters without explicit binding information and assign them the next available range of the appropriate size (in a first-fit fashion).
The parameter binding/layout information is what the Slang reflection API exposes. It is layered directly over the Slang AST so that it accurately reflects the program as the user wrote it, and not the result of lowering that program to our IR.
This document describes parameter binding as a "front end" activity, but in practice it is something that could be done in the front-end, the back-end or both.
When shader code involves generic type parameters, complete layout information cannot be generated until the values of these parameters are fully known, and in practice that might not happen until the back end.
### Serialization
It is not yet fully implemented, but our intention is that the last thing the front-end does is to serialize the following information:
- A stripped-down version of the checked AST for each translation unit including declarations/types, but not function bodies
- The IR code for each translation unit
- The binding/layout information for each target
The above information is enough to type-check a subsequent module that `import`s code compiled in the front-end, to link against its IR code, or to load and reflect type and binding information.
The "Back End"
--------------
The Slang back end logically starts with the user specifying:
- An IR module, plus any necessary modules to link in and provide its dependencies
- An entry point in that module, plus arguments for any generic parameters that entry point needs
- A compilation target (e.g., SPIR-V for Vulkan)
- Parameter binding/layout information for that module and entry point, computed for the chosen target
We eventually want to support compiling multiple entry points in one pass of the back end, but for now it assumes a single entry point at a time
### Linking and Target Specialization
The first step we perform is to copy the chosen entry point and anything it depends on, recursively into a "fresh" IR module.
We make a copy of things so that any optimization/transformation passes we do for one target don't alter the code the front-end produced in ways that affect other targets.
While copying IR code into the fresh module, we have cases where there might be multiple definitions of the same function or other entity.
In those cases, we apply "target specialization" to pick the definition that is the best for the chosen target.
This step is where we can select between, say, a built-in definition of the `saturate` function for D3D targets, vs. a hand-written one in a Slang standard module to use for GLSL-based targets.
### API Legalization
If we are targeting a GLSL-based platform, we need to translate "varying" shader entry point parameters into global variables used for cross-stage data passing.
We also need to translate any "system value" semantics into uses of the special built-in `gl_*` variables.
We currently handle this kind of API-specific legalization quite early in the process, performing it right after linking.
### Generic Specialization
Once the concrete values for generic parameters are known, we can set about specializing code to the known types.
We do this by cloning a function/type/whatever and substituting in the concrete arguments for the parameters.
This process can be continued as specializing one function may reveal opportunities to specialize others.
During this step we also specialize away lookup of interface requirements through their witness tables, once generic witness-table parameters have been replaced with concrete witness tables.
At the end of specialization, we should have code that makes no use of user-defined generics or interfaces.
### Type Legalization
While HLSL and Slang allow a single `struct` type to contain both "ordinary" data like a `float3` and "resources" like a `Texture2D`, the rules for GLSL and SPIR-V are more restrictive.
There are some additional wrinkles that arise for such "mixed" types, so we prefer to always "legalize" the types in the users code by replacing an aggregate type like:
```hlsl
struct Material { float4 baseColor; Texture2D detailMap; };
Material gMaterial;
```
with separate declarations for ordinary and resource fields:
```hlsl
struct Material { float4 baseColor; }
Material gMaterial;
Texture2D gMaterial_detailMap;
```
Changing the "shape" of a type like this (so that a single variable becomes more than one) needs to be done consistently across all declarations/functions in the program (hence why we do it after specialization, so that all concrete types are known).
### Other Optimizations
We don't currently apply many other optimizations on the IR code in the back-end, under the assumption that the lower-level compilers below Slang will do some of the "heavy lifting."
That said, there are certain optimizations that Slang must do eventually, for semantic completeness. One of the most important examples of these is implementing the semantics of the `[unroll]` attribute, since we can't always rely on downstream compilers to have a capable unrolling implementation.
We expect that over time it will be valuable for Slang to support a wider array of optimization passes, as long as they are ones that are considered "safe" to do above the driver interface, because they won't interfere with downstream optimization opportunities.
### Emission
Once we have transformed the IR code into something that should be legal for the chosen target, we emit high-level source code in either HLSL or GLSL.
The emit logic is mostly just a scan over the IR code to emit a high-level declaration for each item: an IR structure type becomes a `struct` declaration, and IR function becomes a function definition, etc.
In order to make the generated code a bit more readable, the Slang compiler currently does *not* emit declarations using their mangled names and instead tries to emit everything using a name based on how it was originally declared.
To improve the readability of function bodies, the emit logic tries to find consecutive sequences of IR instructions that it can emit as a single high-level language expression. This reduces the number of temporaries in the output code, but we need to be careful about inserting parentheses to respect operator precedence, and also to not accidentally change the order of evaluation of code.
When emitting a function body, we need to get from the low-level control flow graph (CFG) to high-level structured control-flow statements like `if`s and loops. We currently do this on a per-function basis during code emission, using an ad hoc algorithm based on control-flow structured information we stored in the IR.
A future version of the compiler might implement something more complete like the "Relooper" algorithm used by Emscripten.
### Downstream Compiler Execution
Once we have source code, we can invoke downstream compilers like fxc, dxc, and glslang to generate binary code (and optionally to disassemble that code for console output).
The Slang compiler also supports a "pass through" mode where it skips most of the steps outlined so far and just passes text along to these downstream compilers directly. This is primarily intended as a debugging aid for developers working on Slang, since it lets you use the same command-line arguments to invoke both Slang compilation and compilation with these other compilers.
Conclusion
----------
Hopefully this whirlwind introduction to the flow of the Slang compiler gives some idea of how the project fits together, and makes it easier to dive into the code and start being productive.

View file

@ -1,216 +0,0 @@
Semantic Checking
=================
The semantic checking logic in the Slang compiler is located in `source/slang/slang-check*`.
Semantic checking is applied in the front end after parsing, and before lowering of code to the IR.
The main job of the semantic checking stage is to detect and forbid code that has errors in it.
The errors and other diagnostics reported are intended to be of benefit to the user, but semantic checking is also important for the overall function of the compiler.
Stages of compilation after semantic checking (e.g., lowering to the IR) are allowed to *assume* that the code they operate on is semantically valid, and may assert-fail or even crash on invalid code.
Semantic checking is thus not an optional step, and there is no meaningful way to turn it off.
Semantic Checking can be broken into three main kinds of work, and we will discuss how each is implemented in the following sections:
* Checking of "terms" which include expressions and type expressions
* Checking of statements
* Checking of declarations
Checking Terms
--------------
### Some Terminology for Terms
We use the word "term" to refer generically to something that can be evaluated to produce a result, but where we do not yet know if the result will be a type or a value. For example, `Texture2D` might be a term that results in a type, while `main` might be a term that results in a value (of function type), but both start out as a `NameExpr` in the AST. Thus the AST uses the class hierarchy under `Expr` to represent terms, whether they evaluate to values or types.
There is also the `Type` hierarchy, but it is important to understand that `Type` represents types as their logical immutable selves, while `Expr`s that evaluate to types are *type expressions* which can be concretely pointed to in the user's code. Type expressions have source locations, because they represent something the user wrote in their code, while `Type`s don't have singular locations by default.
The codebase uses the notion of a `TypeRepr` for those `Expr`s that should only ever evaluate to types, and there is also a `TypeExp` type that is meant to package up a `Type` with an optional `Expr` for a type expression that produced it. The names of these implementation types aren't great, and should probably not be spread further.
A value-bearing `Expr` will eventually be given a `Type` that describes the type of value it produces.
An `Expr` that evaluates to a type will eventually be given a `Type` that uses the `TypeType` subclass to indicate the specific type it evaluated to.
The `TypeType` idea is kind of kludge to represent "kinds" (the "types of types") in our system.
More correctly, we should say that every `Expr` gets a *classifier*, with the classifiers for value expressions being `Type`s and the classifiers for type expressions being kinds, but we haven't had time or inclination to fix the model yet.
### The Big Picture
Checking of terms is largely done as an ad hoc postorder traversal of the AST.
That is, in order to check a compound expression like `f(a)` we first need to check `f` and `a` before we can check the function call.
When checking an expression there are four main things that have to be done:
1. Recursively check all sub-expressions.
2. Detect and diagnose any errors (or warnings) in the current expression.
3. Optionally construct a new expression to replace the current expression (or one of its sub-expressions) in cases where the syntactic form of the input doesn't match the desired semantics (e.g., make an implicit type conversion explicit in the AST).
4. Determine the correct type for the result expression, and store it so that it can be used by subsequent checking.
Those steps may end up being interleaved in practice.
### Handling Errors Gracefully
If an error is detected in a sub-expression, then there are a few issues that need to be dealt with:
* We need to ensure that an erroneous sub-expression can't crash the compiler when it goes on to check a parent expression. For example, leaving the type of an expression as null when it has errors is asking for trouble.
* We ideally want to continue to diagnose other unrelated errors in the same expression, statement, function, or file. That means that we shouldn't just bail out of semantic checking entirely (e.g., by throwing an exception).
* We don't want to produce "cascading" errors where, e.g., an error in `a` causes us to also report an error in `a + b` because no suitable operator overload was found.
We tackle all of these problems by introducing the `ErrorType` and `ErrorExpr` classes.
If we can't determine a correct type for an expression (say, because it has an error) then we will assign it the type `ErrorType`.
If we can't reasonably form an expression to return *at all* then we will return an `ErrorExpr` (which has type `ErrorType`).
These classes are designed to make sure that subsequent code won't crash on them (since we have non-null objects), but to help avoid cascading errors.
Some semantic checking logic will detect `ErrorType`s on sub-expressions and skip its own checking logic (e.g., this happens for function overload resolution), producing an `ErrorType` further up.
In other cases, expressions with `ErrorType` can be silently consumed.
For example, an erroneous expression is implicitly convertible to *any* type, which means that assignment of an error expression to a local variable will always succeed, regardless of variable's type.
### Overload Resolution
One of the most involved parts of expression checking is overload resolution, which occurs when there is an expression of the form `f(...)` where `f` could refer to multiple function declarations.
Our basic approach to overload resolution is to iterate over all the candidates and add them to an `OverloadResolveContext`.
The context is responsible for keeping track of the "best" candidate(s) seen so far.
Traditionally a language defines rules for which overloads are "better" than others that focus only on candidates that actually apply to the call site.
This is the right way to define language semantics, but it can produce sub-optimal diagnostics when *no* candidate was actually applicable.
For example, suppose the user wrote `f(a,b)` and there are 100 functions named `f`, but none works for the argument types of `a` and `b`.
A naive approach might just say "no overload applicable to arguments with such-and-such types."
A more advanced compiler might try to list all 100 candidates, but that wouldn't be helpful.
If it turns out that of the 100 candidates, only 10 of them have two parameters, then it might be much more helpful to list only the 10 candidates that were even remotely applicable at the call site.
The Slang compiler strives to provide better diagnostics on overload resolution by breaking the checking of a candidate callee into multiple phases, and recording the earliest phase at which a problem was detected (if any).
Candidates that made it through more phases of checking without errors are considered "better" than other candidates, even if they ultimately aren't applicable.
### Type Conversions
Conversion of values from one type to another can occur both explicitly (e.g., `(int) foo`) and implicitly (e.g., `while(foo)` implicitly converts `foo` to a `bool`).
Type conversion also tied into overload resolution, since some conversions get ranked as "better" than others when deciding between candidates (e.g., converting an `int` to a `float` is preferred over converting it to a `double`).
We try to bottleneck all kinds of type conversion through a single code path so that the various kinds of conversion can be handled equivalently.
### L-Values
An *l-value* is an expression that can be used as the destination of an assignment, or for read-modify-write operations.
We track the l-value-ness of expressions using `QualType` which basically represents a `Type` plus a bit to note whether something is an l-value or not.
(This type could eventually be compressed down to be stored as a single pointer, but we haven't gotten to that yet)
We do not currently have a concept like the `const` qualifier in C/C++, that would be visible to the language user.
Propagation of l-value-ness is handled in an ad hoc fashion in the small number of expression cases that can ever produce l-values.
The default behavior is that expressions are not l-values and the implicit conversion from `Type` to `QualType` reflects this.
Checking Statements
-------------------
Checking of statements is relatively simpler than checking expressions.
Statements do not produce values, so they don't get assigned types/classifiers.
We do not currently have cases where a statement needs to be transformed into an elaborated form as part of checking (e.g., to make implicit behavior explicit), so statement checking operates "in place" rather than optionally producing new AST nodes.
The most interesting part of statement checking is that it requires information about the lexical context.
Checking a `return` statement requires knowing the surrounding function and its declared result type.
Checking a `break` statement requires knowing about any surrounding loop or `switch` statements.
We represent the surrounding function explicitly on the `SemanticsStmtVisitor` type, and also use a linked list of `OuterStmtInfo` threaded up through the stack to track lexically enclosing statements.
Note that semantic checking of statements at the AST level does *not* encompass certain flow-sensitive checks.
For example, the logic in `slang-check-stmt.cpp` does not check for or diagnose any of:
* Functions that fail to `return` a value along some control flow paths
* Unreachable code
* Variables used without being initialized first
All of the above are instead intended to be handled at the IR level (where dataflow analysis is easier) during the "mandatory" optimization passes that follow IR lowering.
Checking Declarations
---------------------
Checking of declarations is the most complicated and involved part of semantic checking.
### The Problem
Simple approaches to semantic checking of declarations fall into two camps:
1. One can define a total ordering on declarations (usually textual order in the source file) and only allow dependencies to follow that order, so that checking can follow the same order. This is the style of C/C++, which is inherited from the legacy of traditional single-pass compilers.
2. One can define a total ordering on *phases* of semantic checking, so that every declaration in the file is checked at phase N before any is checked at phase N+1. E.g., the types of all variables and functions must be determined before any expressions that use those variables/functions can be checked. This is the style of, e.g., Java and C#, which put a premium on defining context-free languages that don't dictate order of declaration.
Slang tries to bridge these two worlds: it has inherited features from HLSL that were inspired by C/C++, while it also strives to support out-of-order declarations like Java/C#.
Unsurprisingly, this leads to unique challenges.
Supporting out-of-order declarations means that there is no simple total order on declarations (we can have mutually recursive function or type declarations), and supporting generics with value parameters means there is no simple total order on phases.
For that last part observe that:
* Resolving an overloaded function call requires knowing the types of the parameters for candidate functions.
* Determining the type of a parameter requires checking type expressions.
* Type expressions may contain value arguments to generics, so checking type expressions requires checking value expressions.
* Value expressions can include function calls (e.g., operator invocations), which then require overload resolution to type-check.
### The Solution
Our declaration checking logic takes the idea of phase-based checking as a starting point, but instead of a global ordering on phases we use a per-declaration order.
Each declaration in the Slang AST will have a `DeclCheckState` that represents "how checked" that declaration is.
We can apply semantic checking logic to a declaration `D` to raise its state to some desired state `S`.
By default, the logic in `slang-check-decl.cpp` will do a kind of "breadth-first" checking strategy where it will try to raise all declarations to the one state before moving on to the next.
In many cases this will reproduce the behavior of a Java or C#-style compiler with strict phases.
The main difference for Slang is that whenever, during the checking of some declaration `D`, we discover that we need information from some other declaration `E` that would depend on `E` being in state `S`, we manually call a routine `ensureDecl(E,S)` whose job is to ensure that `E` has been checked enough for us to proceed.
The `ensureDecl` operation will often be a no-op, if the declaration has already been checked previously, but in cases where the declaration *hasn't* been checked yet it will cause the compiler to recursively re-enter semantic checking and try to check `E` until it reaches the desired state.
In pathological cases, this method can result in unbounded recursion in the type checker. The breadth-first strategy helps to make such cases less likely, and introducing more phases to semantic checking can also help reduce problems.
In the long run we may need to investigate options that don't rely on unbounded recursion.
### The Rules
As a programmer contributing to the semantic checking infrastructure, the declaration-checking strategy requires following a few rules:
* If a piece of code is about to rely on some property of a declaration that might be null/absent/wrong if checking hasn't been applied, it should use `ensureDecl` to make sure the declaration in question has been checked enough for that property to be available.
* If adding some `ensureDecl`s leads to an internal compiler error because of circularity in semantic checking, then either the `ensureDecl`s were misplaced, or they were too strong (you asked for more checking than was necessary), or in the worse case we need to add more phases (more `DeclCheckState`s) to separate out the checking steps and break the apparent cycle.
* In very rare cases, semantic checking for a declaration may want to use `SetCheckState` to update the state of the declaration itself before recursively `ensureDecl`ing its child declarations, but this must be done carefully because it means you are claiming that the declaration is in some state `S`, while not having completed the checking that is associated with state `S`.
* It should *never* be necessary to modify `checkModuleDecl` so that it performs certain kinds of semantic analysis on certain declarations before others (e.g., iterate over all the `AggTypeDecl`s before all the `FuncDecl`s). If you find yourself tempted to modify it in such a way, then add more `DeclCheckState`s to reflect the desired ordering. It is okay to have phases of checking that only apply to a subset of declarations.
* Every statement and expression/term should be checked once and only once. If something is being checked twice and leading to failures, the right thing is to fix the source of the problem in declaration checking, rather than make the expression/statement checking be defensive against this case.
Name Lookup
-----------
Lookup is the processing of resolving the contextual meaning of names either in a lexical scope (e.g., the user wrote `foo` in a function body - what does it refer to?) or in the scope of some type (e.g., the user wrote `obj.foo` for some value `obj` of type `T` - what does it refer to?).
Lookup can be tied to semantic analysis quite deeply.
In order to know what a member reference like `obj.foo` refers to, we not only need to know the type of `obj`, but we may also need to know what interfaces that type conforms to (e.g., it might be a type parameter `T` with a constraint `T : IFoo`).
In order to support lookup in the presence of our declaration-checking strategy described above, the lookup logic may be passed a `SemanticsVisitor` that it can use to `ensureDecl()` declarations before it relies on their properties.
However, lookup also currently gets used during parsing, and in those cases it may need to be applied without access to the semantics-checking infrastructure (since we currently separate parsing and semantic analysis).
In those cases a null `SemanticsVisitor` is passed in, and the lookup process will avoid using lookup approaches that rely on derived semantic information.
This is fine in practice because the main thing that gets looked up during parsing are names of `SyntaxDecl`s (which are all global) and also global type/function/variable names.
Known Issues
------------
The largest known issue for the semantic checking logic is that there are currently dependencies between parsing and semantic checking.
Just like a C/C++ parser, the Slang parser sometimes needs to disambiguate whether an identifier refers to a type or value to make forward progress, and that would in general require semantic analysis.
Ideally the way forward is some combination of the following two strategies:
* We should strive to make parsing at the "global scope" fully context-insensitive (e.g., by using similar lookahead heuristics to C#). We are already close to this goal today, but will need to be careful that we do not introduce regressions compared to the old parser (perhaps a "compatibility" mode for legacy HLSL code is needed?)
* We should delay the parsing of nested scopes (both function and type bodies bracketed with `{}`) until later steps of the compiler. Ideally, parsing of function bodies can be done in a context-sensitive manner that interleaves with semantic checking, closer to the traditional C/C++ model (since we don't care about out-of-order declarations in function bodies).

View file

@ -1,331 +0,0 @@
Serialization
=============
Slang has a collection of serialization components. This document will be used to discuss serialization around IR/AST and modules as it currently exists. A separate document will describe the future serialization plans.
All of the serialization aspects here focus on binary serialization.
The major components are
* IR Serialization
* AST/Generalized Serialization
* SourceLoc Serialization
* Riff container
* C++ Extractor
Generalized Serialization
=========================
Generalized serialization is the mechanism used to save 'arbitrary' C++ structures. It is currently used for serializing the AST. Although not necessary, generalized serialization is typically helped out by the `C++ extractor`, which can do rudimentary parsing of C++ source, and extract class-like types and their fields. The extraction then produces header files that contain macros that can then be used to drive serialization.
It's worth discussing briefly what the philosophy is behind the generalized serialization system. To talk about this design it is worth talking a little about serialization in general and the issues involved. Let's say we have a collection of C++ class instances that contain fields. Some of those fields might be pointers. Others of the fields might be a templated container type like a Dictionary<K,V>. We want to take all of these instances, write them to a file, such that when we read the file back we will have the equivalent objects with equivalent relationships.
We could imagine a mechanism that saved off each instance, by writing off the address of the object, and then the in memory representation for all the instances that can be reached. When reading back the objects would be at different locations in memory. If we knew where the pointers were, we could use a map of old pointers to the new instances and fix them up. Problems with this simple mechanism occur because...
* If we try to read back on a different machine, with a different pointer size, the object layout will be incompatible
* If we try to read back on the same machine where the source is compiled by a different compiler, the object layout might be incompatible (say bool or enum are different size)
* Endianness might be different
* Knowing where all the pointers are and what they point to and therefore what to serialize is far from simple.
* The alignment of types might be different across different processors and different compilers
The implementation makes a distinction between the 'native' types, the regular C++ in memory types and 'serial' types. Each serializable C++ type has an associated 'serial' type - with the distinction that it can be written out and (with perhaps some other data) read back in to recreate the C++ type. The serial type can be a C++ type, but is such that it can be written and read from disk and still represent the same data.
The approach taken in Slang is to have each 'native' type (ie the C++ type) that is being serialized have a serializable 'dual' type. The serial type can be an explicit C++ type, or it might be implicit (ie not have a C++ type) and calculated at Slang startup.
The important point here is that the Serial type must writable on one target/process and readable correctly on another.
The easy cases are types that have an alignment and representation that will work over all targets. These would be most built in types - integrals 8,16,32 and float32. Note that int64 and double are *not* so trivial, because on some targets they require 8 byte alignment - so they must be specially defined to have 8 byte alignment.
Another odd case is bool - it has been on some compilers 32 bits, and on others 8 bits. Thus we need to potentially convert.
For this and other types it is therefore necessary to have a function that can convert to and from the serialized dual representation.
## Generalized Field Conversion
For types that contain fields, it would be somewhat laborious to have to write all of the conversion functions by hand. To avoid this we use the macro output of the C++ extractor to automatically generate the appropriate functions.
Take DeclRefExpr from the AST hierarchy - the extractor produces a macro something like...
```
#define SLANG_FIELDS_ASTNode_DeclRefExpr(_x_, _param_)\
_x_(scope, (RefPtr<Scope>), _param_)\
_x_(declRef, (DeclRef<Decl>), _param_)\
_x_(name, (Name*), _param_)
```
DeclRefExpr derives from Expr and this might hold other fields and so forth.
The macros can generate the appropriate conversion functions *if* we have the conversion functions for the field types. Field type conversions can be specified via a special macro that describes how the conversion to and from the type work. To make the association between the native and serial type, as well as provide the functions to convert, we use the template
```
template <typename T>
struct SerialTypeInfo;
```
and specialize it for each native type. The specialization holds
* SerialType - The type that will be used to represent the native type
* NativeType - The native type
* SerialAlignment - A value that holds what kind of alignment the SerialType needs to be serializable (it may be different from SLANG_ALIGN_OF(SerialType)!)
* toSerial - A function that with the help of SerialWriter convert the NativeType into the SerialType
* toNative - A function that with the help of SerialReader convert the SerialType into the NativeType
It is useful to have a structure that can hold the type information, so it can be stored. That is achieved with
```
template <typename T>
struct SerialGetType;
```
This template can be specialized for a specific native types - but all it holds is just a function getType, which returns a `SerialType*`, which just holds the information held in the SerialTypeInfo template, but additionally including the size of the SerialType.
So we need to define a specialized SerialTypeInfo for each type that can be a field in a NodeBase/RefObject derived type. We don't need to define anything explicitly for the NodeBase derived types, as we will just generate the layout from the fields. How do we know the fields? We just used the macros generated from the C++ extractor.
So first a few things to observe...
1) Some types don't need any conversion to be serializable - int8_t, or float the bits can just be written out and read in (1)
2) Some types need a conversion but it's very simple - for example an enum without explicit size, being written as an explicit size
3) Some types can be written out but would not be directly readable or usable with different targets/processors, so need converting
4) Some types require complex conversions that require programmer code - like Dictionary/List
For types that need no conversion (1), we can just use the template SerialIdentityTypeInfo
```
template <>
struct SerialTypeInfo<SomeType> : public SerialIdentityTypeInfo<SomeType> {};
```
This specialization means that SomeType can be written out and read in across targets/compilers without problems.
For (2) we have another template that will do the conversion for us
```
template <typename NATIVE_T, typename SERIAL_T>
struct SerialConvertTypeInfo;
```
That we can use as above, and specify the native and serial types.
For (3) there are a few scenarios. For any field in a serial type we must store in the serialized type such that the representation will work across all processors/compilers. So one problematic type is `bool`. It's not specified how it's laid out in memory - and some compilers have stored it as a word. Most recently it's been stored as a byte. To make sure bool is ok for serialization therefore we store it as a uint8_t.
Another example would be double. It's 64 bits, but on some arches/compilers its SLANG_ALIGN_OF is 4 and on others it's 8. On some architectures a non aligned read will lead to a fault, on others it might be very slow. To work around this problem therefore we have to ensure double has the alignment that will work across all targets - and that alignment is 8. In that specific case that issue is handled via SerialBasicTypeInfo, which makes the SerialAlignment the sizeof the type.
For (4) there are a few things to say. First a type can always implement a custom version of how to do a conversion by specializing `SerialTypeInfo`. But there remains another nagging issue - types which allocate/use other memory that changes at runtime. Clearly we cannot define 'any size of memory' in a fixed SerialType defined in a specialization of SerialTypeInfo. The mechanism to work around this is to allow arbitrary arrays to be stored, that can be accessed via an SerialIndex. This will be discussed more once we discuss a little more about the file system, and SerialIndex.
## Struct value types
There is a mechanism to allow the simple serialization of 'value' struct types for this to work it requires
* The fields of the struct are serializable and public
* The super class (if there is one) is serializable
If this is the case, it is not necessary to write a `SerialTypeInfo<T>` specialization, the C++ extractor and its reflection can generate the specialization for you. The steps needed
* Place SLANG_VALUE_CLASS(your type) in the definition of your struct
* Make sure that the header containing the struct definition is included in the ones C++ extractor examines
* Instead of implementing SerialTypeInfo for your type use the macro SLANG_VALUE_TYPE_INFO(your type)
If there are problems, look at the contents of `slang-generated-value.h` and `slang-generated-value-macro.h`.
It should be noted that currently because of limitations in the C++ extractor, all of the types must be defined in the same scope.
Also because value types are always fields in generalized serialization, they do not need to be identified with a sub type, even though C++ extractor does generate a ValueType enum.
## Generalized Serialization Format
The serialization format used is 'stream-like' with each 'object' stored in order. Each object is given an index starting from 1. 0 is used to be in effect nullptr. The stream looks like
```
SerialInfo::Entry (for index 1)
Payload for type in entry
SerialInfo::Entry (for index 2)
Payload for type in entry
...
...
```
When writing we have an array that maps each index to a pointer to the associated header. We also have a map that maps native pointers to their indices. The Payload *is* the SerialType for the thing saved. The payload directly follows the Entry data. Each object in this list can only be a few types of things
* NodeBase derived type
* RefObject derived type
* String
* Array
The actual Entry followed by the payloads are allocated and stored when writing in a MemoryArena. When we want to write into a stream, we can just iterate over each entry in order and write it out.
You may have spotted a problem here - that some Entry types can be stored without alignment (for example a string - which stores the length VarInt encoded followed by the characters). Others require an alignment - for example a NodeBase derived type that contains an int64_t will *require* 8 byte alignment. As a feature of the serialization format we want to be able to just map the data into memory, and be able to access all the SerialType as is on the CPU. For that to work we *require* that the payload for each entry has the right alignment for the associated SerialType.
To achieve this we store in the Entry its alignment requirement *AND* the next entry's alignment. With this when we read, as we are stepping through the entries we can find where the next Entry starts. Because the payload comes directly after the Entry - the Entry's size must be a multiple of the largest alignment the payload can have.
For the code that does the conversion between native and serial types it uses either the SerialWriter or SerialReader. This provides the mechanism to turn a pointer into a serializable `SerialIndex` and vice versa. There are some special functions for converting string like types back and forth.
The final mechanism is that of 'Arrays'. An array allows reading or writing a chunk of data associated with a `SerialIndex`. The chunk of data *must* hold data that is serializable. If the array holds pointers - then the serialized array must hold an array of `SerialIndex` values that represent those pointers. When reading back in `SerialIndex` is converted back to a pointer.
Arrays are the escape hatch that allows for more complex types to serialize. Dictionaries for example are saved as a serial type that is two SerialIndices one to a keys array and one to a values array.
Note that writing has two phases, serializing out into an SerialWriter, and then secondly writing out to a stream.
## Object/Reference Types
When talking about Object/Reference types this means types that can be referenced natively as pointers. Currently that means `NodeBase` and `SerialRefObject` derived types.
The SerialTypeInfo mechanism is generally for *fields* of object types. For derived types we use the C++ extractor's field list to work out the native field offsets and types. With this we can then calculate the layout for NodeBase types such that they follow the requirements for serialization - such as alignment and so forth.
This information is held in the SerialClasses, which for a given TypeKind/SubType gives a SerialClassInfo, that specifies fields for just that type.
It is trivial to work out the SubType for a NodeBase derived class - it's just the astTypeNode member in the `NodeBase` type. For a SerialRefObject it is determined by first calling
```
const ReflectClassInfo* getClassInfo() const;
```
Then the m_classID in the `ReflectClassInfo` is the subtype.
## Reading
Due to the care in writing reading is relatively simple. We can just take the contents of the file and put in memory, as long as in memory it has an alignment of at least MAX_ALIGNMENT. Then we can build up an entries table by stepping through the data and writing the pointer.
The toNative functions take an SerialReader - this allows the implementation to ask for pointers and arrays from other parts of the serialized data. It also allows for types to be lazily reconstructed if necessary.
Lazy reconstruction may be useful in the future to partially reconstruct a sub part of the serialized data. In the current implementation, lazy evaluation is used on Strings. The m_objects array holds all of the recreated native 'objects'. Since the objects can be derived from different base classes the associated Entry will describe what it really is.
For the String type, we initially store the object pointer as null. If a string is requested from that index, we see if the object pointer is null, if it is we have to construct the StringRepresentation that will be used. An extra wrinkle is that we allow accessing of a serialized String as a Name or a string or a UnownedSubString. Fortunately a Name just holds a string, and a Name remains in scope as long as it's NamePool does which is passed in.
### Serial type replacement
In generalized serialization systems such as with Java there is a mechanism for reference types to replace their representation on writing, and then on reading replace the read type with the actual type. Write replacement is already used when serializing out modules via the `SerialFilter` mechanism. The actual implementation is `ModuleSerialFilter`, if an object is referenced in a different module that is explicitly specified, it is replaced with `ImportExternalDecl`, that names the actual definition to use.
Currently when deserializing, the `ImportExternalDecl` is *not* turned back into the item it references. This means there are likely pointers which point to invalid objects.
If we wanted to do a replacement on reconstruction, we could modify reading as follows.
1) Don't construct anything at the start
2) Find 'root's they must be created and deserialized first
. Any read/writeReplace is a root
. Any marked (like SourceLocData) is a root. (When deconstructed it also needs to add information to the Reader)
. The root of the objects (note we could just deserialize first to last if not already constructed)
3) During deserialization pointer references are constructed on demand
4) Extra code is needed to make sure there aren't cycles. Any object is either Pre/Created/Deserialized.
### Other reading issues
As touched on elsewhere SourceLoc information has to be carefully handled. Within the generalized serialization we have the additional problem that we probably don't want to attach SourceLoc or other types explicitly to the SerialReader/SerialWriter. The mechanism to work around this is via the `SerialExtraObjects` structure. This allows types to optionally be available to the Reader/Writer without it having to explicitly know anything about the type.
For all types supporting this mechanism they *require* that they are added to the `SerialExtraType` enum, and that they embed a static kExtraType field in the type. This solution is not as flexible as perhaps using a string map or something of that sort, but it does make lookup very fast and simple which is likely significant as many types contain the SourceLoc type for example.
## Identifying Types
How a NodeBase derived type identifies itself is not directly compatible with how a SerialRefObject represents itself. The NodeBase derived type uses `ASTNodeType` enum. The SerialRefObject uses a `RefObjectType` enum. Thus to uniquely identify a type we typically actually need two bits of information the `SerialTypeKind` as well as the `SerialSubType`.
```
enum class SerialTypeKind : uint8_t
{
Unknown,
String, ///< String
Array, ///< Array
NodeBase, ///< NodeBase derived
RefObject, ///< RefObject derived types
CountOf,
};
```
String and Array are special cases described elsewhere.
If the `SerialTypeKind` is `NodeBase`, then the `SerialSubType` *is* the ASTNodeType. If the `SerialTypeKind` is `RefObject` then the `SerialSubType` *is* RefObjectType.
`SerialClasses` holds the information on how to serialize non-field Serial types. For each `SerialTypeKind`/`SerialSubType` it holds a `SerialClass`. The SerialClass holds the size of the type, the number of fields, and the field information. The fields themselves contain a `SerialFieldType` - this holds the pointers to the functions to convert between `native` and `serial` types.
In order to set up all types in a SerialClass without tying SerialClasses to an implementation the class `SerialClassesUtil` is used to set up Slang serialized types in a `SerialClasses` instance.
IR Serialization
================
Currently IR serialization is handled via a separate mechanism to 'generalized' serialization.
This mechanism is *much* simpler than generalized serialization, because by design the IR types are very homogeneous in style. There are a few special cases, but in general an instruction consists of
* It's type
* A SourceLoc
* 0 or more operands.
* 0 or more children.
Within the IR instructions are pointers to IRInst derived types. As previously discussed serializing pointers directly is generally not a good idea. To work around this the pointers are turned into 32 bit indices. Additionally we know that an instruction can belong to at most one other instruction.
When serializing out special handling is made for child instructions - their indices are made to be a contiguous range of indices for all instructions that belong to each parent. The indices are ordered into the same order as the children are held in the parent. By using this mechanism it is not necessary to directly save off the indices that belong to a parent, only the range of indices.
The actual serialization mechanism is similar to the generalized mechanism - referenced objects are saved off in order of their indices. What is different is that the encoding fixes the size of the Inst to `IRSerialData`. This can hold up to two operands; if the instruction has more than two operands then one of the UInt32 is the operand count and the other is an offset to a list of operands. It probably makes sense to alter this in the future to stream the instructions payload directly.
IR serialization allows a simple compression mechanism, that works because much of the IR serialized data is UInt32 data, that can use a variable byte encoding.
AST Serialization
=================
AST serialization uses the generalized serialization mechanism.
When serializing out an AST module it is typical to want to just serialize out the definitions within that module. Without this, the generalized serializer will crawl over the whole of the AST structure serializing every thing that can be reached - including the whole of the core module.
The filter `ModuleSerialFilter` can be used when writing the AST module, it will replace any references to elements outside of the current module with a `ImportExternalDecl`. This contains a mangled name to the item being referenced in another module.
When serializing back in, it may be possible to turn these references into the actual element, if the module containing the definition has been loaded. This probably can't work in general though, as if we have two modules that reference items in the other, then it isn't possible to fix up on load.
A way around this would be to not replace on reading (or only replace items that can be found). Then go through the `ImportExternalDecl` elements doing the lookup, and potentially loading other modules. There are several issues here though
* On first loading pointers that have been replaced will claim to be a type they are typically *NOT*
* Once we have determined what `ImportExternalDecl` should be replaced with, how do we replace it?
On the first point, this is perhaps undesirable (on a variety of levels - such as debugging), but isn't as terrible as it could be, as the actual type identification is managed by Slang via the `astTypeNode`. So there is a simple way of identifying what the type actually is.
On the second point - this isn't so simple. If we had an indirection, we could do the replacement quickly and trivially, without having to fix up all the pointers. We probably don't want to add such an indirection into the pointer based system so choices are
* Store where all the pointers are, and fix them up
* Traverse the hierarchy replacing pointers
Within the current mechanism storing where all the pointers are is not so simple - it would require the setting of any pointer to record where that pointer is stored, and for that to remain the location. Doing so would require setting all pointers to go through some recording mechanism. Pointers held in containers - like the Dictionary may not be directly available. Moreover even if they *were*, doing such a behavior may break the containers' invariants - for example replacing a key's pointer may change its hash.
Traversing the hierarchy would be something akin to the serialization process. It would require special handling for field types to do the replacement. There would need to be special handling for struct value types.
SourceLoc Serialization
=======================
SourceLoc serialization presents several problems. Firstly we have two distinct serialization mechanisms that need to use it - IR serialization and generalized serialization. That being the case it cannot be saved directly in either, even though it may be referenced by either.
To keep things simple for now we build up SourceLoc information for both IR and general serialization via their writers adding their information into a SerialSourceLocWriter. Then we can save this information into a RIFF section, that can be loaded before either general or IR deserialization is used.
When reading the SourceLoc information has to be located and deserialized before any AST or IR deserialization. The SourceLoc data can then be turned into a SerialSourceLocReader, which is then either set on the `SerialReaders` `SerialExtraObjects`. Or passed to the `IRSerialReader`.
Riff Container
==============
[Riff](https://en.wikipedia.org/wiki/Resource_Interchange_File_Format) is used as a mechanism to store binary sections. The format allows for a hierarchy of `chunks` that hold binary data. How the data is interpreted depends on the [FOURCC](https://en.wikipedia.org/wiki/FourCC) associated with each chunk.
As previously touched on there are multiple different mechanisms used for serialization. IR serialization, generalized serialization, SourceLoc serialization - there are also other uses, such as serializing of entry point information. Riff is used to combine all of these incompatible binary parts together such that they can be stored together.
The handling of these riff containers is held within the `SerialContainerUtil` class.
C++ Extractor
=============
The C++ Extractor is the tool `slang-cpp-extractor` that can be used to examine C++ files to extract class definitions and associated fields. These files contain, in the form of macros, information about each class as well as reflected fields. These generated files can then be used to implement serialization without having to explicitly specify fields in C++ source code.
Issues
======
* No support for forward/backward compatibility.
** Adding fields/classes will typically break compatibility
* Binary files do not contain data to describe themselves
** It is *not* possible to write a stand alone tool that can dump any serialized file - its interpretation depends on the version of Slang it was written from
* The Riff mechanism use for container usage is somewhat ad-hoc
* Re-referencing AST nodes from other modules does not happen automatically on deserialization
* There are several mechanisms used for serialization that are not directly compatible
## C++ extractor issues
* All types (and typedefs) that are serialized must be defined in the same scope - child types don't work correctly
* When using value serialization, all the members that are serializable must be public
* The types output in slang fields do not correctly take into account scope (this is a similar issue to the issue above)

View file

@ -1,260 +0,0 @@
Core Module Intrinsics
======================
The following document aims to cover a variety of systems used to add target specific features. They are most extensively used in the slang core module.
**NOTE!** These features should *not* be considered stable! They can be used in regular slang code to add features, but they risk breaking with any Slang version change. Additionally the features implementation can be very particular to what is required for a specific feature set, so might not work as expected in all scenarios.
As these features are in flux, it is quite possible this document is behind the current features available within the Slang code base.
If you want to add support for a feature for a target to Slang, implementing it as a part of the Slang standard modules is typically a good way to progress. Depending on the extension/feature it may not be possible to add support exclusively via changes to the standard module alone. That said most support for target specific extensions and features involve at least some changes to the slang standard modules including the core module, and typically using the mechanisms described here.
## Core Module
The main place these features are used are within the slang core module. This is implemented with a set of slang files within the slang project
* core.meta.slang
* hlsl.meta.slang
* diff.meta.slang
Looking at these files will demonstrate the features in use.
Most of the intrinsics and attributes have names that indicate that they are not for normal use. This is typically via a `__` prefix.
The `.meta.slang` files look largely like Slang source files, but their contents can also be generated programmatically with C++ code. A section of code can drop into `C++` code if it is preceded by `${{{{`. The C++ section is closed with a closing `}}}}`. This mechanism is typically used to generate different versions of a similar code sequence. Values from the C++ code can be accessed via the `$()`, where the contents of the brackets specifies something that can be calculated from within the C++ code.
As an example, to produce an array with values 0 to 9 we could write...
```slang
// Slang code
${{{{
// C++ code, calling out to a C++ function getTime, the result is held in variable time
int cppTime = getTime();
}}}}
// Back to Slang code, can access the C++ variable previously defined as cppTime. Due to $().
// The code inside the $() is executed on the C++ side, so can do calculations. In practice it would be easier
// to just use call $(getTime() + 1), but this demonstrates variables are accessible.
int slangTime = $(cppTime + 1);
```
# Attributes
## [__readNone]
A `[__readNone]` indicates a function that computes its results strictly based on argument values, without reading or writing through any pointer arguments, or any other state that could be observed by a caller.
## [__NoSideEffect]
Specifies a function declaration has no observable side effects.
## [__unsafeForceInlineEarly]
Inlines the contained code, but does so at a very early stage. Being earlier allows some kinds of inlining transformations to work that wouldn't work with regular inlining. It also means it must be used with *care*, because it may produce unexpected results for more complex scenarios.
## [__NonCopyableType]
Marks a type to be non-copyable, causing the SSA pass to skip turning variables of the type into SSA values.
## [__AlwaysFoldIntoUseSiteAttribute]
A call to the decorated function should always be folded into its use site.
## [KnownBuiltin("name")]
A `[KnownBuiltin("name")]` attribute allows the compiler to identify this declaration during compilation, despite obfuscation or linkage removing optimizations
# Intrinsics
<a id="target-intrinsic"></a>
## __target_intrinsic(target, expansion)
This is a widely used and somewhat complicated intrinsic. Placed on a declaration it describes how the declaration should be emitted for a target. The complexity is that `expansion` is applied via a variety of rules. `target` is a "target capability", commonly it's just the emit target for the intrinsic, so one of...
* hlsl
* glsl
* cuda - CUDA
* cpp - C++ output (used for exe, shared-library or host-callable)
* spirv - Used for slangs SPIR-V direct mechanism
A function definition can have a `target_intrinsic` *and* a body. In that case, the body will be used for targets where the `target_intrinsic` isn't defined.
If the intrinsic can be emitted as is, the expansion need not be specified. If only the *name* needs to be changed (params can be passed as is), only the name to be expanded to needs to be specified *without* `()`. In this scenario it is not necessary to specify as a string in quotes, and just the identifier name can be used.
Currently `HLSL` has a special handling in that it is *assumed* if a declaration exists that it can be emitted verbatim to HLSL.
The target can also be a capability atom. The atoms are listed in "slang-capability-defs.h".
What is perhaps of importance here is that for some features for a specific target can have multiple ways of achieving the same effect - for example "GL_NV_ray_tracing" and "GL_EXT_ray_tracing" are two different ray tracing extensions available for Vulkan through GLSL. The `-profile` option can disambiguate which extension is actually desired, and the capability with that name on the `target_intrinsic` specifies how to implement that feature for that specific extension.
The expansion mechanism is implemented in "slang-intrinsic-expand.cpp" which will be most up to date.
The `expansion` value can be a string or an identifier. If it is an identifier, it will just be emitted as is replacing the name of the declaration the intrinsics is associated with.
Sections of the `expansion` string that are to be replaced are prefixed by the `$` sigil.
* $0-9 - Indicates the parameter at that index. For a method call $0 is `this`.
* $T0-9 - The type for the param at the index. If the type is a texture resource derived type, returns the *element* type.
* $TR - The return type
* $G0-9 - Replaced by the type/value at that index of specialization
* $S0-9 - The scalar type of the generic at the index.
* $p - Used on texturing operations. Produces the combined texture sampler arguments as needed for GLSL.
* $C - The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format conversion required between the type associated by the resource and the backing ImageFormat. Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture writes that will do a format conversion.
* $E - Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface access is byte addressed. $E will return the byte size of the *backing element*.
* $c - When doing texture access in GLSL the result may need to be cast. In particular if the underlying texture is 'half' based, GLSL only accesses (read/write) as float. So we need to cast to a half type on output. When storing into a texture it is still the case the value written must be half - but we don't need to do any casting there as half is coerced to float without a problem.
* $z - If we are calling a D3D texturing operation in the form t.Foo(s, ...), where `t` is a Texture&lt;T&gt;, then this is the step where we try to properly swizzle the output of the equivalent GLSL call into the right shape.
* $N0-9 - Extract the element count from a vector argument so that we can use it in the constructed expression.
* $V0-9 - Take an argument of some scalar/vector type and pad it out to a 4-vector with the same element type (this is the inverse of `$z`).
* $a - We have an operation that needs to lower to either `atomic*` or `imageAtomic*` for GLSL, depending on whether its first operand is a subscript into an array. This `$a` is the first `a` in `atomic`, so we will replace it accordingly.
* $A - We have an operand that represents the destination of an atomic operation in GLSL, and it should be lowered based on whether it is an ordinary l-value, or an image subscript. In the image subscript case this operand will turn into multiple arguments to the `imageAtomic*` function.
* $XP - Ray tracing ray payload
* $XC - Ray tracing callable payload
* $XH - Ray tracing hit object attribute
* $P - Type-based prefix as used for CUDA and C++ targets (I8 for int8_t, F32 - float etc)
## __specialized_for_target(target)
Specialized for target allows defining an implementation *body* for a particular target. The target is the same as is used for [__target_intrinsic](#target-intrinsic).
A declaration can consist of multiple definitions with bodies (for each target) using, `specialized_for_target`, as well as having `target_intrinsic` if that is applicable for a target.
## __attributeTarget(astClassName)
For an attribute, specifies the AST class (and derived class) the attribute can be applied to.
## __builtin
Identifies the declaration is being "builtin".
## __builtin_requirement(requirementKind)
A modifier that indicates a built-in associated type requirement (e.g., `Differential`). The requirement is one of `BuiltinRequirementKind`.
The requirement value can just be specified via the `$()` mechanism.
## __builtin_type(tag)
Specifies a builtin type - the integer value of one of the enumeration BaseType.
## __magic_type(clsName, tag)
Used before a type declaration. The clsName is the name of the class that is used to represent the type in the AST in Slang *C++* code. The tag is an optional integer value that is in addition and meaningful in the context of the class type.
## __intrinsic_type(op)
Used to specify the IR opcode associated with a type. The IR opcode is listed as something like `$(kIROp_HLSLByteAddressBufferType)`, which will expand to the integer value of the opcode (because the opcode value is an enum value that is visible from C++). It is possible to just write the opcode number, but that is generally inadvisable as the ids for ops are not stable. If a code change in Slang C++ adds or removes an opcode the number is likely to be incorrect.
As an example from the core module
```slang
__magic_type(HLSLByteAddressBufferType)
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
struct ByteAddressBuffer
{
// ...
};
```
# General
## __generic<>
Is an alternate syntax for specifying a declaration that is generic. The more commonly used form is to list the generic parameters in `<>` after the name of the declaration.
## attribute_syntax
Attribute syntax provides a mechanism to introduce an attribute type in Slang.
Right now the basic form is:
```
attribute_syntax [name(parmName: paramType, ...)] : syntaxClass;
```
There can be 0 or more params associated with the attribute, and if so the () are not needed.
* `name` gives the name of the attribute to define.
* `paramName` is the name of param that are specified with attribute use
* `paramType` is the type of the value associated with the param
* `syntaxClass` is the name of an AST node class that we expect this attribute to create when checked.
For example
```
__attributeTarget(FuncDecl)
attribute_syntax [CudaDeviceExport] : CudaDeviceExportAttribute;
```
Defines an attribute `CudaDeviceExport` which can only be applied to FuncDecl or derived AST types. Once semantically checked will be turned into a `CudaDeviceExportAttribute` attribute in the AST.
With a parameter
```
__attributeTarget(InterfaceDecl)
attribute_syntax [anyValueSize(size:int)] : AnyValueSizeAttribute;
```
Defines an attribute `anyValueSize` that can be applied to `InterfaceDecl` and derived types. It takes a single parameter called `anyValueSize` of `int` type.
## Ref<T>
Allows returning or passing a value "by reference".
# GLSL/Vulkan specific
## __glsl_version(version)
Used to specify the GLSL version number that is required for the subsequent declaration. When Slang emits GLSL source, the version at the start of the file, will be the largest version seen that emitted code uses.
For example
```slang
__glsl_version(430)
```
## __glsl_extension
Specifies the GLSL extension that is required for the declaration to work. A declaration that has the intrinsic, when output to GLSL, will additionally add `#extension` to the GLSL or SPIR-V output.
Multiple extensions can be applied to a decoration if that is applicable, if there are multiple ways of implementing that can be emitted in the same manner (see the section around [target](#target-intrinsic)) for more details.
## __spirv_version
When a declaration is used for the SPIR-V target, the highest value seen will be taken as the SPIR-V version required. For compilation through GLSLANG, the value is passed down to GLSLANG specifying which SPIR-V version is being targeted.
Example
```
__spirv_version(1.3)
```
## vk::spirv_instruction
Provides a way to use a limited amount of the `GL_EXT_spirv_intrinsics` extension.
```
vk::spirv_instruction(op, set)
```
Op is the integer *value* for the op. The `set` is optional string which specifies the instruction set the op is associated with.
For example
```
__specialized_for_target(glsl)
[[vk::spirv_instruction(1, "NonSemantic.DebugBreak")]]
void debugBreak();
```
# CUDA specific
## __cuda_sm_version
When declaration is used with this intrinsic for a CUDA target, the highest shader model seen will be passed down to the downstream CUDA compile (NVRTC).
# NVAPI
## [__requiresNVAPI]
If declaration is reached during a compilation for an applicable target (D3D11/12), will indicate that [NVAPI support](../nvapi-support.md) is required for declaration to work.

View file

@ -1,114 +0,0 @@
Slang Doc System
================
Slang contains a rudimentary documentation generation system. The mechanism used to mark up source is similar to [doxygen](https://www.doxygen.nl/manual/docblocks.html). Namely
```
/**
... text ... (JavaDoc style)
*/
void someFunctionA() {}
/*!
.. text .. (QT style)
another line
*/
void someFunctionB() {}
/// ... text ... (Multi line)
/// another line
void someFunctionC() {}
//!... text ... (QT Multi line)
//! another line
void someFunctionD() {}
```
All of the above examples will add the documentation for the declaration that appears after them. Also note that this slightly diverges from doxygen in that an empty line before and after in a multi line comment is *not* required.
We can also document the parameters to a function similarly
```
/// My function
void myFunction(
/// The A parameter
int a,
/// The B parameter
int b);
```
If you just need a single line comment to describe something, you can place the documentation after the parameter as in
```
/// My function
void myFunction( int a, //< The A parameter
int b) //< The B parameter
{}
```
This same mechanisms work for other kinds of common situations such as with enums
```
/// An enum
enum AnEnum
{
Value, ///< A value
/// Another value
/// With a multi-line comment
AnotherValue,
};
```
Like `doxygen` we can also have multi line comments after a declaration for example
```
/// An enum
enum AnEnum
{
Value, ///< A value
///< Some more information about `Value`
/// Another value
/// With a multi-line comment
AnotherValue,
};
```
To actually get Slang to output documentation you can use the `-doc` option from the `slangc` command line, or pass it in as parameter to `spProcessCommandLineArguments` or `processCommandLineArguments`. The documentation is currently output by default to the same `ISlangWriter` stream as diagnostics. So for `slangc` this will generally mean the terminal/stderr.
Currently the Slang doc system does not support any of the 'advanced' doxygen documentation features. If you add documentation to a declaration it is expected to be in [markdown](https://guides.github.com/features/mastering-markdown/).
Currently the only documentation style supported is a single file 'markdown' output. Future versions will support splitting into multiple files and linking between them. Also future versions may also support other documentation formats/standards.
It is possible to generate documentation for the slang core module. This can be achieved with `slangc` via
```
slangc -doc -compile-core-module
```
The documentation will be written to a file `stdlib-doc.md`.
It should be noted that it is not necessary to add markup to a declaration for the documentation system to output documentation for it. Without the markup the documentation is going to be very limited, in essence saying the declaration exists and other aspects that are available from the source. This may not be very helpful. For this reason and other reasons there is a mechanism to control the visibility of items in your source.
There are 3 visibility levels 'public', 'internal' and 'hidden'/'private'. There is a special comment that controls visibility for subsequent lines. The special comment starts with `//@` as shown below.
```
//@ public:
void thisFunctionAppearsInDocs() {}
//@ internal:
void thisFunctionCouldAppearInInternalDocs() {}
//@ hidden:
void thisFunctionWillNotAppearInDocs() {}
```

View file

@ -1,42 +0,0 @@
Frequently Asked Questions
==========================
### How did this project start?
The Slang project forked off from the ["Spire"](https://github.com/spire-lang/spire) shading language research project.
In particular, Slang aims to take the lessons learned in that research effort (about how to make more productive shader compilation languages and tools) and apply them to a system that is easier to adopt, and hopefully more amenable to production use.
### Why should I use Slang instead of glslang, hlsl2glslfork, the Microsoft open-source HLSL compiler, etc.?
If you are mostly just shopping around for a tool to get HLSL shaders working on other graphics APIs, then [this](http://aras-p.info/blog/2014/03/28/cross-platform-shaders-in-2014/) blog post is probably a good place to start.
If one of those tools meets your requirements, then you should probably use it.
Slang is a small project, and early in development, so you might find that you hit fewer bumps in the road with one of the more established tools out there.
The goal of the Slang project is not to make "yet another HLSL-to-GLSL translator," but rather to create a shading language and supporting toolchain that improves developer productivity (and happiness) over the existing HLSL language and toolchain, while providing a reasonable adoption path for developers who have an existing investment in HLSL shader code.
If you think that is something interesting and worth supporting, then please get involved!
### What would make a shading language more productive?
This is probably best answered by pointing to the most recent publication from the Spire research project:
[Shader Components: Modular and High Performance Shader Development](http://graphics.cs.cmu.edu/projects/shadercomp/)
Some other papers for those who would like to read up on our inspiration:
[A System for Rapid Exploration of Shader Optimization Choices](http://graphics.cs.cmu.edu/projects/spire/)
[Spark: Modular, Composable Shaders for Graphics Hardware](https://graphics.stanford.edu/papers/spark/)
### Who is using Slang?
Right now the only user of Slang is the [Falcor](https://github.com/NVIDIA/Falcor) real-time rendering framework developed and used by NVIDIA Research.
The implementation of Slang has so far focused heavily on the needs of Falcor.
### Won't we all just be using C/C++ for shaders soon?
The great thing about both Vulkan and D3D12 moving to publicly-documented binary intermediate languages (SPIR-V and DXIL, respectively) is that there is plenty of room for language innovation on top of these interfaces.
Having support for writing GPU shaders in a reasonably-complete C/C++ language would be great.
We are supportive of efforts in the "C++ for shaders" direction.
The Slang effort is about trying to solve the challenges that are unique to the real-time graphics domain, and that won't magically get better by switching to C++.

View file

@ -1,264 +0,0 @@
---
layout: user-guide
---
Getting Started with Slang Graphics Layer
============================================
[//]: # (ShortTitle: Getting Started)
In this article, we provide instructions on installing the graphics layer into your application, and demonstrate the basic use of the graphics layer via a simple compute shader example. We will use the same [hello-world.slang](https://github.com/shader-slang/slang/blob/master/examples/hello-world/hello-world.slang) shader from the `hello-world` example in the [Slang getting started tutorial](../user-guide/01-get-started.html).
Installation
------------------
### Obtain Release Package
The Slang graphics library is implemented in `gfx.dll` (`libgfx.so` in unix systems). Since Slang is tightly integrated into the graphics layer, you need to include both `slang.dll` and `gfx.dll` in your application. Official Slang releases provide prebuilt binaries for both libraries as well as the header files to use them. If you prefer to build the libraries yourself, please follow [build instructions](../building).
### Install Header Files
Once you have built or obtained a Slang release, make the following header files from the release package accessible to your application:
- `slang-gfx.h`
- `slang.h`
- `slang-com-ptr.h`
- `slang-com-helper.h`
### Linking the Library
On Windows (with `msvc`), make sure that `gfx.lib` is provided as linker input via the `Linker->Input->Additional Dependencies` project configuration. On Unix systems, make sure to pass `-lgfx` when compiling your application.
Creating a GPU Device
---------------------------
To start using the graphics layer, create an `IDevice` object by calling `gfxCreateDevice`. The `IDevice` interface is the main entry-point to interact with the graphics layer. It represents the GPU device context where all interactions with the GPU take place.
```cpp
#include "slang-gfx.h"
using namespace gfx;
IDevice* gDevice = nullptr;
void initGfx()
{
IDevice::Desc deviceDesc = {};
gfxCreateDevice(deviceDesc, &gDevice);
}
```
The `IDevice::Desc` struct passed to `gfxCreateDevice` defines many configurations on how a device shall be created. Most notably, the `deviceType` field specifies what underlying graphics API to use. By default, `gfxCreateDevice` will attempt to use the best API available on current platform. On Windows, the layer will prefer to use `D3D12` but will also try to use `Vulkan`, `D3D11`, `OpenGL` in order, in case the former API isn't available. On Unix systems, it will always default to `Vulkan` since this is the only API that supports full Graphics capabilities. A user can always specify the `deviceType` field to force the layer to use a specific API. If the device creation succeeds, `gfxCreateDevice` will return `SLANG_OK(0)`.
Similar to the Slang API, objects created by the graphics layer also conform to the COM standard. The user is responsible for calling the `release` method on every object returned to the user by the layer to prevent memory leaks.
Enabling the Debug Layer
--------------------------
The Slang Graphics Layer provides a debug layer that can be enabled to perform additional validations to ensure correctness. To enable the debug layer, simply call `gfxEnableDebugLayer` before calling `gfxCreateDevice`.
To receive diagnostic messages, you need to create a class that implements the `IDebugCallback` interface, and call `gfxSetDebugCallback` to provide the callback instance to the graphics layer. For example:
```cpp
struct MyDebugCallback : public IDebugCallback
{
virtual SLANG_NO_THROW void SLANG_MCALL handleMessage(
DebugMessageType type,
DebugMessageSource source,
const char* message) override
{
printf("%s\n", message);
}
};
MyDebugCallback gCallback;
void initGfx()
{
gfxEnableDebugLayer();
gfxSetDebugCallback(&gCallback);
IDevice::Desc deviceDesc = {};
gfxCreateDevice(&deviceDesc, &gDevice);
}
```
Creating a Command Queue
------------------------------
A command queue is where the GPU device takes commands from the application to execute. To create a command queue, call `IDevice::createCommandQueue`.
```cpp
ICommandQueue* gQueue = nullptr;
ICommandQueue::Desc queueDesc = {ICommandQueue::QueueType::Graphics};
device->createCommandQueue(queueDesc, &gQueue);
```
Allocating a Command Buffer
------------------------------
A command buffer is treated as a _transient_ resource by the graphics layer. A transient resource is required by the GPU during execution of a task, and is no longer needed when the execution has completed. The Slang graphics layer provides an `ITransientResourceHeap` object to efficiently manage the life cycle of transient resources. In order to allocate a command buffer, we need to create an `ITransientResourceHeap` object first by calling `IDevice::createTransientResourceHeap`.
```cpp
ITransientResourceHeap* gTransientHeap;
ITransientResourceHeap::Desc transientHeapDesc = {};
transientHeapDesc.constantBufferSize = 4096;
device->createTransientResourceHeap(transientHeapDesc, &gTransientHeap);
```
With a `TransientResourceHeap`, we can call `createCommandBuffer` method to allocate a command buffer:
```cpp
ICommandBuffer* commandBuffer;
gTransientHeap->createCommandBuffer(&commandBuffer);
```
A user should regularly call `ITransientResourceHeap::synchronizeAndReset` to recycle all previously allocated transient resources. A standard practice is to create two `TransientResourceHeap`s in a double-buffered renderer, and alternate the transient heap on each frame to allocate command buffers and other transient resources. With this setup, the application can call `synchronizeAndReset` at start of each frame on the corresponding transient resource heap to make sure all transient resources are timely recycled.
Creating Buffer Resource
------------------------------
We need to create the buffer resources used by our `hello-world` shader as input and output. This can be done via the `IDevice::createBufferResource` method. When creating a resource, the user must specify a resource state that the resource will be in by default, as well as all allowed resource states the resource can be in. Resource states in the graphics layer follow the same model as resource states in D3D12, and the user can also assume the same automatic resource promotion/demotion behavior as in D3D12.
```cpp
const int numberCount = 4;
float initialData[] = {0.0f, 1.0f, 2.0f, 3.0f};
IBufferResource::Desc bufferDesc = {};
bufferDesc.sizeInBytes = numberCount * sizeof(float);
bufferDesc.format = Format::Unknown;
bufferDesc.elementSize = sizeof(float);
bufferDesc.defaultState = ResourceState::UnorderedAccess;
bufferDesc.allowedStates = ResourceStateSet(ResourceState::UnorderedAccess,
ResourceState::ShaderResource);
IBufferResource* inputBuffer0;
SLANG_RETURN_ON_FAIL(device->createBufferResource(
bufferDesc,
(void*)initialData,
&inputBuffer0));
```
Creating a Pipeline State
---------------------------
A pipeline state object encapsulates the shader program to execute on the GPU device, as well as other fixed-function states for graphics rendering. In this example, we will be compiling and running a simple compute shader written in Slang. To do that we need to create a compute pipeline state from a Slang `IComponentType`. We refer the reader to the [Slang getting started tutorial](../user-guide/01-getting-started.html) on how to create a Slang `IComponentType` from a shader file. The following source creates a Graphics layer `IPipelineState` object from a shader module represented by a `slang::IComponentType` object:
```cpp
void createComputePipelineFromShader(
IComponentType* slangProgram,
IPipelineState*& outPipelineState)
{
// The `IComponentType` parameter that represents the compute
// kernel, we can use it to create a `IShaderProgram` object in the graphics
// layer.
IShaderProgram* shaderProgram = nullptr;
IShaderProgram::Desc programDesc = {};
programDesc.pipelineType = PipelineType::Compute;
programDesc.slangProgram = slangProgram;
gDevice->createShaderProgram(programDesc, &shaderProgram);
// Create a compute pipeline state from `shaderProgram`.
ComputePipelineStateDesc pipelineDesc = {};
pipelineDesc.program = shaderProgram;
gDevice->createComputePipelineState(pipelineDesc, &outPipelineState);
// Since we no longer need to use `shaderProgram` after creating
// a pipeline state, we should release it to prevent memory leaks.
shaderProgram->release();
}
```
Recording Commands to Run a Compute Shader
------------------------------------
[//]: # (ShortTitle: Recording Commands)
Now that we have created all the resources and allocated a command buffer, we can start recording commands to
set the compute pipeline state, bind shader parameters, and dispatch a kernel launch.
Since we are only using compute commands, we begin the recording by calling `ICommandBuffer::encodeComputeCommands`. This method returns a transient `IComputeCommandEncoder` object for accepting actual compute commands.
```cpp
IComputeCommandEncoder* encoder = commandBuffer->encodeComputeCommands();
```
The first command is to bind the pipeline state we created earlier:
```cpp
IShaderObject* rootObject = encoder->bindPipeline(pipelineState);
```
Binding a pipeline state yields a transient `IShaderObject` object. We can use the `IShaderObject` instance to bind shader parameters. For the `hello-world` shader, we need to bind three parameters: `buffer0`, `buffer1` and `result`.
```cpp
// Create a resource view for buffer0.
IBufferView* buffer0View;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::ShaderResource;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer0, viewDesc, &buffer0View));
}
// Bind the resource view to shader.
rootObject->setResource(ShaderOffset{0,0,0}, buffer0View);
// Create a resource view for buffer1.
IBufferView* buffer1View;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::ShaderResource;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(inputBuffer1, viewDesc, &buffer1View));
}
// Bind the resource view to shader.
rootObject->setResource(ShaderOffset{0,1,0}, buffer1View);
// Create a resource view for resultBuffer.
IBufferView* resultView;
{
IResourceView::Desc viewDesc = {};
viewDesc.type = IResourceView::Type::UnorderedAccess;
viewDesc.format = Format::Unknown;
SLANG_RETURN_ON_FAIL(device->createBufferView(resultBuffer, viewDesc, &resultView));
}
rootObject->setResource(ShaderOffset{0,2,0}, resultView);
```
> #### Note
> Since `rootObject` is a transient object returned by the command encoder, it is automatically released
> with the command encoder. Calling `release` on `rootObject` is OK but not needed.
After binding all shader parameters, we can now dispatch the kernel:
```cpp
encoder->dispatchCompute(1, 1, 1);
```
> #### Note
> Command encoders are transient objects managed by a command buffer, it is automatically released
> with the command buffer. Calling `release` on `rootObject` is OK but not needed.
When we are done recording commands, we need to close the command encoder and the command buffer.
```cpp
encoder->endEncoding();
commandBuffer->close();
```
Now we are ready to submit the command buffer to the command queue, and wait for the GPU execution to finish.
```cpp
gQueue->executeCommandBuffer(commandBuffer);
gQueue->wait();
```
Cleaning Up
----------------
At the end of our example, we need to make sure all created objects are released by calling the `release` method:
```cpp
commandBuffer->release();
gQueue->release();
gTransientHeap->release();
inputBuffer0->release();
buffer0View->release();
...
gDevice->release();
```
The order of calls to `release` does not matter, as long as all objects are released from the user.

View file

@ -1,25 +0,0 @@
---
layout: user-guide
---
Slang Graphics Layer
=============
The Slang Graphics Layer is an abstraction library of graphics APIs to support cross-platform applications that utilize GPU graphics/compute capabilities. The Slang Graphics Layer tightly integrates the Slang shading language to provide the most complete cross-platform GPU application development experience. The Slang language and compilation API is designed to work best when the application assumes several best practices in terms of shader specialization and parameter binding. The Slang Graphics Layer follows exactly the same best practices supported by Slang's compilation model. Outside of shader-related areas, the graphics layer's interface is designed to closely follow the modern graphics API models in Direct3D 12, Vulkan and Metal, such that the layer serves only to abstract the differences between these underlying APIs instead of providing a higher-level abstraction that simplifies the interface. This design philosophy allows users to benefit from the ideas in the Slang shading language without giving up precise control over other aspects of the graphics API.
The current support status of operating system and graphics APIs is shown in the following matrix.
| | Windows | Linux |
| :------------ | :----------------: | :----------------: |
| Direct3D 12 | Yes | No |
| Direct3D 11 | Yes | No |
| Vulkan | Yes | Yes |
| OpenGL | Yes | No |
| CPU emulation | Yes (Compute Only) | Yes (Compute Only) |
| CUDA | Yes (Compute Only) | Yes (Compute Only) |
> #### Note
> The graphics layer is still under active development and we intend to add more platforms and APIs in the future.
In this documentation, we will walk through various parts of the library and demonstrate how it can be used in your application.

View file

@ -1,5 +0,0 @@
<nav>
<li><a href="../../">Docs</a></li>
<li><a href="index.html">Slang Graphics Layer</a></li>
</nav>

View file

@ -1,18 +0,0 @@
<ul class="toc_root_list"><li data-link="index"><span>Slang Graphics Layer</span>
<ul class="toc_list">
<li data-link="01-getting-started"><span>Getting Started</span>
<ul class="toc_list">
<li data-link="01-getting-started#installation"><span>Installation</span></li>
<li data-link="01-getting-started#creating-a-gpu-device"><span>Creating a GPU Device</span></li>
<li data-link="01-getting-started#enabling-the-debug-layer"><span>Enabling the Debug Layer</span></li>
<li data-link="01-getting-started#creating-a-command-queue"><span>Creating a Command Queue</span></li>
<li data-link="01-getting-started#allocating-a-command-buffer"><span>Allocating a Command Buffer</span></li>
<li data-link="01-getting-started#creating-buffer-resource"><span>Creating Buffer Resource</span></li>
<li data-link="01-getting-started#creating-a-pipeline-state"><span>Creating a Pipeline State</span></li>
<li data-link="01-getting-started#recording-commands-to-run-a-compute-shader"><span>Recording Commands</span></li>
<li data-link="01-getting-started#cleaning-up"><span>Cleaning Up</span></li>
</ul>
</li>
</ul>
</li>
</ul>

View file

@ -1,266 +0,0 @@
Unsupported Formats
======================
GFX currently does not support the following listed D3D and Vulkan formats.
With the exception of `D24_UNORM_S8_UINT`, these formats have been omitted as
their counterpart API does not have a corresponding format. `D24_UNORM_S8_UINT`
has been omitted as it is only supported by Nvidia.
- `DXGI_FORMAT_R32G8X24_TYPELESS`
- `DXGI_FORMAT_D32_FLOAT_S8X24_UINT`
- `DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS`
- `DXGI_FORMAT_X32_TYPELESS_G8X24_UINT`
- `DXGI_FORMAT_R24G8_TYPELESS`
- `DXGI_FORMAT_D24_UNORM_S8_UINT`
- `DXGI_FORMAT_R24_UNORM_X8_TYPELESS`
- `DXGI_FORMAT_X24_TYPELESS_G8_UINT`
- `DXGI_FORMAT_A8_UNORM`
- `DXGI_FORMAT_R1_UNORM`
- `DXGI_FORMAT_R8G8_B8G8_UNORM`
- `DXGI_FORMAT_G8R8_G8B8_UNORM`
- `DXGI_FORMAT_BC1_TYPELESS`
- `DXGI_FORMAT_BC2_TYPELESS`
- `DXGI_FORMAT_BC3_TYPELESS`
- `DXGI_FORMAT_BC4_TYPELESS`
- `DXGI_FORMAT_BC5_TYPELESS`
- `DXGI_FORMAT_B8G8R8X8_UNORM`
- `DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM`
- `DXGI_FORMAT_B8G8R8X8_TYPELESS`
- `DXGI_FORMAT_B8G8R8X8_UNORM_SRGB`
- `DXGI_FORMAT_BC6H_TYPELESS`
- `DXGI_FORMAT_BC7_TYPELESS`
- `DXGI_FORMAT_AYUV`
- `DXGI_FORMAT_Y410`
- `DXGI_FORMAT_Y416`
- `DXGI_FORMAT_NV12`
- `DXGI_FORMAT_P010`
- `DXGI_FORMAT_P016`
- `DXGI_FORMAT_420_OPAQUE`
- `DXGI_FORMAT_YUY2`
- `DXGI_FORMAT_Y210`
- `DXGI_FORMAT_Y216`
- `DXGI_FORMAT_NV11`
- `DXGI_FORMAT_AI44`
- `DXGI_FORMAT_IA44`
- `DXGI_FORMAT_P8`
- `DXGI_FORMAT_A8P8`
- `DXGI_FORMAT_P208`
- `DXGI_FORMAT_V208`
- `DXGI_FORMAT_V408`
- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE`
- `DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE`
- `VK_FORMAT_R4G4_UNORM_PACK8`
- `VK_FORMAT_R4G4B4A4_UNORM_PACK16`
- `VK_FORMAT_B4G4R4A4_UNORM_PACK16`
- `VK_FORMAT_B5G6R5_UNORM_PACK16`
- `VK_FORMAT_R5G5B5A1_UNORM_PACK16`
- `VK_FORMAT_B5G5R5A1_UNORM_PACK16`
- `VK_FORMAT_R8_USCALED`
- `VK_FORMAT_R8_SSCALED`
- `VK_FORMAT_R8_SRGB`
- `VK_FORMAT_R8G8_USCALED`
- `VK_FORMAT_R8G8_SSCALED`
- `VK_FORMAT_R8G8_SRGB`
- `VK_FORMAT_R8G8B8_UNORM`
- `VK_FORMAT_R8G8B8_SNORM`
- `VK_FORMAT_R8G8B8_USCALED`
- `VK_FORMAT_R8G8B8_SSCALED`
- `VK_FORMAT_R8G8B8_UINT`
- `VK_FORMAT_R8G8B8_SINT`
- `VK_FORMAT_R8G8B8_SRGB`
- `VK_FORMAT_B8G8R8_UNORM`
- `VK_FORMAT_B8G8R8_SNORM`
- `VK_FORMAT_B8G8R8_USCALED`
- `VK_FORMAT_B8G8R8_SSCALED`
- `VK_FORMAT_B8G8R8_UINT`
- `VK_FORMAT_B8G8R8_SINT`
- `VK_FORMAT_B8G8R8_SRGB`
- `VK_FORMAT_R8G8B8A8_USCALED`
- `VK_FORMAT_R8G8B8A8_SSCALED`
- `VK_FORMAT_B8G8R8A8_SNORM`
- `VK_FORMAT_B8G8R8A8_USCALED`
- `VK_FORMAT_B8G8R8A8_SSCALED`
- `VK_FORMAT_B8G8R8A8_UINT`
- `VK_FORMAT_B8G8R8A8_SINT`
- `VK_FORMAT_A8B8G8R8_UNORM_PACK32`
- `VK_FORMAT_A8B8G8R8_SNORM_PACK32`
- `VK_FORMAT_A8B8G8R8_USCALED_PACK32`
- `VK_FORMAT_A8B8G8R8_SSCALED_PACK32`
- `VK_FORMAT_A8B8G8R8_UINT_PACK32`
- `VK_FORMAT_A8B8G8R8_SINT_PACK32`
- `VK_FORMAT_A8B8G8R8_SRGB_PACK32`
- `VK_FORMAT_A2R10G10B10_UNORM_PACK32`
- `VK_FORMAT_A2R10G10B10_SNORM_PACK32`
- `VK_FORMAT_A2R10G10B10_USCALED_PACK32`
- `VK_FORMAT_A2R10G10B10_SSCALED_PACK32`
- `VK_FORMAT_A2R10G10B10_UINT_PACK32`
- `VK_FORMAT_A2R10G10B10_SINT_PACK32`
- `VK_FORMAT_A2B10G10R10_SNORM_PACK32`
- `VK_FORMAT_A2B10G10R10_USCALED_PACK32`
- `VK_FORMAT_A2B10G10R10_SSCALED_PACK32`
- `VK_FORMAT_A2B10G10R10_SINT_PACK32`
- `VK_FORMAT_R16_USCALED`
- `VK_FORMAT_R16_SSCALED`
- `VK_FORMAT_R16G16_USCALED`
- `VK_FORMAT_R16G16_SSCALED`
- `VK_FORMAT_R16G16B16_UNORM`
- `VK_FORMAT_R16G16B16_SNORM`
- `VK_FORMAT_R16G16B16_USCALED`
- `VK_FORMAT_R16G16B16_SSCALED`
- `VK_FORMAT_R16G16B16_UINT`
- `VK_FORMAT_R16G16B16_SINT`
- `VK_FORMAT_R16G16B16_SFLOAT`
- `VK_FORMAT_R16G16B16A16_USCALED`
- `VK_FORMAT_R16G16B16A16_SSCALED`
- `VK_FORMAT_R64_UINT`
- `VK_FORMAT_R64_SINT`
- `VK_FORMAT_R64_SFLOAT`
- `VK_FORMAT_R64G64_UINT`
- `VK_FORMAT_R64G64_SINT`
- `VK_FORMAT_R64G64_SFLOAT`
- `VK_FORMAT_R64G64B64_UINT`
- `VK_FORMAT_R64G64B64_SINT`
- `VK_FORMAT_R64G64B64_SFLOAT`
- `VK_FORMAT_R64G64B64A64_UINT`
- `VK_FORMAT_R64G64B64A64_SINT`
- `VK_FORMAT_R64G64B64A64_SFLOAT`
- `VK_FORMAT_X8_D24_UNORM_PACK32`
- `VK_FORMAT_S8_UINT`
- `VK_FORMAT_D16_UNORM_S8_UINT`
- `VK_FORMAT_D24_UNORM_S8_UINT`
- `VK_FORMAT_D32_SFLOAT_S8_UINT`
- `VK_FORMAT_BC1_RGB_UNORM_BLOCK`
- `VK_FORMAT_BC1_RGB_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK`
- `VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK`
- `VK_FORMAT_EAC_R11_UNORM_BLOCK`
- `VK_FORMAT_EAC_R11_SNORM_BLOCK`
- `VK_FORMAT_EAC_R11G11_UNORM_BLOCK`
- `VK_FORMAT_EAC_R11G11_SNORM_BLOCK`
- `VK_FORMAT_ASTC_4x4_UNORM_BLOCK`
- `VK_FORMAT_ASTC_4x4_SRGB_BLOCK`
- `VK_FORMAT_ASTC_5x4_UNORM_BLOCK`
- `VK_FORMAT_ASTC_5x4_SRGB_BLOCK`
- `VK_FORMAT_ASTC_5x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_5x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_6x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_6x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_6x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_6x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_8x8_UNORM_BLOCK`
- `VK_FORMAT_ASTC_8x8_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x5_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x5_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x6_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x6_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x8_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x8_SRGB_BLOCK`
- `VK_FORMAT_ASTC_10x10_UNORM_BLOCK`
- `VK_FORMAT_ASTC_10x10_SRGB_BLOCK`
- `VK_FORMAT_ASTC_12x10_UNORM_BLOCK`
- `VK_FORMAT_ASTC_12x10_SRGB_BLOCK`
- `VK_FORMAT_ASTC_12x12_UNORM_BLOCK`
- `VK_FORMAT_ASTC_12x12_SRGB_BLOCK`
- `VK_FORMAT_G8B8G8R8_422_UNORM`
- `VK_FORMAT_B8G8R8G8_422_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM`
- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM`
- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM`
- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM`
- `VK_FORMAT_R10X6_UNORM_PACK16`
- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16`
- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16`
- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16`
- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16`
- `VK_FORMAT_R12X4_UNORM_PACK16`
- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16`
- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16`
- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16`
- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16`
- `VK_FORMAT_G16B16G16R16_422_UNORM`
- `VK_FORMAT_B16G16R16G16_422_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM`
- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM`
- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM`
- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM`
- `VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG`
- `VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT`
- `VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT`
- `VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT`
- `VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT`
- `VK_FORMAT_G8B8G8R8_422_UNORM_KHR`
- `VK_FORMAT_B8G8R8G8_422_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR`
- `VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR`
- `VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR`
- `VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR`
- `VK_FORMAT_R10X6_UNORM_PACK16_KHR`
- `VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR`
- `VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR`
- `VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR`
- `VK_FORMAT_R12X4_UNORM_PACK16_KHR`
- `VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR`
- `VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR`
- `VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR`
- `VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR`
- `VK_FORMAT_G16B16G16R16_422_UNORM_KHR`
- `VK_FORMAT_B16G16R16G16_422_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR`
- `VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR`
- `VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR`
- `VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR`

View file

@ -1,9 +0,0 @@
### Derivatives In Compute
An entry point may be decorated with `[DerivativeGroupQuad]` or `[DerivativeGroupLinear]` to specify how to use derivatives in compute shaders.
GLSL syntax may also be used, but is not recommended (`derivative_group_quadsNV`/`derivative_group_linearNV`).
Targets:
* **_SPIRV:_** Enables `DerivativeGroupQuadsNV` or `DerivativeGroupLinearNV`.
* **_GLSL:_** Enables `derivative_group_quadsNV` or `derivative_group_linearNV`.
* **_HLSL:_** Does nothing. `sm_6_6` is required to use derivatives in compute shaders. HLSL uses an equivalent of `DerivativeGroupQuad`.

View file

@ -1,205 +0,0 @@
Texture Footprint Queries
=========================
Slang supports querying the *footprint* of a texture sampling operation: the texels that would be accessed when performing that operation.
This feature is supported on Vulkan via the `GL_NV_shader_texture_footprint` extension, and on D3D12 via the `NvFootprint*` functions exposed by NVAPI.
# Background
There are many GPU rendering techniques that involve generating a texture (e.g., by rendering to it) and then sampling from that texture in a 3D rendering pass, such that it is difficult to predict *a priori* which parts of the texture will be accessed, or not.
As one example, consider rendering a shadow map that will be accessed when shading a g-buffer.
Depending on the geometry that was rendered into the g-buffer, and the occlusion that might exist, some parts of the shadow map might not be needed at all.
In principle, an application could use a compute pass on the g-buffer to compute, for each pixel, the part of the shadow-map texture that it will access - its footprint.
The application could then aggregate these footprints into a stencil mask or other data structure that could be used to optimize the rendering pass that generates the shadow map.
Unfortunately, it is almost impossible for applications to accurately and reliably predict the texel data that particular sampling operations will require, once non-trivial texture filtering modes are considered.
Sampling operations support a wide variety of state that affects the lookup and filtering of texels. For example:
* When bilinear filtering is enabled, a sampling operation typically accesses the four texels closest to the sampling location and blends them.
* When trilinear filtering is enabled, a sampling operation may access texels at two different mip levels.
* When anisotropic filtering is enabled, a sampling operation may take up to N *taps* (where N is the maximum supported degree of anisotropy), each of which may itself access a neighborhood of texels to produce a filtered value for that tap.
* When sampling a cube map, a sampling operation may straddle the "seam" between two or even three cube faces.
Texture footprint queries are intended to solve this problem by providing application developers with a primitive that can query the footprint of a texture sampling operation using the exact same sampler state and texture coordinates that will be used when sampling the texture later.
# Slang Shader API
Rather than exactly mirror the Vulkan GLSL extension or the NVAPI functions, the Slang core module provides a single common interface that can map to either of those implementations.
## Basics
A typical 2D texture sampling operation is performed using the `Sample()` method on `Texture2D`:
```hlsl
Texture2D<float4> texture = ...;
SamplerState sampler = ...;
float2 coords = ...;
// Sample a 2D texture
float4 color = texture.Sample(
sampler, coords);
```
To query the footprint that would be accessed by this operation, we can use an operation like:
```hlsl
uint granularity = ...;
TextureFootprint2D footprint = texture.queryFootprintCoarse(granularity,
sampler, coords);
```
Note that the same arguments used to call `Sample` above are here passed to `queryFootprintCoarse` in the exact same order.
The returned `footprint` encodes a conservative footprint of the texels that would be accessed by the equivalent `Sample` operation above.
Texture footprints are encoded in terms of blocks of texels, and the size of those blocks determines the *granularity* of the footprint.
The `granularity` argument to `queryFootprintCoarse` above indicates the granularity of blocks that the application requests.
In cases where a filtering operation might access two mip levels - one coarse and one fine - a footprint query only returns information about one of the two levels.
The application selects between these options by calling either `queryFootprintCoarse` or `queryFootprintFine`.
## Variations
A wide range of footprint queries are provided, corresponding to various cases of texture sampling operations with different parameters.
For 2D textures, the following functions are supported:
```hlsl
TextureFootprint2D Texture2D.queryFootprintCoarse(
uint granularity, SamplerState sampler, float2 coords);
TextureFootprint2D Texture2D.queryFootprintFine(
uint granularity, SamplerState sampler, float2 coords);
TextureFootprint2D Texture2D.queryFootprintCoarseBias(
uint granularity, SamplerState sampler, float2 coords,
float lodBias);
TextureFootprint2D Texture2D.queryFootprintFineBias(
uint granularity, SamplerState sampler, float2 coords,
float lodBias);
TextureFootprint2D Texture2D.queryFootprintCoarseLevel(
uint granularity, SamplerState sampler, float2 coords,
float lod);
TextureFootprint2D Texture2D.queryFootprintFineLevel(
uint granularity, SamplerState sampler, float2 coords,
float lod);
TextureFootprint2D Texture2D.queryFootprintCoarseGrad(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy);
TextureFootprint2D Texture2D.queryFootprintFineGrad(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy);
// Vulkan-only:
TextureFootprint2D Texture2D.queryFootprintCoarseClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintCoarseBiasClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodBias,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineBiasClamp(
uint granularity, SamplerState sampler, float2 coords,
float lodBias,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintCoarseGradClamp(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy,
float lodClamp);
TextureFootprint2D Texture2D.queryFootprintFineGradClamp(
uint granularity, SamplerState sampler, float2 coords,
float2 dx, float2 dy,
float lodClamp);
```
For 3D textures, the following functions are supported:
```hlsl
TextureFootprint3D Texture3D.queryFootprintCoarse(
uint granularity, SamplerState sampler, float3 coords);
TextureFootprint3D Texture3D.queryFootprintFine(
uint granularity, SamplerState sampler, float3 coords);
TextureFootprint3D Texture3D.queryFootprintCoarseBias(
uint granularity, SamplerState sampler, float3 coords,
float lodBias);
TextureFootprint3D Texture3D.queryFootprintFineBias(
uint granularity, SamplerState sampler, float3 coords,
float lodBias);
TextureFootprint3D Texture3D.queryFootprintCoarseLevel(
uint granularity, SamplerState sampler, float3 coords,
float lod);
TextureFootprint3D Texture3D.queryFootprintFineLevel(
uint granularity, SamplerState sampler, float3 coords,
float lod);
// Vulkan-only:
TextureFootprint3D Texture3D.queryFootprintCoarseClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintFineClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintCoarseBiasClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodBias,
float lodClamp);
TextureFootprint3D Texture3D.queryFootprintFineBiasClamp(
uint granularity, SamplerState sampler, float3 coords,
float lodBias,
float lodClamp);
```
## Footprint Types
Footprint queries on 2D and 3D textures return values of type `TextureFootprint2D` and `TextureFootprint3D`, respectively, which are built-in `struct`s defined in the Slang core module:
```
struct TextureFootprint2D
{
typealias Anchor = uint2;
typealias Offset = uint2;
typealias Mask = uint2;
typealias LOD = uint;
typealias Granularity = uint;
property anchor : Anchor { get; }
property offset : Offset { get; }
property mask : Mask { get; }
property lod : LOD { get; }
property granularity : Granularity { get; }
property isSingleLevel : bool { get; }
}
struct TextureFootprint3D
{
typealias Anchor = uint3;
typealias Offset = uint3;
typealias Mask = uint2;
typealias LOD = uint;
typealias Granularity = uint;
property anchor : Anchor { get; }
property offset : Offset { get; }
property mask : Mask { get; }
property lod : LOD { get; }
property granularity : Granularity { get; }
property isSingleLevel : bool { get; }
}
```
A footprint is encoded in terms of *texel groups*, where the `granularity` determines the size of those groups.
When possible, the returned footprint will match the granularity passed into the query operation, but a larger granularity may be selected in cases where the footprint is too large to encode at the requested granularity.
The `anchor` property specifies an anchor point in the texture, in the vicinity of the footprint. Its components are in multiples of 8 texel groups.
The `offset` property specifies how the bits in `mask` map to texel groups in the vicinity of the `anchor` point.
The `mask` property is a 64-bit bitfield (encoded as a `uint2`), where each bit represents footprint coverage of one texel group, within an 8x8 (for 2D textures) or 4x4x4 (for 3D textures) neighborhood of texel groups.
The `lod` property indicates the mipmap level that would be accessed by the sampling operation.
The `isSingleLevel` property indicates if the sampling operation is known to access only a single mip level.
Note that this property will always be `false` when using the D3D/NVAPI path.

View file

@ -1,259 +0,0 @@
Slang Language Guide
====================
This document will try to describe the main characteristics of the Slang language that might make it different from other shading languages you have used.
The Basics
----------
Slang is similar to HLSL, and it is expected that many HLSL programs can be used as Slang code with no modifications.
Big-picture stuff that is supported:
* A C-style preprocessor
* Ordinary function, `struct`, `typedef`, etc. declarations
* The standard vector/matrix types like `float3` and `float4x4`
* The less-used explicit `vector<T,N>` and `matrix<T,R,C>` types
* `cbuffer` declarations for uniform parameters
* Global-scope declarations of texture/sampler parameters, including with `register` annotations
* Entry points with varying `in`/`out` parameters using semantics (including `SV_*` system-value semantics)
* The built-in templated resource types like `Texture2D<T>` with their object-oriented syntax for sampling operations
* Attributes like `[unroll]` are parsed, and passed along for HLSL/DXBC output, but dropped for other targets
* `struct` types that contain textures/samplers as well as ordinary uniform data, both as function parameters and in constant buffers
* The built-in functions up through Shader Model 6.0 (as documented on MSDN) are supported
New Features
------------
### Import Declarations
In order to support better software modularity, and also to deal with the issue of how to integrate shader libraries written in Slang into other languages, Slang introduces an `import` declaration construct.
The basic idea is that if you write a file `foo.slang` like this:
```hlsl
// foo.slang
float4 someFunc(float4 x) { return x; }
```
you can then import this code into another file in Slang, HLSL, or GLSL:
```hlsl
// bar.slang
import foo;
float4 someOtherFunc(float4 y) { return someFunc(y); }
```
The simplest way to think of it is that the `import foo` declaration instructs the compiler to look for `foo.slang` (in the same search paths it uses for `#include` files), and give an error if it isn't found.
If `foo.slang` is found, then the compiler will go ahead and parse and type-check that file, and make any declarations there visible to the original file (`bar.slang` in this example).
When it comes time to generate output code, Slang will output any declarations from `import`ed files that were actually used (it skips those that are never referenced), and it will cross-compile them as needed for the chosen target.
A few other details worth knowing about `import` declarations:
* The name you use on the `import` line gets translated into a file name with some very simple rules. An underscore (`_`) in the name turns into a dash (`-`) in the file name, and dot separators (`.`) turn into directory separators (`/`). After these substitutions, `.slang` is added to the end of the name.
* If there are multiple `import` declarations naming the same file, it will only be imported once. This is also true for nested imports.
* Currently importing does not imply any kind of namespacing; all global declarations still occupy a single namespace, and collisions between different imported files (or between a file and the code it imports) are possible. This is a bug.
* If file `A.slang` imports `B.slang`, and then some other file does `import A;`, then only the names from `A.slang` are brought into scope, not those from `B.slang`. This behavior can be controlled by having `A.slang` use `__exported import B;` to also re-export the declarations it imports from `B`.
* An import is *not* like a `#include`, and so the file that does the `import` can't see preprocessor macros defined in the imported file (and vice versa). Think of `import foo;` as closer to `using namespace foo;` in C++ (perhaps without the same baggage).
### Explicit Parameter Blocks
One of the most important new features of modern APIs like Direct3D 12 and Vulkan is an interface for providing shader parameters using efficient *parameter blocks* that can be stored in GPU memory (these are implemented as descriptor tables/sets in D3D12/Vulkan, and "attribute buffers" in Metal).
However, HLSL and GLSL don't support explicit syntax for parameter blocks, and so shader programmers are left to manually pack parameters into blocks either using `register`/`layout` modifiers, or with API-based remapping (in the D3D12 case).
Slang supports a simple and explicit syntax for exploiting parameter blocks:
```hlsl
struct ViewParams
{
float3 cameraPos;
float4x4 viewProj;
TextureCube envMap;
};
ParameterBlock<ViewParams> gViewParams;
```
In this example, the fields of `gViewParams` will be assigned to registers/bindings in a way that supports allocating them into a single parameter block.
For example, when generating GLSL for Vulkan, the Slang compiler will generate a single `uniform` block (for `cameraPos` and `viewProj`) and a global `textureCube` for `envMap`, both decorated with the same `layout(set = ...)`.
### Interfaces
Slang supports declaring `interface`s that user-defined `struct` types can implement.
For example, here is a simple interface for light sources:
```hlsl
// light.slang
struct LightSample { float3 intensity; float3 direction; };
interface ILight
{
LightSample sample(float3 position);
}
```
We can now define a simple user type that "conforms to" (implements) the `ILight` interface:
```hlsl
// point-light.slang
import light;
struct PointLight : ILight
{
float3 position;
float3 intensity;
LightSample sample(float3 hitPos)
{
float3 delta = hitPos - position;
float distance = length(delta);
LightSample sample;
sample.direction = delta / distance;
sample.intensity = intensity * falloff(distance);
return sample;
}
}
```
### Generics
Slang supports *generic* declarations, using the common angle-bracket (`<>`) syntax from languages like C#, Java, etc.
For example, here is a generic function that works with any type of light:
```hlsl
// diffuse.slang
import light;
float4 computeDiffuse<L : ILight>( float4 albedo, float3 P, float3 N, L light )
{
LightSample sample = light.sample(P);
float nDotL = max(0, dot(N, sample.direction));
return albedo * nDotL;
}
```
The `computeDiffuse` function works with any type `L` that implements the `ILight` interface.
Unlike with C++ templates, the `computeDiffuse` function can be compiled and type-checked once (you won't suddenly get unexpected error messages when plugging in a new type).
#### Global-Scope Generic Parameters
Putting generic parameter directly on functions is helpful, but in many cases existing HLSL shaders declare their parameters at global scope.
For example, we might have a shader that uses a global declaration of material parameters:
```hlsl
Material gMaterial;
```
In order to allow such a shader to be converted to use a generic parameter for the material type (to allow for specialization), Slang supports declaring type parameters at the global scope:
```hlsl
type_param M : IMaterial;
M gMaterial;
```
Conceptually, you can think of this syntax as wrapping your entire shader program in a generic with parameter `<M : IMaterial>`.
This isn't beautiful syntax, but it may help when incrementally porting an existing HLSL codebase to use Slang's features.
### Associated Types
Sometimes it is difficult to define an interface because each type that implements it might need to make its own choice about some intermediate type.
As a concrete example, suppose we want to define an interface `IMaterial` for material surface shaders, where each material might use its own BRDF.
We want to support evaluating the *pattern* of the surface separate from the reflectance function.
```hlsl
// A reflectance function
interface IBRDF
{
float3 eval(float3 wi, float3 wo);
}
struct DisneyBRDF : IBRDF { ... };
struct KajiyaKay : IBRDF { ... };
// a surface pattern
interface IMaterial
{
??? evalPattern(float3 position, float2 uv);
}
```
What is the type `???` that `evalPattern` should return? We know that it needs to be a type that supports `IBRDF`, but *which* type?
One material might want to use `DisneyBRDF` while another wants to use `KajiyaKay`.
The solution in Slang, as in modern languages like Swift and Rust, is to use *associated types* to express the dependence of the BRDF type on the material type:
```hlsl
interface IMaterial
{
associatedtype B : IBRDF;
B evalPattern(float3 position, float2 uv);
}
struct MyCoolMaterial : IMaterial
{
typedef DisneyBRDF B;
B evalPattern(float3 position, float2 uv)
{ ... }
}
```
Associated types are an advanced concept, and we only recommend using them when they are needed to define a usable interface.
Future Extensions
-----------------
### Implicit Generics Syntax
The syntax for generics and interfaces in Slang is currently explicit, but verbose:
```hlsl
float4 computeDiffuse<L : ILight>( L light, ... )
{ ... }
```
As a future change, we would like to allow using an interface like `ILight` as an ordinary parameter type:
```hlsl
float4 computeDiffuse( ILight light, ... )
{ ... }
```
This simpler syntax would act like "syntactic sugar" for the existing explicit generics syntax, so it would retain all of the important performance properties.
### Returning a Value of Interface Type
While the above dealt with using an interface as a parameter type, we would eventually like to support using an interface as the *return* type of a function:
```hlsl
ILight getALightSource(Scene scene) { ... }
```
Implementing this case efficiently is more challenging. In most cases, an associated type can be used instead when an interface return type would be desired.
Not Supported
-------------
Some features of the current HLSL language are not supported, but probably will be given enough time/resources:
* Local variables of texture/sampler type (or that contain these)
* Matrix swizzles
* Explicit `packoffset` annotations on members of `cbuffer`s
Some things from HLSL are *not* planned to be supported, unless there is significant outcry from users:
* Pre-D3D10/11 syntax and operations
* The "effect" system, and the related `<>` annotation syntax
* Explicit `register` bindings on textures/samplers nested in `cbuffer`s
* Any further work towards making HLSL a subset of C++ (simply because implementing a full C++ compiler is way out of scope for the Slang project)

View file

@ -1,35 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Introduction
============
Slang is a programming language primarily designed for use in *shader programming*, by which we mean performance oriented GPU programming for real-time graphics.
Overview
--------
This document aims to provide a detailed reference for the Slang language and its supported constructs.
The Slang compiler *implementation* may deviate from the language as documented here, in a few key ways:
* The implementation is necessarily imperfect, and can have bugs
* The implementation may not fully support constructs documented here, or their capabilities may not be as complete as what is documented
* The implementation may support certain constructs that are experimental, deprecated, or are otherwise intentionally undocumented
Where possible, this document will call out known deviations between the language as defined here and the implementation in the compiler.
Terminology
-----------
> Note: This section is not yet complete.
>
> This section should detail how the document uses terms like "may" and "must," if we intend for those to be used in a manner consistent with [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt).
Typographical Conventions
-------------------------
> Note: This section is not yet complete.
>
> This section should clarify how the document displays code fragments, grammar productions, etc.

View file

@ -1,121 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Lexical Structure
=================
Source Units
------------
A _source unit_ comprises a sequence of zero or more _characters_ which for purposes of this document are defined as Unicode scalars (code points).
Encoding
--------
Implementations *may* accept source units stored as files on disk, buffers in memory, or any appropriate implementation-specified means.
When source units are stored as byte sequences, they *should* be encoded using UTF-8.
Implementations *may* support additional implementation-specified encodings.
Whitespace
----------
_Horizontal whitespace_ consists of space (U+0020) and horizontal tab (U+0009).
A _line break_ consists of a line feed (U+000A), carriage return (U+000D) or a carriage return followed by a line feed (U+000D, U+000A).
Line breaks are used as line separators rather than terminators; it is not necessary for a source unit to end with a line break.
Escaped Line Breaks
-------------------
An _escaped line break_ comprises a backslash (`\`, U+005C) followed immediately by a line break.
Comments
--------
A _comment_ is either a line comment or a block comment:
```hlsl
// a line comment
/* a block comment */
```
A _line comment_ comprises two forward slashes (`/`, U+002F) followed by zero or more characters that do not contain a line break.
A line comment extends up to, but does not include, a subsequent line break or the end of the source unit.
A _block comment_ begins with a forward slash (`/`, U+002F) followed by an asterisk (`*`, U+002A).
A block comment is terminated by the next instance of an asterisk followed by a forward slash (`*/`).
A block comment contains all characters between where it begins and where it terminates, including any line breaks.
Block comments do not nest.
It is an error if a block comment that begins in a source unit is not terminated in that source unit.
Phases
------
Compilation of a source unit proceeds _as if_ the following steps are executed in order:
1. Line numbering (for subsequent diagnostic messages) is noted based on the locations of line breaks
2. Escaped line breaks are eliminated. No new characters are inserted to replace them. Any new escaped line breaks introduced by this step are not eliminated.
3. Each comment is replaced with a single space (U+0020)
4. The source unit is _lexed_ into a sequence of tokens according to the lexical grammar in this chapter
5. The lexed sequence of tokens is _preprocessed_ to produce a new sequence of tokens (Chapter 3)
6. Subsequent processing is performed on the preprocessed sequence of tokens
Identifiers
-----------
An _identifier_ begins with an uppercase or lowercase ASCII letter (`A` through `Z`, `a` through `z`), or an underscore (`_`).
After the first character, ASCII digits (`0` through `9`) may also be used in an identifier.
The identifier consisting of a single underscore (`_`) is reserved by the language and must not be used by programs.
Otherwise, there are no fixed keywords or reserved words.
Words that name a built-in language construct can also be used as user-defined identifiers and will shadow the built-in definitions in the scope of their definition.
Literals
--------
### Integer Literals
An _integer literal_ consists of an optional radix specifier followed by digits and an optional suffix.
The _radix specifier_ may be:
* `0x` or `0X` to specify a hexadecimal literal (radix 16)
* `0b` or `0B` to specify a binary literal (radix 2)
When no radix specifier is present a radix of 10 is used.
Octal literals (radix 8) are not supported.
A `0` prefix on an integer literal does *not* specify an octal literal as it does in C.
Implementations *may* warn on integer literals with a `0` prefix in case users expect C behavior.
The _digits_ of an integer literal may include ASCII `0` through `9`.
In the case of a hexadecimal literal, digits may include the letters `A` through `F` (and `a` through `f`) which represent digit values of 10 through 15.
It is an error for an integer literal to include a digit with a value greater than or equal to the radix.
The digits of an integer literal may also include underscore (`_`) characters, which are ignored and have no semantic impact.
The _suffix_ on an integer literal may be used to indicate the desired type of the literal:
* A `u` suffix indicates the `uint` type
* An `l` or `ll` suffix indicates the `int64_t` type
* A `ul` or `ull` suffix indicates the `uint64_t` type
### Floating-Point Literals
> Note: This section is not yet complete.
### String Literals
> Note: This section is not yet complete.
### Character Literals
> Note: This section is not yet complete.
Operators and Punctuation
-------------------------
> Note: This section is not yet complete.

View file

@ -1,19 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Preprocessor
============
Slang supports a C-style preprocessor with the following directives:
* `#include`
* `#define`
* `#undef`
* `#if`, `#ifdef`, `#ifndef`
* `#else`, `#elif`
* `#endif`
* `#error`
* `#warning`
* `#line`
* `#pragma`
> Note: This section is not yet complete.

View file

@ -1,339 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Types
=====
This section defines the kinds of types supported by Slang.
Types in Slang do not necessarily prescribe a single _layout_ in memory.
The discussion of each type will specify any guarantees about layout it provides; any details of layout not specified here may depend on the target platform, compiler options, and context in which a type is used.
Void Type
---------
The type `void` contains no data and has a single, unnamed, value.
A `void` value takes up no space, and thus does not affect the layout of types.
Formally, a `void` value behaves as if it has a size of zero bytes, and one-byte alignment.
Scalar Types
------------
### Boolean Type
The type `bool` is used to represent Boolean truth values: `true` and `false`.
The size of a `bool` varies across target platforms; programs that need to ensure a matching in-memory layout between targets should not use `bool` for in-memory data structures.
On all platforms, the `bool` type must be _naturally aligned_ (its alignment is its size).
### Integer Types
The following integer types are defined:
| Name | Description |
|---------------|-------------|
| `int8_t` | 8-bit signed integer |
| `int16_t` | 16-bit signed integer |
| `int` | 32-bit signed integer |
| `int64_t` | 64-bit signed integer |
| `uint8_t` | 8-bit unsigned integer |
| `uint16_t` | 16-bit unsigned integer |
| `uint` | 32-bit unsigned integer |
| `uint64_t` | 64-bit unsigned integer |
All signed integers use two's complement representation.
All arithmetic operations on integers (both signed and unsigned) wrap on overflow/underflow.
All target platforms must support the `int` and `uint` types.
Specific [target platforms](../target-compatibility.md) may not support the other integer types.
All integer types are stored in memory with their natural size and alignment on all targets that support them.
### Floating-Point Types
The following floating-point types are defined:
| Name | Description |
|---------------|-------------------------------|
| `half` | 16-bit floating-point number (1 sign bit, 5 exponent bits, 10 fraction bits) |
| `float` | 32-bit floating-point number (1 sign bit, 8 exponent bits, 23 fraction bits) |
| `double` | 64-bit floating-point number (1 sign bit, 11 exponent bits, 52 fraction bits) |
All floating-point types are laid out in memory using the matching IEEE 754 standard format (`binary16`, `binary32`, `binary64`).
Target platforms may define their own rules for rounding, precision, denormals, infinities, and not-a-number values.
All target platforms must support the `float` type.
Specific [targets](../target-compatibility.md) may not support the other floating-point types.
All floating-point types are stored in memory with their natural size and alignment on all targets that support them.
Vector Types
------------
A vector type is written as `vector<T, N>` and represents an `N`-element vector with elements of type `T`.
The _element type_ `T` must be one of the built-in scalar types, and the _element count_ `N` must be a specialization-time constant integer.
The element count must be between 2 and 4, inclusive.
A vector type allows subscripting of its elements like an array, but also supports element-wise arithmetic on its elements.
_Element-wise arithmetic_ means mapping unary and binary operators over the elements of a vector to produce a vector of results:
```hlsl
vector<int,4> a = { 1, 2, 30, 40 };
vector<int,4> b = { 10, 20, 3, 4 };
-a; // yields { -1, -2, -30, -40 }
a + b; // yields { 11, 22, 33, 44 }
b / a; // yields { 10, 10, 0, 0 }
a > b; // yields { false, false, true, true }
```
A vector type is laid out in memory as `N` contiguous values of type `T` with no padding.
The alignment of a vector type may vary by target platforms.
The alignment of `vector<T,N>` will be at least the alignment of `T` and may be at most `N` times the alignment of `T`.
As a convenience, Slang defines built-in type aliases for vectors of the built-in scalar types.
E.g., declarations equivalent to the following are provided by the Slang core module:
```hlsl
typealias float4 = vector<float, 4>;
typealias int8_t3 = vector<int8_t, 3>;
```
### Legacy Syntax
For compatibility with older codebases, the generic `vector` type includes default values for `T` and `N`, being declared as:
```hlsl
struct vector<T = float, let N : int = 4> { ... }
```
This means that the bare name `vector` may be used as a type equivalent to `float4`:
```hlsl
// All of these variables have the same type
vector a;
float4 b;
vector<float> c;
vector<float, 4> d;
```
Matrix Types
------------
A matrix type is written as `matrix<T, R, C>` and represents a matrix of `R` rows and `C` columns, with elements of type `T`.
The element type `T` must be one of the built-in scalar types.
The _row count_ `R` and _column count_ `C` must be specialization-time constant integers.
The row count and column count must each be between 2 and 4, inclusive.
A matrix type allows subscripting of its rows, similar to an `R`-element array of `vector<T,C>` elements.
A matrix type also supports element-wise arithmetic.
Matrix types support both _row-major_ and _column-major_ memory layout.
Implementations may support command-line flags or API options to control the default layout to use for matrices.
> Note: Slang currently does *not* support the HLSL `row_major` and `column_major` modifiers to set the layout used for specific declarations.
Under row-major layout, a matrix is laid out in memory equivalently to an `R`-element array of `vector<T,C>` elements.
Under column-major layout, a matrix is laid out in memory equivalent to the row-major layout of its transpose.
This means it will be laid out equivalently to a `C`-element array of `vector<T,R>` elements.
As a convenience, Slang defines built-in type aliases for matrices of the built-in scalar types.
E.g., declarations equivalent to the following are provided by the Slang core module:
```hlsl
typealias float3x4 = matrix<float, 3, 4>;
typealias int64_t4x2 = matrix<int64_t, 4, 2>;
```
> Note: For programmers using OpenGL or Vulkan as their graphics API, and/or who are used to the GLSL language,
> it is important to recognize that the equivalent of a GLSL `mat3x4` is a Slang `float3x4`.
> This is despite the fact that GLSL defines a `mat3x4` as having 3 *columns* and 4 *rows*, while a Slang `float3x4` is defined as having 3 rows and 4 columns.
> This convention means that wherever Slang refers to "rows" or "columns" of a matrix, the equivalent terms in the GLSL, SPIR-V, OpenGL, and Vulkan specifications are "column" and "row" respectively (*including* in the compound terms of "row-major" and "column-major")
> While it may seem that this choice of convention is confusing, it is necessary to ensure that subscripting with `[]` can be efficiently implemented on all target platforms.
> This decision in the Slang language is consistent with the compilation of HLSL to SPIR-V performed by other compilers.
### Legacy Syntax
For compatibility with older codebases, the generic `matrix` type includes default values for `T`, `R`, and `C`, being declared as:
```hlsl
struct matrix<T = float, let R : int = 4, let C : int = 4> { ... }
```
This means that the bare name `matrix` may be used as a type equivalent to `float4x4`:
```hlsl
// All of these variables have the same type
matrix a;
float4x4 b;
matrix<float, 4, 4> c;
```
Structure Types
---------------
Structure types are introduced with `struct` declarations, and consist of an ordered sequence of named and typed fields:
```hlsl
struct S
{
float2 f;
int3 i;
}
```
### Standard Layout
The _standard layout_ for a structure type uses the following algorithm:
* Initialize variables `size` and `alignment` to zero and one, respectively
* For each field `f` of the structure type:
* Update `alignment` to be the maximum of `alignment` and the alignment of `f`
* Set `size` to the smallest multiple of `alignment` not less than `size`
* Set the offset of field `f` to `size`
* Add the size of `f` to `size`
When this algorithm completes, `size` and `alignment` will be the size and alignment of the structure type.
Most target platforms do not use the standard layout directly, but it provides a baseline for defining other layout algorithms.
Any layout for structure types must guarantee an alignment at least as large as the standard layout.
### C-Style Layout
C-style layout for structure types differs from standard layout by adding an additional final step:
* Set `size` to the smallest multiple of `alignment` not less than `size`
This mirrors the layout rules used by typical C/C++ compilers.
### D3D Constant Buffer Layout
D3D constant buffer layout is similar to standard layout with two differences:
* The initial alignment is 16 instead of one
* If a field would have _improper straddle_, where the interval `(fieldOffset, fieldOffset+fieldSize)` (exclusive on both sides) contains any multiple of 16, *and* the field offset is not already a multiple of 16, then the offset of the field is adjusted to the next multiple of 16
Array Types
-----------
An _array type_ is either a statically-sized or dynamically-sized array type.
A known-size array type is written `T[N]` where `T` is a type and `N` is a specialization-time constant integer.
This type represents an array of exactly `N` values of type `T`.
An unknown-size array type is written `T[]` where `T` is a type.
This type represents an array of some fixed, but statically unknown, size.
> Note: Unlike in C and C++, arrays in Slang are always value types, meaning that assignment and parameter passing of arrays copies their elements.
### Declaration Syntax
For variable and parameter declarations using traditional syntax, a variable of array type may be declared by using the element type `T` as a type specifier (before the variable name) and the `[N]` to specify the element count after the variable name:
```hlsl
int a[10];
```
Alternatively, the array type itself may be used as the type specifier:
```hlsl
int[10] a;
```
When using the `var` or `let` keyword to declare a variable, the array type must not be split:
```hlsl
var a : int[10];
```
> Note: when declaring arrays of arrays (often thought of as "multidimensional arrays") a programmer must be careful about the difference between the two declaration syntaxes.
> The following two declarations are equivalent:
>
> ```hlsl
> int[3][5] a;
> int a[5][3];
> ```
>
> In each case, `a` is a five-element array of three-element arrays of `int`s.
> However, one declaration orders the element counts as `[3][5]` and the other as `[5][3]`.
### Element Count Inference
When a variable is declared with an unknown-size array type, and also includes an initial-value expression:
```hlsl
int a[] = { 0xA, 0xB, 0xC, 0xD };
```
The compiler will attempt to infer an element count based on the type and/or structure of the initial-value expression.
In the above case, the compiler will infer an element count of 4 from the structure of the initializer-list expression.
Thus the preceding declaration is equivalent to:
```hlsl
int a[4] = { 0xA, 0xB, 0xC, 0xD };
```
A variable declared in this fashion semantically has a known-size array type and not an unknown-size array type; the use of an unknown-size array type for the declaration is just a convenience feature.
### Standard Layout
The _stride_ of a type is the smallest multiple of its alignment not less than its size.
Using the standard layout for an array type `T[]` or `T[N]`:
* The _element stride_ of the array type is the stride of its element type `T`
* Element `i` of the array starts at an offset that is `i` times the element stride of the array
* The alignment of the array type is the alignment of `T`
* The size of an unknown-size array type is unknown
* The size of a known-size array with zero elements is zero
* The size of a known-size array with a nonzero number `N` of elements is the size of `T` plus `N - 1` times the element stride of the array
### C-Style Layout
The C-style layout of an array type differs from the standard layout in that the size of a known-size array with a nonzero number `N` of elements is `N` times the element stride of the array.
### D3D Constant Buffer Layout
The D3D constant buffer layout of an array differs from the standard layout in that the element stride of the array is set to the smallest multiple of the alignment of `T` that is not less than the stride of `T`
This Type
---------
Within the body of a structure or interface declaration, the keyword `This` may be used to refer to the enclosing type.
Inside of a structure type declaration, `This` refers to the structure type itself.
Inside of an interface declaration, `This` refers to the concrete type that is conforming to the interface (that is, the type of `this`).
Opaque Types
------------
_Opaque_ types are built-in types that (depending on the target platform) may not have a well-defined size or representation in memory.
Similar languages may refer to these as "resource types" or "object types."
The full list of opaque types supported by Slang can be found in the core module reference, but important examples are:
* Texture types such as `Texture2D<T>`, `TextureCubeArray<T>`, and `RWTexture2DMS<T>`
* Sampler state types: `SamplerState` and `SamplerComparisonState`
* Buffer types like `ConstantBuffer<T>` and `StructuredBuffer<T>`
* Parameter blocks: `ParameterBlock<T>`
Layout for opaque types depends on the target platform, and no specific guarantees can be made about layout rules across platforms.
Known and Unknown Size
----------------------
Every type has either known or unknown size.
Types with unknown size arise in a few ways:
* An unknown-size array type has unknown size
* A structure type has unknown size if any field type has unknown size
The use of types with unknown size is restricted as follows:
* A type with unknown size cannot be used as the element type of an array
* A type with unknown size can only be used as the last field of a structure type
* A type with unknown size cannot be used as a generic argument to specialize a user-defined type, function, etc. Specific built-in generic types/functions may support unknown-size types, and this will be documented on the specific type/function.

View file

@ -1,353 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Expressions
===========
Expressions are terms that can be _evaluated_ to produce values.
This section provides a list of the kinds of expressions that may be used in a Slang program.
In general, the order of evaluation of a Slang expression proceeds from left to right.
Where specific expressions do not follow this order of evaluation, it will be noted.
Some expressions can yield _l-values_, which allows them to be used on the left-hand-side of assignment, or as arguments for `out` or `in out` parameters.
Literal Expressions
-------------------
Literal expressions are never l-values.
### Integer Literal Expressions
An integer literal expression consists of a single integer literal token:
```hlsl
123
```
An unsuffixed integer literal expression always has type `int`.
### Floating-Point Literal Expressions
A floating-point literal expression consists of a single floating-point literal token:
```hlsl
1.23
```
An unsuffixed floating-point literal expression always has type `float`.
### Boolean Literal Expressions
Boolean literal expressions use the keywords `true` and `false`.
### String Literal Expressions
A string literal expression consists of one or more string literal tokens in a row:
```hlsl
"This" "is one" "string"
```
Identifier Expression
---------------------
An _identifier expression_ consists of a single identifier:
```hlsl
someName
```
When evaluated, this expression looks up `someName` in the environment of the expression and yields the value of a declaration with a matching name.
An identifier expression is an l-value if the declaration it refers to is mutable.
### Overloading
It is possible for an identifier expression to be _overloaded_, such that it refers to one or more candidate declarations with the same name.
If the expression appears in a context where the correct declaration to use can be disambiguated, then that declaration is used as the result of the name expression; otherwise use of an overloaded name is an error at the use site.
### Implicit Lookup
It is possible for a name expression to refer to nested declarations in two ways:
* In the body of a method, a reference to `someName` may resolve to `this.someName`, using the implicit `this` parameter of the method
* When a global-scope `cbuffer` or `tbuffer` declaration is used, `someName` may refer to a field declared inside the `cbuffer` or `tbuffer`
Member Expression
-----------------
A _member expression_ consists of a base expression followed by a dot (`.`) and an identifier naming a member to be accessed:
```hlsl
base.m
```
When `base` is a structure type, this expression looks up the field or other member named by `m`.
Just as for an identifier expression, the result of a member expression may be overloaded, and might be disambiguated based on how it is used.
A member expression is an l-value if the base expression is an l-value and the member it refers to is mutable.
### Implicit Dereference
If the base expression of a member reference is a _pointer-like type_ such as `ConstantBuffer<T>`, then a member reference expression will implicitly dereference the base expression to refer to the pointed-to value (e.g., in the case of `ConstantBuffer<T>` this is the buffer contents of type `T`).
### Vector Swizzles
When the base expression of a member expression is of a vector type `vector<T,N>` then a member expression is a _vector swizzle expression_.
The member name must conform to these constraints:
* The member name must comprise between one and four ASCII characters
* The characters must come either from the set (`x`, `y`, `z`, `w`) or (`r`, `g`, `b`, `a`), corresponding to element indices of (0, 1, 2, 3)
* The element index corresponding to each character must be less than `N`
If the member name of a swizzle consists of a single character, then the expression has type `T` and is equivalent to a subscript expression with the corresponding element index.
If the member name of a swizzle consists of `M` characters, then the result is a `vector<T,M>` built from the elements of the base vector with the corresponding indices.
A vector swizzle expression is an l-value if the base expression was an l-value and the list of indices corresponding to the characters of the member name contains no duplicates.
### Matrix Swizzles
> Note: The Slang implementation currently doesn't support matrix swizzles.
### Static Member Expressions
When the base expression of a member expression is a type instead of a value, the result is a _static member expression_.
A static member expression can refer to a static field or static method of a structure type.
A static member expression can also refer to a case of an enumeration type.
A static member expression (but not a member expression in general) may use the token `::` instead of `.` to separate the base and member name:
```hlsl
// These are equivalent
Color.Red
Color::Red
```
This Expression
---------------
A _this expression_ consists of the keyword `this` and refers to the implicit instance of the enclosing type that is being operated on in instance methods, subscripts, and initializers.
The type of `this` is `This`.
Parenthesized Expression
----------------------
An expression wrapped in parentheses `()` is a _parenthesized expression_ and evaluates to the same value as the wrapped expression.
Call Expression
---------------
A _call expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `()`:
```hlsl
myFunction( 1.0f, 20 )
```
When the base expression (e.g., `myFunction`) is overloaded, a call expression can disambiguate the overloaded expression based on the number and type of arguments present.
The base expression of a call may be a member reference expression:
```hlsl
myObject.myFunc( 1.0f )
```
In this case the base expression of the member reference (e.g., `myObject` in this case) is used as the argument for the implicit `this` parameter of the callee.
### Mutability
If a `[mutating]` instance method is being called, the argument for the implicit `this` parameter must be an l-value.
The argument expressions corresponding to any `out` or `in out` parameters of the callee must be l-values.
A call expression is never an l-value.
### Initializer Expressions
When the base expression of a call is a type instead of a value, the expression is an initializer expression:
```hlsl
float2(1.0f, 2.0f)
```
An initializer expression initializes an instance of the specified type using the given arguments.
An initializer expression with only a single argument is treated as a cast expression:
```hlsl
// these are equivalent
int(1.0f)
(int) 1.0f
```
Subscript Expression
--------------------
A _subscript expression_ consists of a base expression and a list of argument expressions, separated by commas and enclosed in `[]`:
```hlsl
myVector[someIndex]
```
A subscript expression invokes one of the subscript declarations in the type of the base expression. Which subscript declaration is invoked is resolved based on the number and types of the arguments.
A subscript expression is an l-value if the base expression is an l-value and if the subscript declaration it refers to has a setter or by-reference accessor.
Subscripts may be formed on the built-in vector, matrix, and array types.
Initializer List Expression
---------------------------
An _initializer list expression_ comprises zero or more expressions, separated by commas, enclosed in `{}`:
```
{ 1, "hello", 2.0f }
```
An initializer list expression may only be used directly as the initial-value expression of a variable or parameter declaration; initializer lists are not allowed as arbitrary sub-expressions.
> Note: This section will need to be updated with the detailed rules for how expressions in the initializer list are used to initialize values of each kind of type.
Cast Expression
---------------
A _cast expression_ attempts to coerce a single value (the base expression) to a desired type (the target type):
```hlsl
(int) 1.0f
```
A cast expression can perform both built-in type conversions and invoke any single-argument initializers of the target type.
### Compatibility Feature
As a compatibility feature for older code, Slang supports using a cast where the base expression is an integer literal zero and the target type is a user-defined structure type:
```hlsl
MyStruct s = (MyStruct) 0;
```
The semantics of such a cast are equivalent to initialization from an empty initializer list:
```hlsl
MyStruct s = {};
```
Assignment Expression
---------------------
An _assignment expression_ consists of a left-hand side expression, an equals sign (`=`), and a right-hand-side expression:
```hlsl
myVar = someValue
```
The semantics of an assignment expression are to:
* Evaluate the left-hand side to produce an l-value,
* Evaluate the right-hand side to produce a value
* Store the value of the right-hand side to the l-value of the left-hand side
* Yield the l-value of the left-hand-side
Operator Expressions
--------------------
### Prefix Operator Expressions
The following prefix operators are supported:
| Operator | Description |
|-----------|-------------|
| `+` | identity |
| `-` | arithmetic negation |
| `~` | bit-wise Boolean negation |
| `!` | Boolean negation |
| `++` | increment in place |
| `--` | decrement in place |
A prefix operator expression like `+val` is equivalent to a call expression to a function of the matching name `operator+(val)`, except that lookup for the function only considers functions marked with the `__prefix` keyword.
The built-in prefix `++` and `--` operators require that their operand is an l-value, and work as follows:
* Evaluate the operand to produce an l-value
* Read from the l-value to yield an _old value_
* Increment or decrement the value to yield a _new value_
* Write the new value to the l-value
* Yield the new value
### Postfix Operator Expressions
The following postfix operators are supported:
| Operator | Description |
|-----------|-------------|
| `++` | increment in place |
| `--` | decrement in place |
A postfix operator expression like `val++` is equivalent to a call expression to a function of the matching name `operator++(val)`, except that lookup for the function only considers functions marked with the `__postfix` keyword.
The built-in prefix `++` and `--` operators require that their operand is an l-value, and work as follows:
* Evaluate the operand to produce an l-value
* Read from the l-value to yield an _old value_
* Increment or decrement the value to yield a _new value_
* Write the new value to the l-value
* Yield the old value
### Infix Operator Expressions
The following infix binary operators are supported:
| Operator | Kind | Description |
|-----------|-------------|-------------|
| `*` | Multiplicative | multiplication |
| `/` | Multiplicative | division |
| `%` | Multiplicative | remainder of division |
| `+` | Additive | addition |
| `-` | Additive | subtraction |
| `<<` | Shift | left shift |
| `>>` | Shift | right shift |
| `<` | Relational | less than |
| `>` | Relational | greater than |
| `<=` | Relational | less than or equal to |
| `>=` | Relational | greater than or equal to |
| `==` | Equality | equal to |
| `!=` | Equality | not equal to |
| `&` | BitAnd | bitwise and |
| `^` | BitXor | bitwise exclusive or |
| `\|` | BitOr | bitwise or |
| `&&` | And | logical and |
| `\|\|` | Or | logical or |
| `+=` | Assignment | compound add/assign |
| `-=` | Assignment | compound subtract/assign |
| `*=` | Assignment | compound multiply/assign |
| `/=` | Assignment | compound divide/assign |
| `%=` | Assignment | compound remainder/assign |
| `<<=` | Assignment | compound left shift/assign |
| `>>=` | Assignment | compound right shift/assign |
| `&=` | Assignment | compound bitwise and/assign |
| `\|=` | Assignment | compound bitwise or/assign |
| `^=` | Assignment | compound bitwise xor/assign |
| `=` | Assignment | assignment |
| `,` | Sequencing | sequence |
With the exception of the assignment operator (`=`), an infix operator expression like `left + right` is equivalent to a call expression to a function of the matching name `operator+(left, right)`.
### Conditional Expression
The conditional operator, `?:`, is used to select between two expressions based on the value of a condition:
```hlsl
useNegative ? -1.0f : 1.0f
```
The condition may be either a single value of type `bool`, or a vector of `bool`.
When a vector of `bool` is used, the two values being selected between must be vectors, and selection is performed component-wise.
> Note: Unlike C, C++, GLSL, and most other C-family languages, Slang currently follows the precedent of HLSL where `?:` does not short-circuit.
>
> This decision may change (for the scalar case) in a future version of the language.
> Programmers are encouraged to write code that does not depend on whether or not `?:` short-circuits.

View file

@ -1,237 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Statements
==========
Statements are used to define the bodies of functions and determine order of evaluation and control flow for an entire program.
Statements are distinct from expressions in that statements do not yield results and do not have types.
This section lists the kinds of statements supported by Slang.
Expression Statement
--------------------
An expression statement consists of an expression followed by a semicolon:
```hlsl
doSomething();
a[10] = b + 1;
```
An implementation may warn on an expression statement that has no effect on the results of execution.
Declaration Statement
---------------------
A declaration may be used as a statement:
```hlsl
let x = 10;
var y = x + 1;
int z = y - x;
```
> Note: Currently only variable declarations are allowed in statement contexts, but other kinds of declarations may be enabled in the future.
Block Statement
---------------
A block statement consists of zero or more statements wrapped in curly braces `{}`:
```hlsl
{
int x = 10;
doSomething(x);
}
```
A block statement provides local scoping to declarations.
Declarations in a block are visible to later statements in the same block, but not to statements or expressions outside of the block.
Empty Statement
---------------
A single semicolon (`;`) may be used as an empty statement equivalent to an empty block statement `{}`.
Conditional Statements
----------------------
### If Statement
An _if statement_ consists of the `if` keyword and a conditional expression in parentheses, followed by a statement to execute if the condition is true:
```hlsl
if(somethingShouldHappen)
doSomething();
```
An if statement may optionally include an _else clause_ consisting of the keyword `else` followed by a statement to execute if the condition is false:
```hlsl
if(somethingShouldHappen)
doSomething();
else
doNothing();
```
### Switch Statement
A _switch statement_ consists of the `switch` keyword followed by an expression wrapped in parentheses and a _body statement_:
```hlsl
switch(someValue)
{
...
}
```
The body of a switch statement must be a block statement, and its body must consist of switch case clauses.
A _switch case clause_ consists of one or more case labels or default labels, followed by one or more statements:
```hlsl
// this is a switch case clause
case 0:
case 1:
doBasicThing();
break;
// this is another switch case clause
default:
doAnotherThing();
break;
```
A _case label_ consists of the keyword `case` followed by an expressions and a colon (`:`).
The expression must evaluate to a compile-time constant integer.
A _default label_ consists of the keyword `default` followed by a colon (`:`).
It is an error for a case label or default label to appear anywhere other than the body of a `switch` statement.
It is an error for a statement to appear inside the body of a `switch` statement that is not part of a switch case clause.
Each switch case clause must exit the `switch` statement via a `break` or other control transfer statement.
"Fall-through" from one switch case clause to another is not allowed.
Loop Statements
---------------
### For Statement
A _for statement_ uses the following form:
```hlsl
for( <initial statement> ; <condition expression> ; <side effect expression> ) <body statement>
```
The _initial statement_ is optional, but may declare a variable whose scope is limited to the for statement.
The _condition expression_ is optional. If present it must be an expression that can be coerced to type `bool`. If absent, a true value is used as the condition.
The _side effect expression_ is optional. If present, it will be executed for its effects before the condition is tested on every loop iteration after the first.
The _body statement_ is a statement that will be executed for each iteration of the loop.
### While Statement
A _while statement_ uses the following form:
```hlsl
while( <condition expression> ) <body statement>
```
and is equivalent to a `for` loop of the form:
```hlsl
for( ; <condition expression> ; ) <body statement>
```
### Do-While Statement
A _do-while statement_ uses the following form:
```hlsl
do <body statement> while( <condition expression> )
```
and is equivalent to a `for` loop of the form:
```hlsl
for(;;)
{
<body statement>
if(<condition expression>) continue; else break;
}
```
Control Transfer Statements
---------------------------
### Break Statement
A `break` statement transfers control to after the end of the closest lexically enclosing switch statement or loop statement:
```hlsl
break;
```
### Continue Statement
A `continue` statement transfers control to the start of the next iteration of a loop statement.
In a for statement with a side effect expression, the side effect expression is evaluated when `continue` is used:
```hlsl
continue;
```
### Return Statement
A `return` statement transfers control out of the current function.
In the body of a function with a `void` result type, the `return` keyword may be followed immediately by a semicolon:
```hlsl
return;
```
Otherwise, the `return` keyword must be followed by an expression to use as the value to return to the caller:
```hlsl
return someValue;
```
The value returned must be able to coerce to the result type of the lexically enclosing function.
### Discard Statement
A `discard` statement can only be used in the context of a fragment shader, in which case it causes the current invocation to terminate and the graphics system to discard the corresponding fragment so that it does not get combined with the framebuffer pixel at its coordinates.
Operations with side effects that were executed by the invocation before a `discard` will still be performed and their results will become visible according to the rules of the platform.
Compile-Time For Statement
--------------------------
A _compile-time for statement_ is used as an alternative to preprocessor techniques for loop unrolling.
It looks like:
```hlsl
$for( <name> in Range(<initial-value>, <upper-bound>)) <body statement>
```
The _initial value_ and _upper bound_ expressions must be compile-time constant integers.
The semantics of a compile-time for statement are as if it were expanded into:
```hlsl
{
let <name> = <initial-value>;
<body statement>
}
{
let <name> = <initial-value> + 1;
<body statement>
}
...
{
let <name> = <upper-bound> - 1;
<body statement>
}
```

View file

@ -1,770 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Declarations
============
Modules
-------
A module consists of one or more source units that are compiled together.
The global declarations in those source units comprise the body of the module.
In general, the order of declarations within a source unit does not matter; declarations can refer to other declarations (of types, functions, variables, etc.) later in the same source unit.
Declarations (other than `import` declarations) may freely be defined in any source unit in a module; declarations in one source unit of a module may freely refer to declarations in other source units.
Imports
-------
An import declaration is introduced with the keyword `import`:
```hlsl
import Shadowing;
```
An import declaration searches for a module matching the name given in the declaration, and brings the declarations in that module into scope in the current source unit.
> Note: an `import` declaration only applies to the scope of the current source unit, and does *not* import the chosen module so that it is visible to other source units of the current module.
The name of the module being imported may use a compound name:
```hlsl
import MyApp.Shadowing;
```
The mechanism used to search for a module is implementation-specific.
> Note: The current Slang implementation searches for a module by translating the specified module name into a file path by:
>
> * Replacing any dot (`.`) separators in a compound name with path separators (e.g., `/`)
>
> * Replacing any underscores (`_`) in the name with hyphens (`-`)
>
> * Appending the extension `.slang`
>
> The implementation then looks for a file matching this path on any of its configured search paths.
> If such a file is found it is loaded as a module comprising a single source unit.
The declarations of an imported module become visible to the current module, but they are not made visible to code that later imports the current module.
> Note: An experimental feature exists for an "exported" import declaration:
>
> ```hlsl
> // inside A.slang
> __exported import Shadowing;
> ```
>
> This example imports the declarations from `Shadowing` into the current module (module `A`),
> and also sets up information so that if other code declares `import A` then it can see
> both the declarations in `A` and those in `Shadowing`.
> Note: Mixing `import` declarations and traditional preprocessor-based (`#include`) modularity
> in a codebase can lead to surprising results.
>
> Some things to be aware of:
>
> * Preprocessor definitions in your module do *not* affect the code of modules you `import`.
>
> * Preprocessor definitions in a module you `import` do *not* affect your code
>
> * The above caveats also apply to "include guards" and `#pragma once`, since they operate at the granularity of a source unit (not across modules)
>
> * If you `import` two modules, and then both `#include` the same file, then those two modules may end up with duplicate declarations with the same name.
>
> As a general rule, be wary of preprocessor use inside of code meant to be an `import`able module.
Variables
---------
Variables are declared using the keywords `let` and `var`:
```hlsl
let x = 7;
var y = 9.0;
```
A `let` declaration introduces an immutable variable, which may not be assigned to or used as the argument for an `in out` or `out` parameter.
A `var` declaration introduces a mutable variable.
An explicit type may be given for a variable by placing it after the variable name and a colon (`:`):
```hlsl
let x : int = 7;
var y : float = 9.0;
```
If no type is specified for a variable, then a type will be inferred from the initial-value expression.
It is an error to declare a variable that has neither a type specifier nor an initial-value expression.
It is an error to declare a variable with `let` without an initial-value expression.
A variable declared with `var` may be declared without an initial-value expression if it has an explicit type specifier:
```
var y : float;
```
In this case the variable is _uninitialized_ at the point of declaration, and must be explicitly initialized by assigning to it.
Code that uses the value of an uninitialized variable may produce arbitrary results, or even exhibit undefined behavior depending on the type of the variable.
Implementations *may* issue an error or warning for code that might make use of an uninitialized variable.
### Traditional Syntax
Variables may also be declared with traditional C-style syntax:
```hlsl
const int x = 7;
float y = 9.0;
```
For traditional variable declarations a type must be specified.
> Note: Slang does not support an `auto` type specifier like C++.
Traditional variable declarations are immutable if they are declared with the `const` modifier, and are otherwise mutable.
### Variables at Global Scope
Variables declared at global scope may be either a global constant, a static global variables, or a global shader parameters.
#### Global Constants
A variable declared at global scope and marked with `static` and `const` is a _global constant_.
A global constant must have an initial-value expression, and that initial-value expression must be a compile-time constant expression.
#### Static Global Variables
A variable declared at global scope and marked with `static` (but not with `const`) is a _static global variable_.
A static global variable provides storage for each invocation executing an entry point.
Assignments to a static global variable from one invocation do not affect the value seen by other invocations.
> Note: the semantics of static global variable are similar to a "thread-local" variable in other programming models.
A static global variable may include an initial-value expression; if an initial-value expression is included it is guaranteed to be evaluated and assigned to the variable before any other expression that references the variable is evaluated.
There is no guarantee that the initial-value expression for a static global variable is evaluated before entry point execution begins, or even that the initial-value expression is evaluated at all (in cases where the variable might not be referenced at runtime).
> Note: the above rules mean that an implementation may perform dead code elimination on static global variables, and may choose between eager and lazy initialization of those variables at its discretion.
#### Global Shader Parameters
A variable declared at global scope and not marked with `static` (even if marked with `const`) is a _global shader parameter_.
Global shader parameters are used to pass arguments from application code into invocations of an entry point.
The mechanisms for parameter passing are specific to each target platform.
> Note: Currently only global shader parameters of opaque types or arrays of opaque types are supported.
A global shader parameter may include an initial-value expression, but such an expression does not affect the semantics of the compiled program.
> Note: Initial-value expressions on global shader parameters are only useful to set up "default values" that can be read via reflection information and used by application code.
### Variables at Function Scope
Variables declared at _function scope_ (in the body of a function, initializer, subscript accessor, etc.) may be either a function-scope constant, function-scope static variable, or a local variable.
#### Function-Scope Constants
A variable declared at function scope and marked with both `static` and `const` is a _function-scope constant_.
Semantically, a function-scope constant behaves like a global constant except that its name is only visible in the local scope.
#### Function-Scope Static Variables
A variable declared at function scope and marked with `static` (but not `const`) is a _function-scope static variable_.
Semantically, a function-scope static variable behaves like a global static variable except that its name is only visible in the local scope.
The initial-value expression for a function-scope static variable may refer to non-static variables in the body of the function.
In these cases initialization of the variable is guaranteed not to occur until at least the first time the function body is evaluated for a given invocation.
#### Local Variables
A variable declared at function scope and not marked with `static` (even if marked with `const`) is a _local variable_.
A local variable has unique storage for each _activation_ of a function by an invocation.
When a function is called recursively, each call produces a distinct activation with its own copies of local variables.
Functions
---------
Functions are declared using the `func` keyword:
```hlsl
func add(x: int, y: float) -> float { return float(x) + y; }
```
Parameters
----------
The parameters of the function are declared as `name: type` pairs.
Parameters may be given a _default value_ by including an initial-value-expression clause:
```hlsl
func add(x: int, y: float = 1.0f) { ... }
```
Parameters may be marked with a _direction_ which affects how data is passed between caller and callee:
```hlsl
func add(x: in out int, y : float) { x += ... }
```
The available directions are:
* `in` (the default) indicates typical pass-by-value (copy-in) semantics. The callee receives a *copy* of the argument passed by the caller.
* `out` indicates copy-out semantics. The callee writes to the parameter and then a copy of that value is assigned to the argument of the caller after the call returns.
* `in out` or `inout` indicates pass-by-value-result (copy-in and copy-out) semantics. The callee receives a copy of the argument passed by the caller, it may manipulate the copy, and then when the call returns the final value is copied back to the argument of the caller.
An implementation may assume that at every call site the arguments for `out` or `in out` parameters never alias.
Under those assumptions, the `out` and `inout` cases may be optimized to use pass-by-reference instead of copy-in and copy-out.
> Note: Applications that rely on the precise order in which write-back for `out` and `in out` parameters is performed are already on shaky semantic ground.
Body
----
The _body_ of a function declaration consists of statements enclosed in curly braces `{}`.
In some cases a function declaration does not include a body, and in these cases the declaration must be terminated with a semicolon (`;`):
```hlsl
func getCount() -> int;
```
> Note: Slang does not require "forward declaration" of functions, although
> forward declarations are supported as a compatibility feature.
>
> The only place where a function declaration without a definition should be
> required is in the body of an `interface` declaration.
The result type of a function may be specified after the parameter list using a _result type clause_ consisting of an arrow (`->`) followed by a type.
If the function result type is `void`, the result type clause may be elided:
```hlsl
func modify(x: in out int) { x++; }
```
### Traditional Syntax
Functions can also be declared with traditional C-style syntax:
```hlsl
float add(int x, float y) { return float(x) + y; }
void modify(in out int x) { x ++; }
```
> Note: Currently traditional syntax must be used for shader entry point functions,
> because only the traditional syntax currently supports attaching semantics to
> parameters.
### Entry Points
An _entry point_ is a function that will be used as the starting point of execution for one or more invocations of a shader.
Structure Types
---------------
Structure types are declared using the `struct` keyword:
```hlsl
struct Person
{
var age : int;
float height;
int getAge() { return age; }
func getHeight() -> float { return this.height; }
static func getPopulation() -> int { ... }
}
```
The body of a structure type declaration may include variable, type, function, and initializer declarations.
### Fields
Variable declarations in the body of a structure type declaration are also referred to as _fields_.
A field that is marked `static` is shared between all instances of the type, and is semantically like a global variable marked `static`.
A non-`static` field is also called an _instance field_.
### Methods
Function declarations in the body of a structure type declaration are also referred to as _methods_.
A method declaration may be marked `static`.
A `static` method must be invoked on the type itself (e.g., `Person.getPopulation()`).
A non-`static` method is also referred to as an _instance method_.
Instance methods must be invoked on an instance of the type (e.g., `somePerson.getAge()`).
The body of an instance method has access to an implicit `this` parameter which refers to the instance on which the method was invoked.
By default the `this` parameter of an instance method acts as an immutable variable.
An instance method with the `[mutating]` attribute receives a mutable `this` parameter, and can only be invoked on a mutable value of the structure type.
### Inheritance
A structure type declaration may include an _inheritance clause_ that consists of a colon (`:`) followed by a comma-separated list of types that the structure type inherits from:
```
struct Person : IHasAge, IHasName
{ .... }
```
When a structure type declares that it inherits from an interface, the programmer asserts that the structure type implements the required members of the interface.
### Syntax Details
A structure declaration does *not* need to be terminated with a semicolon:
```hlsl
// A terminating semicolon is allowed
struct Stuff { ... };
// The semicolon is not required
struct Things { ... }
```
When a structure declarations ends without a semicolon, the closing curly brace (`}`) must be the last non-comment, non-whitespace token on its line.
For compatibility with C-style code, a structure type declaration may be used as the type specifier in a traditional-style variable declaration:
```hlsl
struct Association
{
int from;
int to;
} associations[] =
{
{ 1, 1 },
{ 2, 4 },
{ 3, 9 },
};
```
If a structure type declaration will be used as part of a variable declaration, then the next token of the variable declaration must appear on the same line as the closing curly brace (`}`) of the structure type declaration.
The whole variable declaration must be terminated with a semicolon (`;`) as normal.
Enumeration Types
-----------------
Enumeration type declarations are introduced with the `enum` keyword:
```hlsl
enum Color
{
Red,
Green = 3,
Blue,
}
```
### Cases
The body of an enumeration type declaration consists of a comma-separated list of case declarations.
An optional trailing comma may terminate the list of cases.
A _case declaration_ consists of the name of the case, along with an optional initial-value expression that specifies the _tag value_ for that case.
If the first case declaration in the body elides an initial-value expression, the value `0` is used for the tag value.
If any other case declaration elides an initial-value expressions, its tag value is one greater than the tag value of the immediately preceding case declaration.
An enumeration case is referred to as if it were a `static` member of the enumeration type (e.g., `Color.Red`).
### Inheritance
An enumeration type declaration may include an inheritance clause:
```hlsl
enum Color : uint
{ ... }
```
The inheritance clause of an enumeration declaration may currently only be used to specify a single type to be used as the _tag type_ of the enumeration type.
The tag type of an enumeration must be a built-in scalar integer type.
The tag value of each enumeration case will be a value of the tag type.
If no explicit tag type is specified, the type `int` is used instead.
> Note: The current Slang implementation has bugs that prevent explicit tag types from working correctly.
### Conversions
A value of an enumeration type can be implicitly converted to a value of its tag type:
```hlsl
int r = Color.Red;
```
Values of the tag type can be explicitly converted to the enumeration type:
```hlsl
Color red = Color(r);
```
Type Aliases
------------
A type alias is declared using the `typealias` keyword:
```hlsl
typealias Height = int;
```
A type alias defines a name that will be equivalent to the type to the right of `=`.
### Traditional Syntax
Type aliases can also be declared with traditional C-style syntax:
```hlsl
typedef int Height;
```
Constant Buffers and Texture Buffers
------------------------------------
As a compatibility feature, the `cbuffer` and `tbuffer` keywords can be used to introduce variable declarations.
A declaration of the form:
```hlsl
cbuffer Name
{
F field;
// ...
}
```
is equivalent to a declaration of the form:
```hlsl
struct AnonType
{
F field;
// ...
}
__transparent ConstantBuffer<AnonType> anonVar;
```
In this expansion, `AnonType` and `anonVar` are fresh names generated for the expansion that cannot collide with any name in user code, and the modifier `__transparent` makes it so that an unqualified reference to `field` can implicitly resolve to `anonVar.field`.
The keyword `tbuffer` uses an equivalent expansion, but with `TextureBuffer<T>` used instead of `ConstantBuffer<T>`.
Interfaces
----------
An interface is declared using the `interface` keyword:
```hlsl
interface IRandom
{
uint next();
}
```
The body of an interface declaration may contain function, initializer, subscript, and associated type declarations.
Each declaration in the body of an interface introduces a _requirement_ of the interface.
Types that declare conformance to the interface must provide matching implementations of the requirements.
Functions, initializers, and subscripts declared inside an interface must not have bodies; default implementations of interface requirements are not currently supported.
An interface declaration may have an inheritance clause:
```hlsl
interface IBase
{
int getBase();
}
interface IDerived : IBase
{
int getDerived();
}
```
The inheritance clause for an interface must only list other interfaces.
If an interface `I` lists another interface `J` in its inheritance clause, then `J` is a _base interface_ of `I`.
In order to conform to `I`, a type must also conform to `J`.
Associated Types
----------------
An associated type declaration is introduced with `associatedtype`:
```hlsl
associatedtype Iterator;
```
An associated type declaration introduces a type into the signature of an interface, without specifying the exact concrete type to use.
An associated type is an interface requirement, and different implementations of an interface may provide different types that satisfy the same associated type interface requirement:
```
interface IContainer
{
associatedtype Iterator;
...
}
struct MyArray : IContainer
{
typealias Iterator = Int;
...
}
struct MyLinkedList : IContainer
{
struct Iterator { ... }
...
}
```
It is an error to declare an associated type anywhere other than the body of an interface declaration.
An associated type declaration may have an inheritance clause.
The inheritance clause of an associated type may only list interfaces; these are the _required interfaces_ for the associated type.
A concrete type that is used to satisfy an associated type requirement must conform to all of the required interfaces of the associated type.
Initializers
------------
An initializer declaration is introduced with the `__init` keyword:
```hlsl
struct MyVector
{
    float x, y;
__init(float s)
{
x = s;
y = s;
}
}
```
> Note: Initializer declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
> Arbitrary changes to the syntax and semantics of initializers may be introduced in future versions of Slang.
An initializer declaration may only appear in the body of an interface or a structure type.
An initializer defines a method for initializing an instance of the enclosing type.
> Note: A C++ programmer might think of an initializer declaration as similar to a C++ _constructor_.
An initializer has a parameter list and body just like a function declaration.
An initializer must not include a result type clause; the result type of an initializer is always the enclosing type.
An initializer is invoked by calling the enclosing type as if it were a function.
E.g., in the example above, the initializer in `MyVector` can be invoked as `MyVector(1.0f)`.
An initializer has access to an implicit `this` variable that is the instance being initialized; an initializer must not be marked `static`.
The `this` variable of an initializer is always mutable; an initializer need not, and must not, be marked `[mutating]`.
> Note: Slang currently does not enforce that a type with an initializer can only be initialized using its initializers.
> It is possible for user code to declare a variable of type `MyVector` above, and explicitly write to the `x` and `y` fields to initialize it.
> A future version of the language may close up this loophole.
> Note: Slang does not provide any equivalent to C++ _destructors_ which run automatically when an instance goes out of scope.
Subscripts
----------
A subscript declaration is introduced with the `__subscript` keyword:
```hlsl
struct MyVector
{
...
__subscript(int index) -> float
{
get { return index == 0 ? x : y; }
}
}
```
> Note: subscript declarations are a non-finalized and unstable feature, as indicated by the double-underscore (`__`) prefix on the keyword.
> Arbitrary changes to the syntax and semantics of subscript declarations may be introduced in future versions of Slang.
A subscript declaration introduces a way for a user-defined type to support subscripting with the `[]` braces:
```hlsl
MyVector v = ...;
float f = v[0];
```
A subscript declaration lists one or more parameters inside parentheses, followed by a result type clause starting with `->`.
The result type clause of a subscript declaration cannot be elided.
The body of a subscript declaration consists of _accessor declarations_.
Currently only `get` accessor declarations are supported for user code.
A `get` accessor declaration introduces a _getter_ for the subscript.
The body of a getter is a code block like a function body, and must return the appropriate value for a subscript operation.
The body of a getter can access the parameters of the enclosing subscript, as well as an implicit `this` parameter of the type that encloses the accessor.
The `this` parameter of a getter is immutable; `[mutating]` getters are not currently supported.
Extensions
----------
An extension declaration is introduced with the `extension` keyword:
```hlsl
extension MyVector
{
float getLength() { return sqrt(x*x + y*y); }
static int getDimensionality() { return 2; }
}
```
An extension declaration adds behavior to an existing type.
In the example above, the `MyVector` type is extended with an instance method `getLength()`, and a static method `getDimensionality()`.
An extension declaration names the type being extended after the `extension` keyword.
The body of an extension declaration may include type declarations, functions, initializers, and subscripts.
> Note: The body of an extension may *not* include variable declarations.
> An extension cannot introduce members that would change the in-memory layout of the type being extended.
The members of an extension are accessed through the type that is being extended.
For example, for the above extension of `MyVector`, the introduced methods are accessed as follows:
```hlsl
MyVector v = ...;
float f = v.getLength();
int n = MyVector.getDimensionality();
```
An extension declaration need not be placed in the same module as the type being extended; it is possible to extend a type from third-party or standard module code.
The members of an extension are only visible inside of modules that `import` the module declaring the extension;
extension members are *not* automatically visible wherever the type being extended is visible.
An extension declaration may include an inheritance clause:
```hlsl
extension MyVector : IPrintable
{
...
}
```
The inheritance clause of an extension declaration may only include interfaces.
When an extension declaration lists an interface in its inheritance clause, it asserts that the extension introduces a new conformance, such that the type being extended now conforms to the given interface.
The extension must ensure that the type being extended satisfies all the requirements of the interface.
Interface requirements may be satisfied by the members of the extension, members of the original type, or members introduced through other extensions visible at the point where the conformance was declared.
It is an error for overlapping conformances (that is, of the same type to the same interface) to be visible at the same point.
This includes cases where two extensions declare the same conformance, as well as those where the original type and an extension both declare the same conformance.
The conflicting conformances may come from the same module or different modules.
In order to avoid problems with conflicting conformances, when a module `M` introduces a conformance of type `T` to interface `I`, one of the following should be true:
* the type `T` is declared in module `M`, or
* the interface `I` is declared in module `M`
Any conformance that does not follow these rules (that is, where both `T` and `I` are imported into module `M`) is called a _retroactive_ conformance, and there is no way to guarantee that another module `N` will not introduce the same conformance.
The runtime behavior of programs that include overlapping retroactive conformances is currently undefined.
Currently, extension declarations can only apply to structure types; extensions cannot apply to enumeration types or interfaces.
Generics
--------
Many kinds of declarations can be made _generic_: structure types, interfaces, extensions, functions, initializers, and subscripts.
A generic declaration introduces a _generic parameter list_ enclosed in angle brackets `<>`:
```hlsl
T myFunction<T>(T left, T right, bool condition)
{
return condition ? left : right;
}
```
### Generic Parameters
A generic parameter list can include one or more parameters separated by commas.
The allowed forms for generic parameters are:
* A single identifier like `T` is used to declare a _generic type parameter_ with no constraints.
* A clause like `T : IFoo` is used to introduce a generic type parameter `T` where the parameter is _constrained_ so that it must conform to the `IFoo` interface.
* A clause like `let N : int` is used to introduce a generic value parameter `N`, which takes on values of type `int`.
> Note: The syntax for generic value parameters is provisional and subject to possible change in the future.
Generic parameters may declare a default value with `=`:
```hlsl
T anotherFunction<T = float, let N : int = 4>(vector<T,N> v);
```
For generic type parameters, the default value is a type to use if no argument is specified.
For generic value parameters, the default value is a value of the same type to use if no argument is specified.
### Explicit Specialization
A generic is _specialized_ by applying it to _generic arguments_ listed inside angle brackets `<>`:
```hlsl
anotherFunction<int, 3>
```
Specialization produces a reference to the declaration with all generic parameters bound to concrete arguments.
When specializing a generic, generic type parameters must be matched with type arguments that conform to the constraints on the parameter, if any.
Generic value parameters must be matched with value arguments of the appropriate type, and that are specialization-time constants.
An explicitly specialized function, type, etc. may be used wherever a non-generic function, type, etc. is expected:
```hlsl
int i = anotherFunction<int,3>( int3(99) );
```
### Implicit Specialization
If a generic function/type/etc. is used where a non-generic function/type/etc. is expected, the compiler attempts _implicit specialization_.
Implicit specialization infers generic arguments from the context at the use site, as well as any default values specified for generic parameters.
For example, if a programmer writes:
```hlsl
int i = anotherFunction( int3(99) );
```
The compiler will infer the generic arguments `<int, 3>` from the way that `anotherFunction` was applied to a value of type `int3`.
> Note: Inference for generic arguments currently only takes the types of value arguments into account.
> The expected result type does not currently affect inference.
### Syntax Details
The following examples show how generic declarations of different kinds are written:
```
T genericFunction<T>(T value);
func genericFunction<T>(value: T) -> T;
__init<T>(T value);
__subscript<T>(T value) -> X { ... }
struct GenericType<T>
{
T field;
}
interface IGenericInterface<T> : IBase<T>
{
}
```
> Note: Currently there is no user-exposed syntax for writing a generic extension.

View file

@ -1,32 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Attributes
==========
> Note: This section is not yet complete.
## [[vk::spirv_instruction]]
** SPIR-V only **
This attribute is only available for Vulkan SPIR-V output.
The attribute allows access to SPIR-V intrinsics, by supplying a function declaration with the appropriate signature for the SPIR-V op and no body. The intrinsic takes a single parameter which is the integer value for the SPIR-V op.
In the example below the add function, uses the mechanism to directly use the SPIR-V integer add 'op' which is 128 in this case.
```HLSL
// 128 is OpIAdd in SPIR-V
[[vk::spirv_instruction(128)]]
uint add(uint a, uint b);
RWStructuredBuffer<uint> resultBuffer;
[numthreads(4,1,1)]
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
{
uint threadId = dispatchThreadID.x;
resultBuffer[threadId] = add(threadId, threadId);
}
```

View file

@ -1,16 +0,0 @@
> Note: This document is a work in progress. It is both incomplete and, in many cases, inaccurate.
Slang Language Reference
========================
Contents
--------
* [1 - Introduction](01-introduction.md)
* [2 - Lexical Structure](02-lexical-structure.md)
* [3 - Preprocessor](03-preprocessor.md)
* [4 - Types](04-types.md)
* [5 - Expressions](05-expressions.md)
* [6 - Statements](06-statements.md)
* [7 - Declarations](07-declarations.md)
* [8 - Attributes](08-attributes.md)

View file

@ -1,228 +0,0 @@
Parameter Layout Rules
======================
An important goal of the Slang project is that the rules for how shader parameters get assigned to `register`s/`binding`s is completely deterministic, so that users can rely on the compiler's behavior.
This document will attempt to explain the rules that Slang employs at a high level.
Eventually it might evolve into a formal specification of the expected behavior.
Guarantees
----------
The whole point of having a deterministic layout approach is the guarantees that it gives to users, so we will start by explicitly stating the guarantees that users can rely upon:
* A single top-level shader parameter will always occupy a contiguous range of bindings/registers for each resource type it consumes (e.g., a contiguous range of `t` registers, a contiguous range of bytes in a `cbuffer`, etc.).
* The amount of resources a parameter consumes depends only on its type, and top-level context in which it appears (e.g., is it in a `cbuffer`? an entry-point varying parameter? etc.).
* A shader parameter that is declared the same way in two different programs will get the same *amount* of resources (registers/bytes) allocated for it in both programs, but it might get a different starting offset/register.
* Changing the bodies of functions in shader code cannot change the layout of shader parameters. In particular, just because a shader parameter is "dead" does not mean it gets eliminated.
* If the user doesn't use explicit `register`/`layout` modifiers to bind parameters, then each module will get a contiguous range of bindings, and the overall program will always use a contiguous range starting from zero for each resource type.
Overview of the Layout Algorithm
--------------------------------
Layout is applied to a Slang *compile request* which comprises one or more *translation units* of user code, and zero or more `import`ed modules.
The compile request also specifies zero or more *entry points* to be compiled, where each entry point identifies a function and a profile to use.
Layout is always done with respect to a chosen *target*, and different targets might compute the resource usage for types differently, or apply different alignment.
Within a single target there may also be different layout rules (e.g., the difference between GLSL `std140` and `std430`).
Layout proceeds in four main phases:
1. Establish a global ordering on shader parameters
2. Compute the resource requirements of each shader parameter
3. Process shader parameters with fixed binding modifiers
4. Allocate bindings to parameter without fixed binding modifiers
Ordering (and Collapsing) Shader Parameters
-------------------------------------------
Shader parameters from the user's code always precede shader parameters from imported modules.
The order of parameters in the user's code is derived by "walking" through the code as follows:
* Walk through each translation unit in the order they were added via API (or the order they were listed on the command line)
* Walk through each source file of a translation unit in the order they were added/listed
* Walk through global-scope shader parameter declarations (global variables, `cbuffer`s, etc.) in the order they are listed in the (preprocessed) file.
* After all global parameters for a translation unit have been walked, walk through any entry points in the translation unit.
* When walking through an entry point, walk through all of its function parameters (both uniforms and varyings) in order, and then walk the function result as a varying output parameter.
When dealing with global-scope parameters in the user's code, it is possible for the "same" parameter to appear in multiple translation units.
Any two global shader parameters in user code with the same name are assumed to represent the same parameter, and will only be included in the global order at the first location where they are seen.
It is an error for the different declarations to have a mismatch in type, or conflicting explicit bindings.
Parameters from `import`ed modules are enumerated after the user code, using the order in which modules were first `import`ed.
The order of parameters within each module is the same as when the module was compiled, which matches the ordering given above.
Computing Resource Requirements
-------------------------------
Each shader parameter computes its resource requirements based on its type, and how it is declared.
* Global-scope parameters, entry point `uniform` parameters, and `cbuffer` declarations all use the "default" layout rules
* Entry point non-`uniform` parameters use "varying" layout rules, either input or output
* A few other special case rules exist (e.g., for laying out the elements of a `StructuredBuffer`), but most users will not need to worry about these
Note that the "default" rules are different for D3D and GL/Vulkan targets, because they have slightly different packing behavior.
### Plain Old Data
Under the default rules simple scalar types (`bool`, `int`, `float`, etc.) are laid out as "uniform" data (that is, bytes of ordinary memory).
In most cases, the size matches the expected data type size (although be aware that most targets treat `bool` as a synonym for `int`) and the alignment is the same as the size.
### Vectors
Vectors are laid out as N sequential scalars.
Under HLSL rules, a vector has the same alignment as its scalar type.
Under GLSL `std140` rules, a vector has an alignment that is its size rounded up to the next power of two (so a `float3` has `float4` alignment).
### Opaque Types
"Opaque" types include resource/sampler types like `Texture2D` and `SamplerState`.
These consume a single "slot" of the appropriate category for the chosen API.
Note that when compiling for D3D, a `Texture2D` and a `SamplerState` will consume different resources (`t` and `s` registers, respectively), but when compiling for Vulkan, they both consume the same resource ("descriptor table slot").
Opaque types currently all have an alignment of one.
### Structures
A structure is laid out by initializing a counter for each resource type, and then processing fields sequentially (in declaration order):
* Compute resource usage for the field's type
* Adjust counters based on the alignment of the field for each resource type where it has non-zero usage
* Assign an offset to the field for each resource type where it has non-zero usage
* Add the resource usage of the field to the counters
An important wrinkle is that when doing layout for HLSL, we must ensure that if a field with uniform data that is smaller than 16 bytes would straddle a 16-byte boundary, we advance to the next 16-byte aligned offset.
The overall alignment of a `struct` is the maximum alignment of its fields or the default alignment (if it is larger).
The default alignment is 16 for both D3D and Vulkan targets.
The final resource usage of a `struct` is rounded up to a multiple of the alignment for each resource type. Note that we allow a `struct` to consume zero bytes of uniform storage.
It is important to note that a `struct` type can use resources of many different kinds, so in general we cannot talk about the "size" of a type, but only its size for a particular kind of resource (uniform bytes, texture registers, etc.).
### Sized Arrays
For uniform data, the size of the element type is rounded up to the target-specific minimum (e.g., 16 for D3D and Vulkan constant buffers) to arrive at the *stride* of the array. The total size of the array is then the stride times the element count.
For opaque resource types, the D3D case simply takes the stride to be the number of registers consumed by each element, and multiplies this by the element count.
For Vulkan, an array of resources uses only a single `binding`, so that the stride is always zero for these resource kinds, and the resource usage of an array is the same as its element type.
### Unsized Arrays
The uniform part of an unsized array has the same stride as for the sized case, but an effectively infinite size.
For register/binding resource usage, a Vulkan unsized array is just like a sized one, while a D3D array will consume a full register *space* instead of individual registers.
### Constant Buffers
To determine the resource usage of a constant buffer (either a `cbuffer { ... }` declaration or a `ConstantBuffer<T>`) we look at the resource usage of its element type.
If the element uses any uniform data, the constant buffer will use at least one constant-buffer register (or whatever the target-specific resource is).
If the element uses any non-uniform data, that usage will be added to that of the constant buffer.
### Parameter Blocks
A parameter block is similar to a constant buffer.
If the element type uses any uniform data, we compute resource usage for a constant buffer.
We then add in any non-uniform resource usage for the element types.
If the target requires use of register spaces (e.g., for Vulkan), then a parameter block uses a single register space; otherwise it exposes the resource usage of its element type directly.
Processing Explicit Binding Modifiers
-------------------------------------
If the user put an explicit binding modifier on a parameter, and that modifier applies to the current target, then we use it and "reserve" space in the overall binding range.
Traditional HLSL `register` modifiers only apply for D3D targets.
Slang currently allows GLSL-style `layout(binding =...)` modifiers to be attached to shader parameters, and will use those modifiers for GL/Vulkan targets.
If two parameters reserve overlapping ranges, we currently issue an error.
This may be downgraded to a warning for targets that support overlapping ranges.
Allocating Bindings to Parameters
---------------------------------
Once ranges have been reserved for parameters with explicit bindings, the compiler goes through all parameters again, in the global order and assigns them bindings based on their resource requirements.
For each resource type used by a parameter, it is allocated the first contiguous range of resources of that type that have not been reserved.
Splitting of Arrays
-------------------
In order to support `struct` types that mix uniform and non-uniform data, the Slang compiler always "splits" these types.
For example, given:
```hlsl
struct LightInfo { float3 pos; Texture2D shadowMap; };
LightInfo gLight;
```
Slang will generate code like:
```hlsl
float3 gLight_pos;
Texture2D gLight_shadowMap;
```
In a simple case like the above, this doesn't affect layout at all, but once arrays get involved, the layout can be more complicated. Consider this case:
```hlsl
struct Pair { Texture2D a; Texture2D b; };
Pair gPairs[8];
```
The output from the splitting step is equivalent to:
```hlsl
Texture2D gPairs_a[8];
Texture2D gPairs_b[8];
```
While this transformation is critical for having a type layout algorithm that applies across all APIs (and also it is pretty much required to work around various bugs in downstream compilers), it has the important down-side that the value `gPairs[0]` does not occupy a contiguous range of registers (although the top-level shader parameter `gPairs` *does*).
The Slang reflection API will correctly report the information about this situation:
* The "stride" of the `gPairs` array will be reported as one, because `gPairs[n+1].a` is always one register after `gPairs[n].a`.
* The offset of the `gPairs.b` field will be reported as 8, because `gPairs[0].b` will be 8 registers after the starting register for `gPairs`.
The Slang API tries to provide the best information it can in this case, but it is still important for users who mix arrays and complex `struct` types to know how the compiler will lay them out.
Generics
--------
Generic type parameters complicate these layout rules.
For example, we cannot compute the exact resource requirements for a `vector<T,3>` without knowing what the type `T` is.
When computing layouts for fully specialized types or programs, no special considerations are needed: the rules as described in this document still apply.
One important consequence to understand is that given a type like:
```hlsl
struct MyStuff<T>
{
int a;
T b;
int c;
}
```
the offset computed for the `c` field depends on the concrete type that gets plugged in for `T`.
We think this is the least surprising behavior for programmers who might be familiar with things like C++ template specialization.
In cases where confusion about a field like `c` getting different offsets in different specializations is a concern, users are encouraged to declare types so that all non-generic-dependent fields come before generic-dependent ones.

View file

@ -1,89 +0,0 @@
NVAPI Support
=============
Slang provides support for [NVAPI](https://developer.nvidia.com/nvapi) in several ways
* Slang allows the use of NVAPI directly, by the inclusion of the `#include "nvHLSLExtns.h"` header in your Slang code. Doing so will make all the NVAPI functions directly available and usable within your Slang source code.
* NVAPI is used to provide features implicitly for certain targets. For example support for [RWByteAddressBuffer atomics](target-compatibility.md) on HLSL based targets is supported currently via NVAPI.
* Direct and implicit NVAPI usage can be freely mixed.
Direct usage of NVAPI
=====================
Direct usage of NVAPI just requires the inclusion of the appropriate NVAPI header, typically with `#include "nvHLSLExtns.h"` within your Slang source. As is required by NVAPI before the `#include` it is necessary to specify the slot and perhaps space usage. For example a typical direct NVAPI usage inside a Slang source file might contain something like...
```
#define NV_SHADER_EXTN_SLOT u0
#include "nvHLSLExtns.h"
```
In order for the include to work, it is necessary for the include path to include the folder that contains the nvHLSLExtns.h and associated headers.
Implicit usage of NVAPI
=======================
It is convenient and powerful to be able to directly use NVAPI calls, but will only work on such targets that support the mechanism, even if there is a way to support the functionality some other way.
Slang provides some cross platform features on HLSL based targets that are implemented via NVAPI. For example RWByteAddressBuffer atomics are supported on Vulkan, DX12 and CUDA. On DX12 they are made available via NVAPI, whilst CUDA and Vulkan have direct support. When compiling Slang code that uses RWByteAddressBuffer atomics Slang will emit HLSL code that use NVAPI. In order for the downstream compiler to be able to compile this HLSL it must be able to include the NVAPI header `nvHLSLExtns.h`.
It is worth discussing briefly how this mechanism works. Slang has a 'prelude' mechanism for different source targets. The prelude is a piece of text that is inserted before the source that is output from compiling the input Slang source code. There is a default prelude for HLSL that is something like
```
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
```
If there are any calls to NVAPI implicitly from Slang source, then the following is emitted before the prelude
```
#define SLANG_HLSL_ENABLE_NVAPI 1
#define NV_SHADER_EXTN_SLOT u0
#define NV_SHADER_EXTN_REGISTER_SPACE space0
```
Thus causing the prelude to include nvHLSLExtns.h, and specifying the slot and potentially the space as is required for inclusion of nvHLSLExtns.h.
The actual values for the slot and optionally the space, are found by Slang examining the values of those macros at the end of preprocessing input Slang source files.
This means that if you compile Slang source that has implicit use of NVAPI, the slot and optionally the space must be defined. This can be achieved with a command line -D, through the API or through having suitable `#define`s in the Slang source code.
It is worth noting if you *replace* the default HLSL prelude, and use NVAPI then it will be necessary to have something like the default HLSL prelude part of your custom prelude.
Downstream Compiler Include
---------------------------
There is a subtle detail that is perhaps worth noting here around the downstream compiler and `#include`s. When Slang outputs HLSL it typically does not contain any `#include`, because all of the `#include` in the original source code have been handled by Slang. Slang then outputs everything required to compile to the downstream compiler *without* any `#include`. When NVAPI is used explicitly this is still the case - the NVAPI headers are consumed by Slang, and then Slang will output HLSL that does not contain any `#include`.
The astute reader may have noticed that the default Slang HLSL prelude *does* contain an include, which is enabled via SLANG_HLSL_ENABLE_NVAPI macro which Slang will set with implicit NVAPI use.
```
#ifdef SLANG_HLSL_ENABLE_NVAPI
#include "nvHLSLExtns.h"
#endif
```
This means that the *downstream* compiler (such as DXC and FXC) must be able to handle this include. Include paths can be specified for downstream compilers via the [-X mechanism](user-guide/08-compiling.md#downstream-arguments). So for example...
```
-Xfxc -IpathTo/nvapi -Xdxc -IpathTo/nvapi
```
In the explicit scenario where `nvHLSLExtns.h` is included in Slang source, the include path must be specified in Slang through the regular mechanisms.
In a scenario with both implicit and explicit use, both Slang *and* the downstream compiler need to have a suitable path specified. Things can be more complicated if there is mixed implicit/explicit NVAPI usage and in the Slang source the include path is set up such that NVAPI is included with
```
#include "nvapi/nvHLSLExtns.h"
```
Since Slang and the downstream compilers can specify different include paths, the downstream compiler include path can be such that `#include "nvHLSLExtns.h"` works with the default prelude.
Another way of working around this issue is to alter the prelude for downstream compilers such that it contains an absolute path for the `#include`. This is the mechanism that is currently used with the Slang test infrastructure.
Links
-----
More details on how this works can be found in the following PR
* [Simplify workflow when using NVAPI #1556](https://github.com/shader-slang/slang/pull/1556)

Some files were not shown because too many files have changed in this diff Show more