zig/test/nvptx.zig
Andrew Kelley 142471fcc4 zig build system: change target, compilation, and module APIs
Introduce the concept of "target query" and "resolved target". A target
query is what the user specifies, with some things left to default. A
resolved target has the default things discovered and populated.
In the future, std.zig.CrossTarget will be renamed to std.Target.Query.
Introduces `std.Build.resolveTargetQuery` to get from one to the other.

The concept of `main_mod_path` is gone, no longer supported. You have to
put the root source file at the module root now.

* remove deprecated API
* update build.zig for the breaking API changes in this branch
* move std.Build.Step.Compile.BuildId to std.zig.BuildId
* add more options to std.Build.ExecutableOptions, std.Build.ObjectOptions,
  std.Build.SharedLibraryOptions, std.Build.StaticLibraryOptions, and
  std.Build.TestOptions.
* remove `std.Build.constructCMacro`. There is no use for this API.
* deprecate `std.Build.Step.Compile.defineCMacro`. Instead,
  `std.Build.Module.addCMacro` is provided.
  - remove `std.Build.Step.Compile.defineCMacroRaw`.
* deprecate `std.Build.Step.Compile.linkFrameworkNeeded`
  - use `std.Build.Module.linkFramework`
* deprecate `std.Build.Step.Compile.linkFrameworkWeak`
  - use `std.Build.Module.linkFramework`
* move more logic into `std.Build.Module`
* allow `target` and `optimize` to be `null` when creating a Module.
  Along with other fields, those unspecified options will be inherited
  from parent `Module` when inserted into an import table.
* the `target` field of `addExecutable` is now required. Pass `b.host`
  to get the host target (see the build.zig sketch below).
2024-01-01 17:51:18 -07:00
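
A minimal build.zig sketch of how the reworked APIs fit together; the `hello` name and `src/main.zig` path are placeholders, and only `resolveTargetQuery`, the required `target` field, and `b.host` come from this change:

const std = @import("std");

pub fn build(b: *std.Build) void {
    // A target query (with some fields left to default) is resolved into a
    // concrete target up front; pass `b.host` to target the build machine.
    const target = b.resolveTargetQuery(.{});

    const exe = b.addExecutable(.{
        .name = "hello",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target, // now a required field
        .optimize = .Debug,
    });
    b.installArtifact(exe);
}

The test file below (test/nvptx.zig) uses the same `resolveTargetQuery` call to build its cases for the nvptx64-cuda target.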


const std = @import("std");
const Cases = @import("src/Cases.zig");
pub fn addCases(ctx: *Cases, b: *std.Build) !void {
const target = b.resolveTargetQuery(.{
.cpu_arch = .nvptx64,
.os_tag = .cuda,
});
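
    // An ordinary helper function called from an exported kernel entry point
    // (callconv(.Kernel)).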
    {
        var case = addPtx(ctx, target, "simple addition and subtraction");
        case.addCompile(
            \\fn add(a: i32, b: i32) i32 {
            \\    return a + b;
            \\}
            \\
            \\pub export fn add_and_substract(a: i32, out: *i32) callconv(.Kernel) void {
            \\    const x = add(a, 7);
            \\    var y = add(2, 0);
            \\    y -= x;
            \\    out.* = y;
            \\}
        );
    }
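
    // Reads the %tid.x special register through PTX inline assembly.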
    {
        var case = addPtx(ctx, target, "read special registers");
        case.addCompile(
            \\fn threadIdX() u32 {
            \\    return asm ("mov.u32 \t%[r], %tid.x;"
            \\        : [r] "=r" (-> u32),
            \\    );
            \\}
            \\
            \\pub export fn special_reg(a: []const i32, out: []i32) callconv(.Kernel) void {
            \\    const i = threadIdX();
            \\    out[i] = a[i] + 7;
            \\}
        );
    }
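
    // A global-address-space variable mutated from a kernel.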
    {
        var case = addPtx(ctx, target, "address spaces");
        case.addCompile(
            \\var x: i32 addrspace(.global) = 0;
            \\
            \\pub export fn increment(out: *i32) callconv(.Kernel) void {
            \\    x += 1;
            \\    out.* = x;
            \\}
        );
    }
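
    // Tree reduction over a shared-memory buffer: bar.sync between strides and
    // an @addrSpaceCast from .shared to the generic address space.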
    {
        var case = addPtx(ctx, target, "reduce in shared mem");
        case.addCompile(
            \\fn threadIdX() u32 {
            \\    return asm ("mov.u32 \t%[r], %tid.x;"
            \\        : [r] "=r" (-> u32),
            \\    );
            \\}
            \\
            \\var _sdata: [1024]f32 addrspace(.shared) = undefined;
            \\pub export fn reduceSum(d_x: []const f32, out: *f32) callconv(.Kernel) void {
            \\    var sdata: *addrspace(.generic) [1024]f32 = @addrSpaceCast(&_sdata);
            \\    const tid: u32 = threadIdX();
            \\    var sum = d_x[tid];
            \\    sdata[tid] = sum;
            \\    asm volatile ("bar.sync \t0;");
            \\    var s: u32 = 512;
            \\    while (s > 0) : (s = s >> 1) {
            \\        if (tid < s) {
            \\            sum += sdata[tid + s];
            \\            sdata[tid] = sum;
            \\        }
            \\        asm volatile ("bar.sync \t0;");
            \\    }
            \\
            \\    if (tid == 0) {
            \\        out.* = sum;
            \\    }
            \\}
        );
    }
}
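
// Appends a compile-to-object test case for the given target and returns a
// pointer to it so callers can attach kernel source via `addCompile`.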
fn addPtx(ctx: *Cases, target: std.Build.ResolvedTarget, name: []const u8) *Cases.Case {
    ctx.cases.append(.{
        .name = name,
        .target = target,
        .updates = std.ArrayList(Cases.Update).init(ctx.cases.allocator),
        .output_mode = .Obj,
        .deps = std.ArrayList(Cases.DepModule).init(ctx.cases.allocator),
        .link_libc = false,
        .backend = .llvm,
        // Bug in Debug mode
        .optimize_mode = .ReleaseSafe,
    }) catch @panic("out of memory");
    return &ctx.cases.items[ctx.cases.items.len - 1];
}