first commit

This commit is contained in:
Daniel Ledda
2023-04-12 21:49:48 +02:00
parent 658b5d693a
commit 993531655f
23 changed files with 7804 additions and 3462 deletions

311
lib/c/KHR/khrplatform.h Normal file
View File

@@ -0,0 +1,311 @@
#ifndef __khrplatform_h_
#define __khrplatform_h_
/*
** Copyright (c) 2008-2018 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
/* Khronos platform-specific types and definitions.
*
* The master copy of khrplatform.h is maintained in the Khronos EGL
* Registry repository at https://github.com/KhronosGroup/EGL-Registry
* The last semantic modification to khrplatform.h was at commit ID:
* 67a3e0864c2d75ea5287b9f3d2eb74a745936692
*
* Adopters may modify this file to suit their platform. Adopters are
* encouraged to submit platform specific modifications to the Khronos
* group so that they can be included in future versions of this file.
* Please submit changes by filing pull requests or issues on
* the EGL Registry repository linked above.
*
*
* See the Implementer's Guidelines for information about where this file
* should be located on your system and for more details of its use:
* http://www.khronos.org/registry/implementers_guide.pdf
*
* This file should be included as
* #include <KHR/khrplatform.h>
* by Khronos client API header files that use its types and defines.
*
* The types in khrplatform.h should only be used to define API-specific types.
*
* Types defined in khrplatform.h:
* khronos_int8_t signed 8 bit
* khronos_uint8_t unsigned 8 bit
* khronos_int16_t signed 16 bit
* khronos_uint16_t unsigned 16 bit
* khronos_int32_t signed 32 bit
* khronos_uint32_t unsigned 32 bit
* khronos_int64_t signed 64 bit
* khronos_uint64_t unsigned 64 bit
* khronos_intptr_t signed same number of bits as a pointer
* khronos_uintptr_t unsigned same number of bits as a pointer
* khronos_ssize_t signed size
* khronos_usize_t unsigned size
* khronos_float_t signed 32 bit floating point
* khronos_time_ns_t unsigned 64 bit time in nanoseconds
* khronos_utime_nanoseconds_t unsigned time interval or absolute time in
* nanoseconds
* khronos_stime_nanoseconds_t signed time interval in nanoseconds
* khronos_boolean_enum_t enumerated boolean type. This should
* only be used as a base type when a client API's boolean type is
* an enum. Client APIs which use an integer or other type for
* booleans cannot use this as the base type for their boolean.
*
* Tokens defined in khrplatform.h:
*
* KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
*
* KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
* KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
*
* Calling convention macros defined in this file:
* KHRONOS_APICALL
* KHRONOS_APIENTRY
* KHRONOS_APIATTRIBUTES
*
* These may be used in function prototypes as:
*
* KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
* int arg1,
* int arg2) KHRONOS_APIATTRIBUTES;
*/
/* On SciTech SNAP targets, default KHRONOS_STATIC to 1 unless the build
 * already defined it. KHRONOS_STATIC selects the empty, static-linking
 * form of KHRONOS_APICALL below. */
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
# define KHRONOS_STATIC 1
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APICALL
*-------------------------------------------------------------------------
* This precedes the return type of the function in the function prototype.
*/
#if defined(KHRONOS_STATIC)
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
 * header compatible with static linking. */
# define KHRONOS_APICALL
#elif defined(_WIN32)
/* Windows: declare API functions with __declspec(dllimport). */
# define KHRONOS_APICALL __declspec(dllimport)
#elif defined (__SYMBIAN32__)
/* Symbian: use the IMPORT_C macro, which is not defined in this file and
 * must be supplied by the platform headers. */
# define KHRONOS_APICALL IMPORT_C
#elif defined(__ANDROID__)
/* Android: give API functions default symbol visibility. */
# define KHRONOS_APICALL __attribute__((visibility("default")))
#else
/* Any other platform: no decoration. */
# define KHRONOS_APICALL
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APIENTRY
*-------------------------------------------------------------------------
* This follows the return type of the function and precedes the function
* name in the function prototype.
*/
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
/* Win32, excluding WinCE and SciTech SNAP builds: API entry points are
 * declared __stdcall. */
# define KHRONOS_APIENTRY __stdcall
#else
/* Everywhere else: no calling-convention keyword is added. */
# define KHRONOS_APIENTRY
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APIATTRIBUTES
*-------------------------------------------------------------------------
* This follows the closing parenthesis of the function prototype arguments.
*/
#if defined (__ARMCC_2__)
/* When __ARMCC_2__ is defined, prototypes are suffixed with __softfp. */
#define KHRONOS_APIATTRIBUTES __softfp
#else
/* All other compilers: no trailing attribute. */
#define KHRONOS_APIATTRIBUTES
#endif
/*-------------------------------------------------------------------------
* basic type definitions
*-----------------------------------------------------------------------*/
/* Select the 32- and 64-bit integer typedefs from the first matching
 * compiler/platform branch. Every branch also sets KHRONOS_SUPPORT_INT64 and
 * KHRONOS_SUPPORT_FLOAT, which gate the 64-bit time types and the float type
 * further down in this file. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
/*
* Using <stdint.h>
*/
#include <stdint.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
/*
* To support platform where unsigned long cannot be used interchangeably with
* intptr_t (e.g. CHERI-extended ISAs), we can use the stdint.h intptr_t.
* Ideally, we could just use (u)intptr_t everywhere, but this could result in
* ABI breakage if khronos_uintptr_t is changed from unsigned long to
* unsigned long long or similar (this results in different C++ name mangling).
* To avoid changes for existing platforms, we restrict usage of intptr_t to
* platforms where the size of a pointer is larger than the size of long.
*/
#if defined(__SIZEOF_LONG__) && defined(__SIZEOF_POINTER__)
#if __SIZEOF_POINTER__ > __SIZEOF_LONG__
#define KHRONOS_USE_INTPTR_T
#endif
#endif
#elif defined(__VMS ) || defined(__sgi)
/*
* Using <inttypes.h>
*/
#include <inttypes.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
/*
* Win32
*/
typedef __int32 khronos_int32_t;
typedef unsigned __int32 khronos_uint32_t;
typedef __int64 khronos_int64_t;
typedef unsigned __int64 khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif defined(__sun__) || defined(__digital__)
/*
* Sun or Digital
*/
typedef int khronos_int32_t;
typedef unsigned int khronos_uint32_t;
/* The 64-bit type is long on 64-bit builds (__arch64__ / _LP64) and
 * long long otherwise. */
#if defined(__arch64__) || defined(_LP64)
typedef long int khronos_int64_t;
typedef unsigned long int khronos_uint64_t;
#else
typedef long long int khronos_int64_t;
typedef unsigned long long int khronos_uint64_t;
#endif /* __arch64__ */
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif 0
/*
* Hypothetical platform with no float or int64 support
* (this branch is never compiled; it defines no 64-bit typedefs)
*/
typedef int khronos_int32_t;
typedef unsigned int khronos_uint32_t;
#define KHRONOS_SUPPORT_INT64 0
#define KHRONOS_SUPPORT_FLOAT 0
#else
/*
* Generic fallback
*/
#include <stdint.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#endif
/*
* Types that are (so far) the same on all platforms
*/
/* Defined directly from built-in C types, independent of the platform
 * branches above. */
typedef signed char khronos_int8_t;            /* signed 8 bit */
typedef unsigned char khronos_uint8_t;         /* unsigned 8 bit */
typedef signed short int khronos_int16_t;      /* signed 16 bit */
typedef unsigned short int khronos_uint16_t;   /* unsigned 16 bit */
/*
* Types that differ between LLP64 and LP64 architectures - in LLP64,
* pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
* to be the only LLP64 architecture in current use.
*/
#ifdef KHRONOS_USE_INTPTR_T
/* Set earlier in this file when __SIZEOF_POINTER__ > __SIZEOF_LONG__:
 * take the pointer-sized integer types from <stdint.h>. */
typedef intptr_t khronos_intptr_t;
typedef uintptr_t khronos_uintptr_t;
#elif defined(_WIN64)
/* Win64 (LLP64): long long is used as the pointer-sized integer. */
typedef signed long long int khronos_intptr_t;
typedef unsigned long long int khronos_uintptr_t;
#else
/* Default: long is used as the pointer-sized integer. */
typedef signed long int khronos_intptr_t;
typedef unsigned long int khronos_uintptr_t;
#endif
/* Size types follow the same Win64 / non-Win64 split as above, but do not
 * consult KHRONOS_USE_INTPTR_T. */
#if defined(_WIN64)
typedef signed long long int khronos_ssize_t;
typedef unsigned long long int khronos_usize_t;
#else
typedef signed long int khronos_ssize_t;
typedef unsigned long int khronos_usize_t;
#endif
#if KHRONOS_SUPPORT_FLOAT
/*
* Float type
*
* Only defined when the platform section above set KHRONOS_SUPPORT_FLOAT
* to a non-zero value.
*/
typedef float khronos_float_t;
#endif
#if KHRONOS_SUPPORT_INT64
/* Time types
*
* These types can be used to represent a time interval in nanoseconds or
* an absolute Unadjusted System Time. Unadjusted System Time is the number
* of nanoseconds since some arbitrary system event (e.g. since the last
* time the system booted). The Unadjusted System Time is an unsigned
* 64 bit value that wraps back to 0 every 584 years. Time intervals
* may be either signed or unsigned.
*
* Both typedefs alias the 64-bit integer types, so they are only available
* when KHRONOS_SUPPORT_INT64 is non-zero.
*/
typedef khronos_uint64_t khronos_utime_nanoseconds_t;
typedef khronos_int64_t khronos_stime_nanoseconds_t;
#endif
/*
* Dummy value used to pad enum types to 32 bits.
*/
/* Guarded with #ifndef so a definition supplied before this header is
 * included takes precedence. */
#ifndef KHRONOS_MAX_ENUM
#define KHRONOS_MAX_ENUM 0x7FFFFFFF
#endif
/*
* Enumerated boolean type
*
* Values other than zero should be considered to be true. Therefore
* comparisons should not be made against KHRONOS_TRUE.
*/
typedef enum {
KHRONOS_FALSE = 0,
KHRONOS_TRUE = 1,
/* Not a boolean value: padding enumerator set to KHRONOS_MAX_ENUM. */
KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
} khronos_boolean_enum_t;
#endif /* __khrplatform_h_ */

1833
lib/c/glad/glad.c Normal file

File diff suppressed because it is too large Load Diff

3694
lib/c/glad/glad.h Normal file

File diff suppressed because it is too large Load Diff

7898
lib/c/loaders/stb_image.h Normal file

File diff suppressed because it is too large Load Diff

1739
lib/c/loaders/tinyobj.h Normal file

File diff suppressed because it is too large Load Diff

138
lib/zmath/README.md Normal file
View File

@@ -0,0 +1,138 @@
# zmath v0.9.6 - SIMD math library for game developers
Tested on x86_64 and AArch64.
Provides ~140 optimized routines and ~70 extensive tests.
Can be used with any graphics API.
Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig).
Benchmarks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig).
An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn).
## Getting started
Copy `zmath` folder to a `libs` subdirectory of the root of your project.
Then in your `build.zig` add:
```zig
const std = @import("std");
const zmath = @import("libs/zmath/build.zig");
pub fn build(b: *std.Build) void {
...
const optimize = b.standardOptimizeOption(.{});
const target = b.standardTargetOptions(.{});
const zmath_pkg = zmath.package(b, target, optimize, .{
.options = .{ .enable_cross_platform_determinism = true },
});
zmath_pkg.link(exe);
}
```
Now in your code you may import and use zmath:
```zig
const zm = @import("zmath");
pub fn main() !void {
//
// OpenGL/Vulkan example
//
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtRh(
zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
);
// `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because GLSL uses column-major matrices by default
gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
// In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
//
// DirectX example
//
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtLh(
zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
);
const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because HLSL uses column-major matrices by default
const mem = allocateUploadMemory(...);
zm.storeMat(mem, zm.transpose(object_to_clip));
// In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
//
// 'WASD' camera movement example
//
{
const speed = zm.f32x4s(10.0);
const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
zm.storeArr3(&demo.camera.forward, forward);
const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
forward = speed * delta_time * forward;
var cam_pos = zm.loadArr3(demo.camera.position);
if (keyDown('W')) {
cam_pos += forward;
} else if (keyDown('S')) {
cam_pos -= forward;
}
if (keyDown('D')) {
cam_pos += right;
} else if (keyDown('A')) {
cam_pos -= right;
}
zm.storeArr3(&demo.camera.position, cam_pos);
}
//
// SIMD wave equation solver example (works with vector width 4, 8 and 16)
// 'T' can be F32x4, F32x8 or F32x16
//
var z_index: i32 = 0;
while (z_index < grid_size) : (z_index += 1) {
const z = scale * @intToFloat(f32, z_index - grid_size / 2);
const vz = zm.splat(T, z);
var x_index: i32 = 0;
while (x_index < grid_size) : (x_index += zm.veclen(T)) {
const x = scale * @intToFloat(f32, x_index - grid_size / 2);
const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
const d = zm.sqrt(vx * vx + vz * vz);
const vy = zm.sin(d - vtime);
const index = @intCast(usize, x_index + z_index * grid_size);
zm.store(xslice[index..], vx, 0);
zm.store(yslice[index..], vy, 0);
zm.store(zslice[index..], vz, 0);
}
}
}
```

97
lib/zmath/build.zig Normal file
View File

@@ -0,0 +1,97 @@
const std = @import("std");
/// Build-time configuration for zmath. `package` forwards these values to the
/// generated `zmath_options` module.
pub const Options = struct {
/// Exported as the `enable_cross_platform_determinism` option; defaults to true.
enable_cross_platform_determinism: bool = true,
};
/// Result of `package`: the options it was built with plus the two modules
/// a consumer needs.
pub const Package = struct {
/// The options this package was created with.
options: Options,
/// The zmath library module.
zmath: *std.Build.Module,
/// The generated options module.
zmath_options: *std.Build.Module,
/// Registers both modules on `exe` under the import names "zmath" and
/// "zmath_options".
pub fn link(pkg: Package, exe: *std.Build.CompileStep) void {
exe.addModule("zmath", pkg.zmath);
exe.addModule("zmath_options", pkg.zmath_options);
}
};
/// Creates the zmath module together with its generated options module.
///
/// The target and optimize-mode parameters are accepted but ignored.
/// `args.options` supplies the value exported to the library as the
/// `enable_cross_platform_determinism` option.
pub fn package(
    b: *std.Build,
    _: std.zig.CrossTarget,
    _: std.builtin.Mode,
    args: struct {
        options: Options = .{},
    },
) Package {
    const opts = args.options;

    // Generate the options module that the library imports as "zmath_options".
    const options_step = b.addOptions();
    options_step.addOption(bool, "enable_cross_platform_determinism", opts.enable_cross_platform_determinism);
    const options_module = options_step.createModule();

    const library_module = b.createModule(.{
        .source_file = .{ .path = thisDir() ++ "/src/main.zig" },
        .dependencies = &.{
            .{ .name = "zmath_options", .module = options_module },
        },
    });

    return .{
        .options = opts,
        .zmath = library_module,
        .zmath_options = options_module,
    };
}
/// Build entry point: exposes the "test" and "benchmark" top-level steps.
pub fn build(b: *std.Build) void {
    const optimize = b.standardOptimizeOption(.{});
    const target = b.standardTargetOptions(.{});

    // Each top-level step simply depends on the run step of its artifact.
    b.step("test", "Run zmath tests").dependOn(runTests(b, optimize, target));
    b.step("benchmark", "Run zmath benchmarks").dependOn(runBenchmarks(b, target));
}
/// Adds the "zmath-tests" test artifact, rooted at src/main.zig, and returns
/// the step that runs it.
pub fn runTests(
    b: *std.Build,
    optimize: std.builtin.Mode,
    target: std.zig.CrossTarget,
) *std.Build.Step {
    const test_exe = b.addTest(.{
        .name = "zmath-tests",
        .root_source_file = .{ .path = thisDir() ++ "/src/main.zig" },
        .target = target,
        .optimize = optimize,
    });

    // The test root only needs the generated options module.
    const pkg = package(b, target, optimize, .{});
    test_exe.addModule("zmath_options", pkg.zmath_options);

    const run_step = test_exe.run();
    return &run_step.step;
}
/// Adds the "zmath-benchmarks" executable, always built in ReleaseFast, and
/// returns the step that runs it.
pub fn runBenchmarks(
    b: *std.Build,
    target: std.zig.CrossTarget,
) *std.Build.Step {
    const bench_exe = b.addExecutable(.{
        .name = "zmath-benchmarks",
        .root_source_file = .{ .path = thisDir() ++ "/src/benchmark.zig" },
        .target = target,
        .optimize = .ReleaseFast,
    });

    // The benchmark imports the library under the name "zmath".
    const pkg = package(b, target, .ReleaseFast, .{});
    bench_exe.addModule("zmath", pkg.zmath);

    const run_step = bench_exe.run();
    return &run_step.step;
}
/// Returns the directory part of this source file's path as reported by
/// `@src().file`, or "." when that path has no directory component.
/// The expression is evaluated at compile time; callers in this file
/// concatenate the result with `++`.
inline fn thisDir() []const u8 {
return comptime std.fs.path.dirname(@src().file) orelse ".";
}

469
lib/zmath/src/benchmark.zig Normal file
View File

@@ -0,0 +1,469 @@
// -------------------------------------------------------------------------------------------------
// zmath - benchmarks
// -------------------------------------------------------------------------------------------------
// 'zig build benchmark' in the root project directory will build and run 'ReleaseFast' configuration.
//
// -------------------------------------------------------------------------------------------------
// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s
// quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s
// wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s
//
// -------------------------------------------------------------------------------------------------
// 'AMD Ryzen 7 5800X 8-Core Processor', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s
// quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s
// wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s
//
// -------------------------------------------------------------------------------------------------
// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s
// quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s
// wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s
//
// -------------------------------------------------------------------------------------------------
// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s
// cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s
// quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s
// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
//
// -------------------------------------------------------------------------------------------------
/// Runs every benchmark in sequence, sharing one general-purpose allocator.
/// The second argument of each call is the number of timed repetitions.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // AOS layout, data set fits in L1 cache: m = mul(ma, mb)
    try mat4MulBenchmark(gpa, 100_000);

    // AOS layout, data set fits in L1 cache: v = 0.01 * cross3(va, vb) + vec3(1.0)
    try cross3ScaleBiasBenchmark(gpa, 10_000);

    // AOS layout, data set fits in L1 cache: v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0))
    try cross3Dot3ScaleBiasBenchmark(gpa, 10_000);

    // AOS layout, data set fits in L1 cache: q = qmul(qa, qb)
    try quatBenchmark(gpa, 10_000);

    // SOA layout: d = sqrt(x * x + z * z); y = sin(d - t)
    try waveBenchmark(gpa, 1_000);
}
const std = @import("std");
const time = std.time;
const Timer = time.Timer;
const zm = @import("zmath");
// Pseudo-random source shared by all benchmarks, seeded with the constant 0.
var prng = std.rand.DefaultPrng.init(0);
// Random interface backed by `prng`; used to generate the benchmark input data.
const random = prng.random();
/// Benchmarks 4x4 matrix multiplication: a hand-written scalar kernel versus
/// `zm.mul` on matrices loaded with `zm.loadMat`.
///
/// Two lists of 64 random `[16]f32` matrices are generated. Each timed pass
/// multiplies every element of `data0` with every element of `data1`, and the
/// pass is repeated `count` times. Both timings are printed on one line via
/// `std.debug.print`.
///
/// Returns an error if appending to the lists fails or the timer cannot be
/// started.
noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("\n", .{});
    std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});

    var data0 = std.ArrayList([16]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([16]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 64) : (i += 1) {
        try data0.append([16]f32{
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
        });
        try data1.append([16]f32{
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
        });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const ma = zm.loadMat(a[0..]);
                const mb = zm.loadMat(b[0..]);
                const r = zm.mul(ma, mb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar version.
    {
        i = 0;
        // `Timer.read` reports the nanoseconds elapsed since `Timer.start`, so it is
        // used on its own. `Timer.lap` returns a duration (and resets the timer), not
        // a timestamp; subtracting it from `read` would under-report the elapsed time.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [16]f32{
                        a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12],
                        a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13],
                        a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14],
                        a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15],
                        a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12],
                        a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13],
                        a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14],
                        a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15],
                        a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12],
                        a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13],
                        a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14],
                        a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15],
                        a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12],
                        a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13],
                        a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14],
                        a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15],
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const ma = zm.loadMat(a[0..]);
                    const mb = zm.loadMat(b[0..]);
                    const r = zm.mul(ma, mb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `0.01 * cross3(a, b) + 1.0`: a hand-written scalar kernel versus
/// the zmath equivalent built from `zm.loadArr3`, `zm.cross3` and `zm.f32x4s`.
///
/// Two lists of 256 random `[3]f32` vectors are generated. Each timed pass
/// combines every element of `data0` with every element of `data1`, and the
/// pass is repeated `count` times. Both timings are printed on one line via
/// `std.debug.print`.
///
/// Returns an error if appending to the lists fails or the timer cannot be
/// started.
noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                std.mem.doNotOptimizeAway(&cp);
            }
        }
    }

    // Scalar version.
    {
        i = 0;
        // `Timer.read` reports the nanoseconds elapsed since `Timer.start`, so it is
        // used on its own. `Timer.lap` returns a duration (and resets the timer), not
        // a timestamp; subtracting it from `read` would under-report the elapsed time.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [3]f32{
                        0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0,
                        0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0,
                        0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0,
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                    std.mem.doNotOptimizeAway(&cp);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `dot3(a, b) * (0.1 * cross3(a, b) + 1.0)`: a hand-written scalar
/// kernel versus the zmath equivalent built from `zm.loadArr3`, `zm.dot3`,
/// `zm.cross3` and `zm.f32x4s`.
///
/// Two lists of 256 random `[3]f32` vectors are generated. Each timed pass
/// combines every element of `data0` with every element of `data1`, and the
/// pass is repeated `count` times. Both timings are printed on one line via
/// `std.debug.print`.
///
/// Returns an error if appending to the lists fails or the timer cannot be
/// started.
noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache. Uses the same kernel as the timed zmath pass below
    // (the whole vector result is kept, not just lane 0).
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar version.
    {
        i = 0;
        // `Timer.read` reports the nanoseconds elapsed since `Timer.start`, so it is
        // used on its own. `Timer.lap` returns a duration (and resets the timer), not
        // a timestamp; subtracting it from `read` would under-report the elapsed time.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
                    const r = [3]f32{
                        d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0),
                        d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0),
                        d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks quaternion multiplication: a hand-written scalar kernel versus
/// `zm.qmul` on values loaded with `zm.loadArr4`.
///
/// Two lists of 256 random `[4]f32` values are generated. Each timed pass
/// combines every element of `data0` with every element of `data1`, and the
/// pass is repeated `count` times. Both timings are printed on one line via
/// `std.debug.print`.
///
/// Returns an error if appending to the lists fails or the timer cannot be
/// started.
noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});

    var data0 = std.ArrayList([4]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([4]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr4(a);
                const vb = zm.loadArr4(b);
                const r = zm.qmul(va, vb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar version.
    {
        i = 0;
        // `Timer.read` reports the nanoseconds elapsed since `Timer.start`, so it is
        // used on its own. `Timer.lap` returns a duration (and resets the timer), not
        // a timestamp; subtracting it from `read` would under-report the elapsed time.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [4]f32{
                        (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]),
                        (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]),
                        (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]),
                        (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr4(a);
                    const vb = zm.loadArr4(b);
                    const r = zm.qmul(va, vb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `y = sin(sqrt(x * x + z * z) - t)` over a 1024 x 1024 grid: a
/// scalar kernel that handles four x positions per inner iteration versus a
/// `zm.F32x16` kernel that handles `zm.veclen(T)` positions per iteration.
///
/// The grid is swept `count` times, advancing `t` by 0.001 after each sweep.
/// Both timings are printed on one line via `std.debug.print`.
/// `allocator` is not used by this benchmark.
///
/// Returns an error if the timer cannot be started.
noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    _ = allocator;
    std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});

    const grid_size = 1024;

    // Scalar version.
    {
        var t: f32 = 0.0;
        const scale: f32 = 0.05;
        // `Timer.read` reports the nanoseconds elapsed since `Timer.start`, so it is
        // used on its own. `Timer.lap` returns a duration (and resets the timer), not
        // a timestamp; subtracting it from `read` would under-report the elapsed time.
        var timer = try Timer.start();
        var iter: usize = 0;
        while (iter < count) : (iter += 1) {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @intToFloat(f32, z_index - grid_size / 2);
                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += 4) {
                    const x0 = scale * @intToFloat(f32, x_index + 0 - grid_size / 2);
                    const x1 = scale * @intToFloat(f32, x_index + 1 - grid_size / 2);
                    const x2 = scale * @intToFloat(f32, x_index + 2 - grid_size / 2);
                    const x3 = scale * @intToFloat(f32, x_index + 3 - grid_size / 2);
                    const d0 = zm.sqrt(x0 * x0 + z * z);
                    const d1 = zm.sqrt(x1 * x1 + z * z);
                    const d2 = zm.sqrt(x2 * x2 + z * z);
                    const d3 = zm.sqrt(x3 * x3 + z * z);
                    const y0 = zm.sin(d0 - t);
                    const y1 = zm.sin(d1 - t);
                    const y2 = zm.sin(d2 - t);
                    const y3 = zm.sin(d3 - t);
                    std.mem.doNotOptimizeAway(&y0);
                    std.mem.doNotOptimizeAway(&y1);
                    std.mem.doNotOptimizeAway(&y2);
                    std.mem.doNotOptimizeAway(&y3);
                }
            }
            t += 0.001;
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        const T = zm.F32x16;
        // Per-lane x offsets 0..15, loaded once into a vector.
        const static = struct {
            const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
        };
        const voffset = zm.load(static.offsets[0..], T, 0);
        var vt = zm.splat(T, 0.0);
        const scale: f32 = 0.05;
        var timer = try Timer.start();
        var iter: usize = 0;
        while (iter < count) : (iter += 1) {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @intToFloat(f32, z_index - grid_size / 2);
                const vz = zm.splat(T, z);
                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += zm.veclen(T)) {
                    const x = scale * @intToFloat(f32, x_index - grid_size / 2);
                    const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
                    const d = zm.sqrt(vx * vx + vz * vz);
                    const vy = zm.sin(d - vt);
                    std.mem.doNotOptimizeAway(&vy);
                }
            }
            vt += zm.splat(T, 0.001);
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @intToFloat(f64, elapsed_ns) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}

18
lib/zmath/src/main.zig Normal file
View File

@@ -0,0 +1,18 @@
//--------------------------------------------------------------------------------------------------
//
// SIMD math library for game developers
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
//
// See zmath.zig for more details.
// See util.zig for additional functionality.
//
//--------------------------------------------------------------------------------------------------
// Library version, expressed as a std.SemanticVersion (0.9.6).
pub const version = @import("std").SemanticVersion{ .major = 0, .minor = 9, .patch = 6 };
// Re-export the public declarations of zmath.zig from this file, so they are
// reachable directly through this module.
pub usingnamespace @import("zmath.zig");
// Helpers from util.zig, exposed under the `util` name.
pub const util = @import("util.zig");
// ensure transitive closure of test coverage
// (`util` is referenced inside a comptime block so that it is not left unused.)
comptime {
_ = util;
}

182
lib/zmath/src/util.zig Normal file
View File

@@ -0,0 +1,182 @@
// ==============================================================================
//
// Collection of useful functions building on top of, and extending, core zmath.
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
//
// ------------------------------------------------------------------------------
// 1. Matrix functions
// ------------------------------------------------------------------------------
//
// As an example, in a left handed Y-up system:
// getAxisX is equivalent to the right vector
// getAxisY is equivalent to the up vector
// getAxisZ is equivalent to the forward vector
//
// getTranslationVec(m: Mat) Vec
// getScaleVec(m: Mat) Vec
// getRotationQuat(_m: Mat) Quat
// getAxisX(m: Mat) Vec
// getAxisY(m: Mat) Vec
// getAxisZ(m: Mat) Vec
//
// ==============================================================================
const zm = @import("zmath.zig");
const std = @import("std");
const math = std.math;
const expect = std.testing.expect;
/// Returns row 3 of `m` (the translation row) with its fourth component
/// replaced by zero.
pub fn getTranslationVec(m: zm.Mat) zm.Vec {
    const row = m[3];
    return zm.f32x4(row[0], row[1], row[2], 0);
}
/// Returns the lengths of the first three columns of the upper-left 3x3 part
/// of `m` as (x, y, z, 0).
pub fn getScaleVec(m: zm.Mat) zm.Vec {
    const col_x = zm.f32x4(m[0][0], m[1][0], m[2][0], 0);
    const col_y = zm.f32x4(m[0][1], m[1][1], m[2][1], 0);
    const col_z = zm.f32x4(m[0][2], m[1][2], m[2][2], 0);
    return zm.f32x4(
        zm.length3(col_x)[0],
        zm.length3(col_y)[0],
        zm.length3(col_z)[0],
        0,
    );
}
/// Builds a quaternion from `_m` after normalizing each of the first three
/// columns of its upper-left 3x3 part. All other elements of the matrix are
/// passed to `zm.quatFromMat` unchanged.
pub fn getRotationQuat(_m: zm.Mat) zm.Quat {
    var unscaled = _m;
    // Normalize one column at a time and write it back into the copy.
    comptime var col = 0;
    inline while (col < 3) : (col += 1) {
        const unit = zm.normalize3(zm.f32x4(_m[0][col], _m[1][col], _m[2][col], 0));
        unscaled[0][col] = unit[0];
        unscaled[1][col] = unit[1];
        unscaled[2][col] = unit[2];
    }
    return zm.quatFromMat(unscaled);
}
/// Returns the normalized xyz part of row 0 of `m`, with w set to 0.
pub fn getAxisX(m: zm.Mat) zm.Vec {
    const row = m[0];
    return zm.normalize3(zm.f32x4(row[0], row[1], row[2], 0.0));
}
/// Returns the normalized xyz part of row 1 of `m`, with w set to 0.
pub fn getAxisY(m: zm.Mat) zm.Vec {
    const row = m[1];
    return zm.normalize3(zm.f32x4(row[0], row[1], row[2], 0.0));
}
/// Returns the normalized xyz part of row 2 of `m`, with w set to 0.
pub fn getAxisZ(m: zm.Mat) zm.Vec {
    const row = m[2];
    return zm.normalize3(zm.f32x4(row[0], row[1], row[2], 0.0));
}
test "zmath.util.mat.translation" {
    // The matrix is loaded from index 1 of this buffer, so the first value is
    // skipped; presumably the last value is unused as well (verify against
    // zm.loadMat).
    // zig fmt: off
    const mat_data = [18]f32{
        1.0,
        2.0,  3.0,  4.0,  5.0,
        6.0,  7.0,  8.0,  9.0,
        10.0, 11.0, 12.0, 13.0,
        14.0, 15.0, 16.0, 17.0,
        18.0,
    };
    // zig fmt: on
    const loaded = zm.loadMat(mat_data[1..]);
    const expected = zm.f32x4(14.0, 15.0, 16.0, 0.0);
    try expect(zm.approxEqAbs(getTranslationVec(loaded), expected, 0.0001));
}
test "zmath.util.mat.scale" {
    // Scale first, then translate; the scale factors must be recovered.
    const scale_part = zm.scaling(3, 4, 5);
    const translate_part = zm.translation(6, 7, 8);
    const got = getScaleVec(zm.mul(scale_part, translate_part));
    const expected = zm.f32x4(3.0, 4.0, 5.0, 0.0);
    try expect(zm.approxEqAbs(got, expected, 0.0001));
}
test "zmath.util.mat.rotation" {
    // Compose rotation * scaling * translation, then check that the extracted
    // quaternion transforms a probe vector the same way the original rotation does.
    const rotation = zm.matFromRollPitchYaw(0.1, 1.2, 2.3);
    const rotated_scaled = zm.mul(rotation, zm.scaling(3, 4, 5));
    const composed = zm.mul(rotated_scaled, zm.translation(6, 7, 8));
    const extracted = getRotationQuat(composed);
    const probe = zm.f32x4s(1);
    const via_matrix = zm.mul(probe, rotation);
    const via_quat = zm.mul(probe, zm.quatToMat(extracted));
    try expect(zm.approxEqAbs(via_matrix, via_quat, 0.0001));
}
test "zmath.util.mat.z_vec" {
    const quarter_turn = std.math.degreesToRadians(f32, 90);
    const tolerance = 0.0001;

    // Identity matrix: the Z axis is +Z.
    const start = zm.identity();
    try expect(zm.approxEqAbs(getAxisZ(start), zm.f32x4(0.0, 0.0, 1.0, 0), tolerance));

    // After a 90 degree rotation about Y the Z axis is expected along +X.
    const yawed = zm.mul(start, zm.rotationY(quarter_turn));
    try expect(zm.approxEqAbs(getAxisZ(yawed), zm.f32x4(1.0, 0.0, 0.0, 0), tolerance));
}
test "zmath.util.mat.y_vec" {
    const quarter_turn = std.math.degreesToRadians(f32, 90);
    const tolerance = 0.01;

    // Identity matrix: the Y axis is +Y.
    const start = zm.identity();
    try expect(zm.approxEqAbs(getAxisY(start), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance));

    // A 90 degree rotation about Y leaves the Y axis unchanged.
    const yawed = zm.mul(start, zm.rotationY(quarter_turn));
    try expect(zm.approxEqAbs(getAxisY(yawed), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance));

    // A further 90 degree rotation about X is expected to move it to +Z.
    const pitched = zm.mul(yawed, zm.rotationX(quarter_turn));
    try expect(zm.approxEqAbs(getAxisY(pitched), zm.f32x4(0.0, 0.0, 1.0, 0), tolerance));
}
test "zmath.util.mat.right" {
    const quarter_turn = std.math.degreesToRadians(f32, 90);
    const tolerance = 0.01;

    // Identity matrix: the X axis is +X.
    const start = zm.identity();
    try expect(zm.approxEqAbs(getAxisX(start), zm.f32x4(1.0, 0.0, 0.0, 0), tolerance));

    // After a 90 degree rotation about Y the X axis is expected along -Z.
    const yawed = zm.mul(start, zm.rotationY(quarter_turn));
    try expect(zm.approxEqAbs(getAxisX(yawed), zm.f32x4(0.0, 0.0, -1.0, 0), tolerance));

    // A further 90 degree rotation about X is expected to move it to +Y.
    const pitched = zm.mul(yawed, zm.rotationX(quarter_turn));
    try expect(zm.approxEqAbs(getAxisX(pitched), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance));
}
// ------------------------------------------------------------------------------
// This software is available under 2 licenses -- choose whichever you prefer.
// ------------------------------------------------------------------------------
// ALTERNATIVE A - MIT License
// Copyright (c) 2022 Michal Ziulek and Contributors
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// ------------------------------------------------------------------------------
// ALTERNATIVE B - Public Domain (www.unlicense.org)
// This is free and unencumbered software released into the public domain.
// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
// software, either in source code form or as a compiled binary, for any purpose,
// commercial or non-commercial, and by any means.
// In jurisdictions that recognize copyright laws, the author or authors of this
// software dedicate any and all copyright interest in the software to the public
// domain. We make this dedication for the benefit of the public at large and to
// the detriment of our heirs and successors. We intend this dedication to be an
// overt act of relinquishment in perpetuity of all present and future rights to
// this software under copyright law.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ------------------------------------------------------------------------------

4442
lib/zmath/src/zmath.zig Normal file

File diff suppressed because it is too large Load Diff