From 256292c20d7b14cc2e2d97b1c39aba94b4dbb76d Mon Sep 17 00:00:00 2001 From: Daniel Ledda Date: Fri, 3 Jan 2025 19:21:18 +0100 Subject: [PATCH] fixing stuff --- .clangd | 4 + CMakeLists.txt | 98 - build | 3 + build.bat | 27 + build.zig | 55 - lib/zmath/README.md | 138 - lib/zmath/build.zig | 97 - lib/zmath/src/benchmark.zig | 469 --- lib/zmath/src/main.zig | 18 - lib/zmath/src/util.zig | 182 - lib/zmath/src/zmath.zig | 4442 ------------------------ src/SomaSolve.cpp | 366 +- src/SomaSolve.h | 11 +- src/VoxelSpace.cpp | 496 ++- src/VoxelSpace.h | 76 +- src/c.zig | 9 - src/gfx/Color.cpp | 11 +- src/gfx/Color.h | 4 +- src/gfx/Color.zig | 43 - src/gfx/Mesh.cpp | 2 +- src/gfx/Mesh.h | 6 +- src/gfx/Mesh.zig | 94 - src/gfx/OrbitControls.cpp | 0 src/gfx/OrbitControls.h | 81 - src/gfx/Shader.cpp | 27 +- src/gfx/Shader.h | 2 +- src/gfx/Shader.zig | 56 - src/gfx/Texture.cpp | 8 +- src/gfx/Texture.h | 2 +- src/gfx/djleddaGeom.zig | 57 - src/gfx/geometry.cpp | 80 +- src/gfx/geometry.h | 26 +- {lib/c => src/lib}/KHR/khrplatform.h | 0 src/lib/djstdlib/app.cpp | 14 + src/lib/djstdlib/core.cpp | 511 +++ src/lib/djstdlib/core.h | 219 ++ src/lib/djstdlib/os.cpp | 12 + src/lib/djstdlib/os.h | 12 + src/lib/djstdlib/os_linux.cpp | 24 + src/lib/djstdlib/os_win32.cpp | 21 + src/lib/djstdlib/vendor/stb_sprintf.h | 1923 ++++++++++ {lib/c => src/lib}/glad/glad.c | 0 {lib/c => src/lib}/glad/glad.h | 0 {lib/c => src/lib}/loaders/stb_image.h | 0 {lib/c => src/lib}/loaders/tinyobj.h | 0 src/main.cpp | 174 +- src/main.zig | 402 --- 47 files changed, 3401 insertions(+), 6901 deletions(-) create mode 100644 .clangd delete mode 100644 CMakeLists.txt create mode 100644 build create mode 100644 build.bat delete mode 100644 build.zig delete mode 100644 lib/zmath/README.md delete mode 100644 lib/zmath/build.zig delete mode 100644 lib/zmath/src/benchmark.zig delete mode 100644 lib/zmath/src/main.zig delete mode 100644 lib/zmath/src/util.zig delete mode 100644 lib/zmath/src/zmath.zig delete mode 100644 
src/c.zig delete mode 100644 src/gfx/Color.zig delete mode 100644 src/gfx/Mesh.zig delete mode 100644 src/gfx/OrbitControls.cpp delete mode 100644 src/gfx/OrbitControls.h delete mode 100644 src/gfx/Shader.zig delete mode 100644 src/gfx/djleddaGeom.zig rename {lib/c => src/lib}/KHR/khrplatform.h (100%) create mode 100644 src/lib/djstdlib/app.cpp create mode 100644 src/lib/djstdlib/core.cpp create mode 100644 src/lib/djstdlib/core.h create mode 100644 src/lib/djstdlib/os.cpp create mode 100644 src/lib/djstdlib/os.h create mode 100644 src/lib/djstdlib/os_linux.cpp create mode 100644 src/lib/djstdlib/os_win32.cpp create mode 100644 src/lib/djstdlib/vendor/stb_sprintf.h rename {lib/c => src/lib}/glad/glad.c (100%) rename {lib/c => src/lib}/glad/glad.h (100%) rename {lib/c => src/lib}/loaders/stb_image.h (100%) rename {lib/c => src/lib}/loaders/tinyobj.h (100%) delete mode 100644 src/main.zig diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..69eef2f --- /dev/null +++ b/.clangd @@ -0,0 +1,4 @@ +CompileFlags: + Add: + - -DOS_LINUX + - -I ./ diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 28b3018..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,98 +0,0 @@ -cmake_minimum_required(VERSION 3.24) -project(somaesque) - -set(VENDOR_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor") -set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") - -set(CMAKE_EXPORT_COMPILE_COMMANDS true) -set(CMAKE_BUILD_TYPE Release) -set(CMAKE_CXX_FLAGS_RELEASE "-O2") -set(CMAKE_CXX_STANDARD 20) - -option(GLFW_BUILD_DOCS OFF) -option(GLFW_BUILD_EXAMPLES OFF) -option(GLFW_BUILD_TESTS OFF) -option(GLFW_INSTALL OFF) - -find_package(glfw3 3.3 REQUIRED) -find_package(glm REQUIRED) - -# Glad -add_library(glad - STATIC - ${VENDOR_DIR}/glad/glad.c -) -target_include_directories(glad - PUBLIC - ${VENDOR_DIR} -) - -# STB -add_library(loaders - STATIC - ${VENDOR_DIR}/loaders/tinyobj.cpp - ${VENDOR_DIR}/loaders/stb_image.cpp -) -target_include_directories(loaders - PUBLIC - 
${VENDOR_DIR} -) - -# somaesque -add_executable(${PROJECT_NAME}) -target_sources(${PROJECT_NAME} - PRIVATE - ${SRC_DIR}/main.cpp - ${SRC_DIR}/VoxelSpace.cpp - ${SRC_DIR}/VoxelSpace.h - ${SRC_DIR}/SomaSolve.cpp - ${SRC_DIR}/SomaSolve.h - ${SRC_DIR}/gfx/Texture.h - ${SRC_DIR}/gfx/Texture.cpp - ${SRC_DIR}/gfx/Mesh.h - ${SRC_DIR}/gfx/Mesh.cpp - ${SRC_DIR}/gfx/Shader.h - ${SRC_DIR}/gfx/Shader.cpp - ${SRC_DIR}/gfx/Color.h - ${SRC_DIR}/gfx/Color.cpp - ${SRC_DIR}/gfx/geometry.h - ${SRC_DIR}/gfx/geometry.cpp -) -target_link_libraries(${PROJECT_NAME} - PRIVATE - glfw - GL - X11 - pthread - Xrandr - dl - glm::glm - glad - loaders -) -target_include_directories(somaesque - PUBLIC - ${VENDOR_DIR}/KHR -) - -# TESTING -include(FetchContent) -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip -) -FetchContent_MakeAvailable(googletest) - -enable_testing() -add_executable(tests - ${SRC_DIR}/tests.cpp - ${SRC_DIR}/VoxelSpace.cpp - ${SRC_DIR}/VoxelSpace.h -) - -target_link_libraries(tests - GTest::gtest_main -) - -include(GoogleTest) -gtest_discover_tests(tests) diff --git a/build b/build new file mode 100644 index 0000000..703d23a --- /dev/null +++ b/build @@ -0,0 +1,3 @@ +#!/bin/bash + +g++ -I ./ -g -g3 -lglfw -lGLU -lGL -lXrandr -lXxf86vm -lXi -lXinerama -lX11 -lrt -ldl -DOS_LINUX=1 -DENABLE_ASSERT=1 ./app.cpp -o ./target/app diff --git a/build.bat b/build.bat new file mode 100644 index 0000000..4d507fb --- /dev/null +++ b/build.bat @@ -0,0 +1,27 @@ +@echo off + +if NOT EXIST .\target mkdir .\target + +set commonLinkerFlags=-opt:ref +set commonCompilerFlags=^ + -MT %= Make sure the C runtime library is statically linked =%^ + -Gm- %= Turns off incremental building =%^ + -nologo %= No one cares you made the compiler Microsoft =%^ + -Oi %= Always use intrinsics =%^ + -EHa- %= Disable exception handling =%^ + -GR- %= Never use runtime type info from C++ =%^ + -WX -W4 -wd4201 -wd4100 -wd4189 -wd4505 %= 
Compiler warnings, -WX warnings as errors, -W4 warning level 4, -wdXXXX disable warning XXXX =%^ + -DAPP_DEBUG=0 -DENABLE_ASSERT=1 -DOS_WINDOWS=1 %= Custom #defines =%^ + -D_CRT_SECURE_NO_WARNINGS=1^ + -FC %= Full path of source code file in diagnostics =%^ + -Zi %= Generate debugger info =% + +pushd .\target +cl %commonCompilerFlags% -Fe:.\app.exe ..\app.cpp /link -incremental:no %commonLinkerFlags% +popd + +exit /b + +:error +echo Failed with error #%errorlevel%. +exit /b %errorlevel% diff --git a/build.zig b/build.zig deleted file mode 100644 index ba9db36..0000000 --- a/build.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("std"); -const zmath = @import("lib/zmath/build.zig"); - -pub fn build(b: *std.Build) void { - // Standard target options allows the person running `zig build` to choose - // what target to build for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. - - const target = b.standardTargetOptions(.{}); - - // Standard release options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. 
- const mode = b.standardOptimizeOption(.{}); - - const exe = b.addExecutable(.{ - .name = "somaesque-native-zig", - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = mode, - }); - exe.addIncludePath("/usr/local/include"); - - exe.linkLibC(); - exe.linkSystemLibrary("glfw3"); - exe.linkSystemLibrary("glm"); - exe.linkSystemLibrary("GL"); - exe.addIncludePath("lib/c"); - - exe.addCSourceFile("lib/c/glad/glad.c", &[_][]const u8{"-std=c11"}); - - exe.install(); - - // zmath - const zmath_pkg = zmath.package(b, target, mode, .{ - .options = .{ .enable_cross_platform_determinism = true }, - }); - zmath_pkg.link(exe); - - const run_cmd = exe.run(); - run_cmd.step.dependOn(b.getInstallStep()); - if (b.args) |args| { - run_cmd.addArgs(args); - } - - const run_step = b.step("run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - //const exe_tests = b.addTest("src/main.zig"); - //exe_tests.setTarget(target); - //exe_tests.setBuildMode(mode); - - //const test_step = b.step("test", "Run unit tests"); - //test_step.dependOn(&exe_tests.step); -} diff --git a/lib/zmath/README.md b/lib/zmath/README.md deleted file mode 100644 index c11ef1f..0000000 --- a/lib/zmath/README.md +++ /dev/null @@ -1,138 +0,0 @@ -# zmath v0.9.6 - SIMD math library for game developers - -Tested on x86_64 and AArch64. - -Provides ~140 optimized routines and ~70 extensive tests. - -Can be used with any graphics API. - -Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig). - -Benchamrks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig). - -An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn). - -## Getting started - -Copy `zmath` folder to a `libs` subdirectory of the root of your project. 
- -Then in your `build.zig` add: - -```zig -const std = @import("std"); -const zmath = @import("libs/zmath/build.zig"); - -pub fn build(b: *std.Build) void { - ... - const optimize = b.standardOptimizeOption(.{}); - const target = b.standardTargetOptions(.{}); - - zmath_pkg = zmath.package(b, target, optimize, .{ - .options = .{ .enable_cross_platform_determinism = true }, - }); - - zmath_pkg.link(exe); -} -``` - -Now in your code you may import and use zmath: - -```zig -const zm = @import("zmath"); - -pub fn main() !void { - // - // OpenGL/Vulkan example - // - const object_to_world = zm.rotationY(..); - const world_to_view = zm.lookAtRh( - zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position - zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point - zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point) - ); - // `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`) - const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0); - - const object_to_view = zm.mul(object_to_world, world_to_view); - const object_to_clip = zm.mul(object_to_view, view_to_clip); - - // Transposition is needed because GLSL uses column-major matrices by default - gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip)); - - // In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip; - - // - // DirectX example - // - const object_to_world = zm.rotationY(..); - const world_to_view = zm.lookAtLh( - zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position - zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point - zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point) - ); - const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0); - - const object_to_view = zm.mul(object_to_world, world_to_view); - const object_to_clip = zm.mul(object_to_view, view_to_clip); - - // Transposition is needed because HLSL uses column-major 
matrices by default - const mem = allocateUploadMemory(...); - zm.storeMat(mem, zm.transpose(object_to_clip)); - - // In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip); - - // - // 'WASD' camera movement example - // - { - const speed = zm.f32x4s(10.0); - const delta_time = zm.f32x4s(demo.frame_stats.delta_time); - const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw)); - var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform)); - - zm.storeArr3(&demo.camera.forward, forward); - - const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward)); - forward = speed * delta_time * forward; - - var cam_pos = zm.loadArr3(demo.camera.position); - - if (keyDown('W')) { - cam_pos += forward; - } else if (keyDown('S')) { - cam_pos -= forward; - } - if (keyDown('D')) { - cam_pos += right; - } else if (keyDown('A')) { - cam_pos -= right; - } - - zm.storeArr3(&demo.camera.position, cam_pos); - } - - // - // SIMD wave equation solver example (works with vector width 4, 8 and 16) - // 'T' can be F32x4, F32x8 or F32x16 - // - var z_index: i32 = 0; - while (z_index < grid_size) : (z_index += 1) { - const z = scale * @intToFloat(f32, z_index - grid_size / 2); - const vz = zm.splat(T, z); - - var x_index: i32 = 0; - while (x_index < grid_size) : (x_index += zm.veclen(T)) { - const x = scale * @intToFloat(f32, x_index - grid_size / 2); - const vx = zm.splat(T, x) + voffset * zm.splat(T, scale); - - const d = zm.sqrt(vx * vx + vz * vz); - const vy = zm.sin(d - vtime); - - const index = @intCast(usize, x_index + z_index * grid_size); - zm.store(xslice[index..], vx, 0); - zm.store(yslice[index..], vy, 0); - zm.store(zslice[index..], vz, 0); - } - } -} -``` diff --git a/lib/zmath/build.zig b/lib/zmath/build.zig deleted file mode 100644 index ce21bc1..0000000 --- a/lib/zmath/build.zig +++ /dev/null @@ -1,97 +0,0 @@ -const std = @import("std"); - -pub const Options = struct { - 
enable_cross_platform_determinism: bool = true, -}; - -pub const Package = struct { - options: Options, - zmath: *std.Build.Module, - zmath_options: *std.Build.Module, - - pub fn link(pkg: Package, exe: *std.Build.CompileStep) void { - exe.addModule("zmath", pkg.zmath); - exe.addModule("zmath_options", pkg.zmath_options); - } -}; - -pub fn package( - b: *std.Build, - _: std.zig.CrossTarget, - _: std.builtin.Mode, - args: struct { - options: Options = .{}, - }, -) Package { - const step = b.addOptions(); - step.addOption( - bool, - "enable_cross_platform_determinism", - args.options.enable_cross_platform_determinism, - ); - - const zmath_options = step.createModule(); - - const zmath = b.createModule(.{ - .source_file = .{ .path = thisDir() ++ "/src/main.zig" }, - .dependencies = &.{ - .{ .name = "zmath_options", .module = zmath_options }, - }, - }); - - return .{ - .options = args.options, - .zmath = zmath, - .zmath_options = zmath_options, - }; -} - -pub fn build(b: *std.Build) void { - const optimize = b.standardOptimizeOption(.{}); - const target = b.standardTargetOptions(.{}); - - const test_step = b.step("test", "Run zmath tests"); - test_step.dependOn(runTests(b, optimize, target)); - - const benchmark_step = b.step("benchmark", "Run zmath benchmarks"); - benchmark_step.dependOn(runBenchmarks(b, target)); -} - -pub fn runTests( - b: *std.Build, - optimize: std.builtin.Mode, - target: std.zig.CrossTarget, -) *std.Build.Step { - const tests = b.addTest(.{ - .name = "zmath-tests", - .root_source_file = .{ .path = thisDir() ++ "/src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - const zmath_pkg = package(b, target, optimize, .{}); - tests.addModule("zmath_options", zmath_pkg.zmath_options); - - return &tests.run().step; -} - -pub fn runBenchmarks( - b: *std.Build, - target: std.zig.CrossTarget, -) *std.Build.Step { - const exe = b.addExecutable(.{ - .name = "zmath-benchmarks", - .root_source_file = .{ .path = thisDir() ++ "/src/benchmark.zig" 
}, - .target = target, - .optimize = .ReleaseFast, - }); - - const zmath_pkg = package(b, target, .ReleaseFast, .{}); - exe.addModule("zmath", zmath_pkg.zmath); - - return &exe.run().step; -} - -inline fn thisDir() []const u8 { - return comptime std.fs.path.dirname(@src().file) orelse "."; -} diff --git a/lib/zmath/src/benchmark.zig b/lib/zmath/src/benchmark.zig deleted file mode 100644 index 136e29d..0000000 --- a/lib/zmath/src/benchmark.zig +++ /dev/null @@ -1,469 +0,0 @@ -// ------------------------------------------------------------------------------------------------- -// zmath - benchmarks -// ------------------------------------------------------------------------------------------------- -// 'zig build benchmark' in the root project directory will build and run 'ReleaseFast' configuration. -// -// ------------------------------------------------------------------------------------------------- -// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f -// ------------------------------------------------------------------------------------------------- -// matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s -// cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s -// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s -// quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s -// wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s -// -// ------------------------------------------------------------------------------------------------- -// 'AMD Ryzen 7 5800X 8-Core Processer', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0 -// ------------------------------------------------------------------------------------------------- -// matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s -// cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s -// 
cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s -// quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s -// wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s -// -// ------------------------------------------------------------------------------------------------- -// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350 -// ------------------------------------------------------------------------------------------------- -// matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s -// cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s -// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s -// quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s -// wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s -// -// ------------------------------------------------------------------------------------------------- -// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f -// ------------------------------------------------------------------------------------------------- -// matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s -// cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s -// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s -// quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s -// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s -// -// ------------------------------------------------------------------------------------------------- - -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); - - // m = mul(ma, mb); data set fits in L1 cache; AOS data 
layout. - try mat4MulBenchmark(allocator, 100_000); - - // v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout. - try cross3ScaleBiasBenchmark(allocator, 10_000); - - // v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout. - try cross3Dot3ScaleBiasBenchmark(allocator, 10_000); - - // q = qmul(qa, qb); data set fits in L1 cache; AOS data layout. - try quatBenchmark(allocator, 10_000); - - // d = sqrt(x * x + z * z); y = sin(d - t); SOA layout. - try waveBenchmark(allocator, 1_000); -} - -const std = @import("std"); -const time = std.time; -const Timer = time.Timer; -const zm = @import("zmath"); - -var prng = std.rand.DefaultPrng.init(0); -const random = prng.random(); - -noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { - std.debug.print("\n", .{}); - std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"}); - - var data0 = std.ArrayList([16]f32).init(allocator); - defer data0.deinit(); - var data1 = std.ArrayList([16]f32).init(allocator); - defer data1.deinit(); - - var i: usize = 0; - while (i < 64) : (i += 1) { - try data0.append([16]f32{ - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - }); - try data1.append([16]f32{ - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - random.float(f32), random.float(f32), random.float(f32), random.float(f32), - }); - } - - // Warmup, fills L1 cache. 
- i = 0; - while (i < 100) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const ma = zm.loadMat(a[0..]); - const mb = zm.loadMat(b[0..]); - const r = zm.mul(ma, mb); - std.mem.doNotOptimizeAway(&r); - } - } - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const r = [16]f32{ - a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12], - a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13], - a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14], - a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15], - a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12], - a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13], - a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14], - a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15], - a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12], - a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13], - a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14], - a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15], - a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12], - a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13], - a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14], - a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15], - }; - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const ma = zm.loadMat(a[0..]); - const mb = zm.loadMat(b[0..]); - const r = zm.mul(ma, mb); - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - 
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); - } -} - -noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { - std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"}); - - var data0 = std.ArrayList([3]f32).init(allocator); - defer data0.deinit(); - var data1 = std.ArrayList([3]f32).init(allocator); - defer data1.deinit(); - - var i: usize = 0; - while (i < 256) : (i += 1) { - try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); - try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); - } - - // Warmup, fills L1 cache. - i = 0; - while (i < 100) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr3(a); - const vb = zm.loadArr3(b); - const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0); - std.mem.doNotOptimizeAway(&cp); - } - } - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const r = [3]f32{ - 0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0, - 0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0, - 0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0, - }; - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr3(a); - const vb = zm.loadArr3(b); - const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0); - std.mem.doNotOptimizeAway(&cp); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); - } -} - -noinline fn 
cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { - std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"}); - - var data0 = std.ArrayList([3]f32).init(allocator); - defer data0.deinit(); - var data1 = std.ArrayList([3]f32).init(allocator); - defer data1.deinit(); - - var i: usize = 0; - while (i < 256) : (i += 1) { - try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); - try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) }); - } - - // Warmup, fills L1 cache. - i = 0; - while (i < 100) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr3(a); - const vb = zm.loadArr3(b); - const r = (zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)))[0]; - std.mem.doNotOptimizeAway(&r); - } - } - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; - const r = [3]f32{ - d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0), - d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0), - d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0), - }; - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr3(a); - const vb = zm.loadArr3(b); - const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)); - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); - } -} - 
-noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { - std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"}); - - var data0 = std.ArrayList([4]f32).init(allocator); - defer data0.deinit(); - var data1 = std.ArrayList([4]f32).init(allocator); - defer data1.deinit(); - - var i: usize = 0; - while (i < 256) : (i += 1) { - try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) }); - try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) }); - } - - // Warmup, fills L1 cache. - i = 0; - while (i < 100) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr4(a); - const vb = zm.loadArr4(b); - const r = zm.qmul(va, vb); - std.mem.doNotOptimizeAway(&r); - } - } - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const r = [4]f32{ - (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]), - (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]), - (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]), - (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]), - }; - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); - } - - { - i = 0; - var timer = try Timer.start(); - const start = timer.lap(); - while (i < count) : (i += 1) { - for (data1.items) |b| { - for (data0.items) |a| { - const va = zm.loadArr4(a); - const vb = zm.loadArr4(b); - const r = zm.qmul(va, vb); - std.mem.doNotOptimizeAway(&r); - } - } - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); - } -} - -noinline fn waveBenchmark(allocator: 
std.mem.Allocator, comptime count: comptime_int) !void { - _ = allocator; - std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"}); - - const grid_size = 1024; - { - var t: f32 = 0.0; - - const scale: f32 = 0.05; - - var timer = try Timer.start(); - const start = timer.lap(); - - var iter: usize = 0; - while (iter < count) : (iter += 1) { - var z_index: i32 = 0; - while (z_index < grid_size) : (z_index += 1) { - const z = scale * @intToFloat(f32, z_index - grid_size / 2); - - var x_index: i32 = 0; - while (x_index < grid_size) : (x_index += 4) { - const x0 = scale * @intToFloat(f32, x_index + 0 - grid_size / 2); - const x1 = scale * @intToFloat(f32, x_index + 1 - grid_size / 2); - const x2 = scale * @intToFloat(f32, x_index + 2 - grid_size / 2); - const x3 = scale * @intToFloat(f32, x_index + 3 - grid_size / 2); - - const d0 = zm.sqrt(x0 * x0 + z * z); - const d1 = zm.sqrt(x1 * x1 + z * z); - const d2 = zm.sqrt(x2 * x2 + z * z); - const d3 = zm.sqrt(x3 * x3 + z * z); - - const y0 = zm.sin(d0 - t); - const y1 = zm.sin(d1 - t); - const y2 = zm.sin(d2 - t); - const y3 = zm.sin(d3 - t); - - std.mem.doNotOptimizeAway(&y0); - std.mem.doNotOptimizeAway(&y1); - std.mem.doNotOptimizeAway(&y2); - std.mem.doNotOptimizeAway(&y3); - } - } - t += 0.001; - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); - } - - { - const T = zm.F32x16; - - const static = struct { - const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - }; - const voffset = zm.load(static.offsets[0..], T, 0); - var vt = zm.splat(T, 0.0); - - const scale: f32 = 0.05; - - var timer = try Timer.start(); - const start = timer.lap(); - - var iter: usize = 0; - while (iter < count) : (iter += 1) { - var z_index: i32 = 0; - while (z_index < grid_size) : (z_index += 1) { - const z = scale * @intToFloat(f32, z_index - grid_size / 2); - const vz = zm.splat(T, z); - - var x_index: i32 
= 0; - while (x_index < grid_size) : (x_index += zm.veclen(T)) { - const x = scale * @intToFloat(f32, x_index - grid_size / 2); - const vx = zm.splat(T, x) + voffset * zm.splat(T, scale); - - const d = zm.sqrt(vx * vx + vz * vz); - - const vy = zm.sin(d - vt); - - std.mem.doNotOptimizeAway(&vy); - } - } - vt += zm.splat(T, 0.001); - } - const end = timer.read(); - const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; - - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); - } -} diff --git a/lib/zmath/src/main.zig b/lib/zmath/src/main.zig deleted file mode 100644 index 5834745..0000000 --- a/lib/zmath/src/main.zig +++ /dev/null @@ -1,18 +0,0 @@ -//-------------------------------------------------------------------------------------------------- -// -// SIMD math library for game developers -// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath -// -// See zmath.zig for more details. -// See util.zig for additional functionality. -// -//-------------------------------------------------------------------------------------------------- -pub const version = @import("std").SemanticVersion{ .major = 0, .minor = 9, .patch = 6 }; - -pub usingnamespace @import("zmath.zig"); -pub const util = @import("util.zig"); - -// ensure transitive closure of test coverage -comptime { - _ = util; -} diff --git a/lib/zmath/src/util.zig b/lib/zmath/src/util.zig deleted file mode 100644 index aa79020..0000000 --- a/lib/zmath/src/util.zig +++ /dev/null @@ -1,182 +0,0 @@ -// ============================================================================== -// -// Collection of useful functions building on top of, and extending, core zmath. -// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath -// -// ------------------------------------------------------------------------------ -// 1. 
Matrix functions -// ------------------------------------------------------------------------------ -// -// As an example, in a left handed Y-up system: -// getAxisX is equivalent to the right vector -// getAxisY is equivalent to the up vector -// getAxisZ is equivalent to the forward vector -// -// getTranslationVec(m: Mat) Vec -// getAxisX(m: Mat) Vec -// getAxisY(m: Mat) Vec -// getAxisZ(m: Mat) Vec -// -// ============================================================================== - -const zm = @import("zmath.zig"); -const std = @import("std"); -const math = std.math; -const expect = std.testing.expect; - -pub fn getTranslationVec(m: zm.Mat) zm.Vec { - var translation = m[3]; - translation[3] = 0; - return translation; -} - -pub fn getScaleVec(m: zm.Mat) zm.Vec { - const scale_x = zm.length3(zm.f32x4(m[0][0], m[1][0], m[2][0], 0))[0]; - const scale_y = zm.length3(zm.f32x4(m[0][1], m[1][1], m[2][1], 0))[0]; - const scale_z = zm.length3(zm.f32x4(m[0][2], m[1][2], m[2][2], 0))[0]; - return zm.f32x4(scale_x, scale_y, scale_z, 0); -} - -pub fn getRotationQuat(_m: zm.Mat) zm.Quat { - // Ortho normalize given matrix. 
- const c1 = zm.normalize3(zm.f32x4(_m[0][0], _m[1][0], _m[2][0], 0)); - const c2 = zm.normalize3(zm.f32x4(_m[0][1], _m[1][1], _m[2][1], 0)); - const c3 = zm.normalize3(zm.f32x4(_m[0][2], _m[1][2], _m[2][2], 0)); - var m = _m; - m[0][0] = c1[0]; - m[1][0] = c1[1]; - m[2][0] = c1[2]; - m[0][1] = c2[0]; - m[1][1] = c2[1]; - m[2][1] = c2[2]; - m[0][2] = c3[0]; - m[1][2] = c3[1]; - m[2][2] = c3[2]; - - // Extract rotation - return zm.quatFromMat(m); -} - -pub fn getAxisX(m: zm.Mat) zm.Vec { - return zm.normalize3(zm.f32x4(m[0][0], m[0][1], m[0][2], 0.0)); -} - -pub fn getAxisY(m: zm.Mat) zm.Vec { - return zm.normalize3(zm.f32x4(m[1][0], m[1][1], m[1][2], 0.0)); -} - -pub fn getAxisZ(m: zm.Mat) zm.Vec { - return zm.normalize3(zm.f32x4(m[2][0], m[2][1], m[2][2], 0.0)); -} - -test "zmath.util.mat.translation" { - // zig fmt: off - const mat_data = [18]f32{ - 1.0, - 2.0, 3.0, 4.0, 5.0, - 6.0, 7.0, 8.0, 9.0, - 10.0,11.0, 12.0,13.0, - 14.0, 15.0, 16.0, 17.0, - 18.0, - }; - // zig fmt: on - const mat = zm.loadMat(mat_data[1..]); - const translation = getTranslationVec(mat); - try expect(zm.approxEqAbs(translation, zm.f32x4(14.0, 15.0, 16.0, 0.0), 0.0001)); -} - -test "zmath.util.mat.scale" { - const mat = zm.mul(zm.scaling(3, 4, 5), zm.translation(6, 7, 8)); - const scale = getScaleVec(mat); - try expect(zm.approxEqAbs(scale, zm.f32x4(3.0, 4.0, 5.0, 0.0), 0.0001)); -} - -test "zmath.util.mat.rotation" { - const rotate_origin = zm.matFromRollPitchYaw(0.1, 1.2, 2.3); - const mat = zm.mul(zm.mul(rotate_origin, zm.scaling(3, 4, 5)), zm.translation(6, 7, 8)); - const rotate_get = getRotationQuat(mat); - const v0 = zm.mul(zm.f32x4s(1), rotate_origin); - const v1 = zm.mul(zm.f32x4s(1), zm.quatToMat(rotate_get)); - try expect(zm.approxEqAbs(v0, v1, 0.0001)); -} - -test "zmath.util.mat.z_vec" { - const degToRad = std.math.degreesToRadians; - var identity = zm.identity(); - var z_vec = getAxisZ(identity); - try expect(zm.approxEqAbs(z_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.0001)); - const 
rot_yaw = zm.rotationY(degToRad(f32, 90)); - identity = zm.mul(identity, rot_yaw); - z_vec = getAxisZ(identity); - try expect(zm.approxEqAbs(z_vec, zm.f32x4(1.0, 0.0, 0.0, 0), 0.0001)); -} - -test "zmath.util.mat.y_vec" { - const degToRad = std.math.degreesToRadians; - var identity = zm.identity(); - var y_vec = getAxisY(identity); - try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); - const rot_yaw = zm.rotationY(degToRad(f32, 90)); - identity = zm.mul(identity, rot_yaw); - y_vec = getAxisY(identity); - try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); - const rot_pitch = zm.rotationX(degToRad(f32, 90)); - identity = zm.mul(identity, rot_pitch); - y_vec = getAxisY(identity); - try expect(zm.approxEqAbs(y_vec, zm.f32x4(0.0, 0.0, 1.0, 0), 0.01)); -} - -test "zmath.util.mat.right" { - const degToRad = std.math.degreesToRadians; - var identity = zm.identity(); - var right = getAxisX(identity); - try expect(zm.approxEqAbs(right, zm.f32x4(1.0, 0.0, 0.0, 0), 0.01)); - const rot_yaw = zm.rotationY(degToRad(f32, 90)); - identity = zm.mul(identity, rot_yaw); - right = getAxisX(identity); - try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 0.0, -1.0, 0), 0.01)); - const rot_pitch = zm.rotationX(degToRad(f32, 90)); - identity = zm.mul(identity, rot_pitch); - right = getAxisX(identity); - try expect(zm.approxEqAbs(right, zm.f32x4(0.0, 1.0, 0.0, 0), 0.01)); -} - -// ------------------------------------------------------------------------------ -// This software is available under 2 licenses -- choose whichever you prefer. 
-// ------------------------------------------------------------------------------ -// ALTERNATIVE A - MIT License -// Copyright (c) 2022 Michal Ziulek and Contributors -// Permission is hereby granted, free of charge, to any person obtaining identity copy of -// this software and associated documentation files (the "Software"), to deal in -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -// ------------------------------------------------------------------------------ -// ALTERNATIVE B - Public Domain (www.unlicense.org) -// This is free and unencumbered software released into the public domain. -// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -// software, either in source code form or as identity compiled binary, for any purpose, -// commercial or non-commercial, and by any means. -// In jurisdictions that recognize copyright laws, the author or authors of this -// software dedicate any and all copyright interest in the software to the public -// domain. We make this dedication for the benefit of the public at large and to -// the detriment of our heirs and successors. 
We intend this dedication to be an -// overt act of relinquishment in perpetuity of all present and future rights to -// this software under copyright law. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// ------------------------------------------------------------------------------ diff --git a/lib/zmath/src/zmath.zig b/lib/zmath/src/zmath.zig deleted file mode 100644 index 383ed55..0000000 --- a/lib/zmath/src/zmath.zig +++ /dev/null @@ -1,4442 +0,0 @@ -// ============================================================================== -// -// SIMD math library for game developers -// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath -// -// Should work on all OSes supported by Zig. Works on x86_64 and ARM. -// Provides ~140 optimized routines and ~70 extensive tests. -// Can be used with any graphics API. -// -// zmath uses row-major matrices, row vectors (each row vector is stored in a SIMD register). -// Handedness is determined by which function version is used (Rh vs. Lh), -// otherwise the function works with either left-handed or right-handed view coordinates. 
-// -// const va = f32x4(1.0, 2.0, 3.0, 1.0); -// const vb = f32x4(-1.0, 1.0, -1.0, 1.0); -// const v0 = va + vb - f32x4(0.0, 1.0, 0.0, 1.0) * f32x4s(3.0); -// const v1 = cross3(va, vb) + f32x4(1.0, 1.0, 1.0, 1.0); -// const v2 = va + dot3(va, vb) / v1; // dotN() returns scalar replicated on all vector components -// -// const m = rotationX(math.pi * 0.25); -// const v = f32x4(...); -// const v0 = mul(v, m); // 'v' treated as a row vector -// const v1 = mul(m, v); // 'v' treated as a column vector -// const f = m[row][column]; -// -// const b = va < vb; -// if (all(b, 0)) { ... } // '0' means check all vector components; if all are 'true' -// if (all(b, 3)) { ... } // '3' means check first three vector components; if all first three are 'true' -// if (any(b, 0)) { ... } // '0' means check all vector components; if any is 'true' -// if (any(b, 3)) { ... } // '3' means check first three vector components; if any from first three is 'true' -// -// var v4 = load(mem[0..], F32x4, 0); -// var v8 = load(mem[100..], F32x8, 0); -// var v16 = load(mem[200..], F32x16, 0); -// -// var camera_position = [3]f32{ 1.0, 2.0, 3.0 }; -// var cam_pos = loadArr3(camera_position); -// ... -// storeArr3(&camera_position, cam_pos); -// -// v4 = sin(v4); // SIMDx4 -// v8 = cos(v8); // .x86_64 -> 2 x SIMDx4, .x86_64+avx+fma -> SIMDx8 -// v16 = atan(v16); // .x86_64 -> 4 x SIMDx4, .x86_64+avx+fma -> 2 x SIMDx8, .x86_64+avx512f -> SIMDx16 -// -// store(mem[0..], v4, 0); -// store(mem[100..], v8, 0); -// store(mem[200..], v16, 0); -// -// ------------------------------------------------------------------------------ -// 1. 
Initialization functions -// ------------------------------------------------------------------------------ -// -// f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 -// f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 -// f32x16(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32, -// e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 -// -// f32x4s(e0: f32) F32x4 -// f32x8s(e0: f32) F32x8 -// f32x16s(e0: f32) F32x16 -// -// boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 -// boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 -// boolx16(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool, -// e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 -// -// load(mem: []const f32, comptime T: type, comptime len: u32) T -// store(mem: []f32, v: anytype, comptime len: u32) void -// -// loadArr2(arr: [2]f32) F32x4 -// loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 -// loadArr3(arr: [3]f32) F32x4 -// loadArr3w(arr: [3]f32, w: f32) F32x4 -// loadArr4(arr: [4]f32) F32x4 -// -// storeArr2(arr: *[2]f32, v: F32x4) void -// storeArr3(arr: *[3]f32, v: F32x4) void -// storeArr4(arr: *[4]f32, v: F32x4) void -// -// arr3Ptr(ptr: anytype) *const [3]f32 -// arrNPtr(ptr: anytype) [*]const f32 -// -// splat(comptime T: type, value: f32) T -// splatInt(comptime T: type, value: u32) T -// -// ------------------------------------------------------------------------------ -// 2. 
Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16) -// ------------------------------------------------------------------------------ -// -// all(vb: anytype, comptime len: u32) bool -// any(vb: anytype, comptime len: u32) bool -// -// isNearEqual(v0: F32xN, v1: F32xN, epsilon: F32xN) BoolxN -// isNan(v: F32xN) BoolxN -// isInf(v: F32xN) BoolxN -// isInBounds(v: F32xN, bounds: F32xN) BoolxN -// -// andInt(v0: F32xN, v1: F32xN) F32xN -// andNotInt(v0: F32xN, v1: F32xN) F32xN -// orInt(v0: F32xN, v1: F32xN) F32xN -// norInt(v0: F32xN, v1: F32xN) F32xN -// xorInt(v0: F32xN, v1: F32xN) F32xN -// -// minFast(v0: F32xN, v1: F32xN) F32xN -// maxFast(v0: F32xN, v1: F32xN) F32xN -// min(v0: F32xN, v1: F32xN) F32xN -// max(v0: F32xN, v1: F32xN) F32xN -// round(v: F32xN) F32xN -// floor(v: F32xN) F32xN -// trunc(v: F32xN) F32xN -// ceil(v: F32xN) F32xN -// clamp(v0: F32xN, v1: F32xN) F32xN -// clampFast(v0: F32xN, v1: F32xN) F32xN -// saturate(v: F32xN) F32xN -// saturateFast(v: F32xN) F32xN -// lerp(v0: F32xN, v1: F32xN, t: f32) F32xN -// lerpV(v0: F32xN, v1: F32xN, t: F32xN) F32xN -// lerpInverse(v0: F32xN, v1: F32xN, t: f32) F32xN -// lerpInverseV(v0: F32xN, v1: F32xN, t: F32xN) F32xN -// mapLinear(v: F32xN, min1: f32, max1: f32, min2: f32, max2: f32) F32xN -// mapLinearV(v: F32xN, min1: F32xN, max1: F32xN, min2: F32xN, max2: F32xN) F32xN -// sqrt(v: F32xN) F32xN -// abs(v: F32xN) F32xN -// mod(v0: F32xN, v1: F32xN) F32xN -// modAngle(v: F32xN) F32xN -// mulAdd(v0: F32xN, v1: F32xN, v2: F32xN) F32xN -// select(mask: BoolxN, v0: F32xN, v1: F32xN) -// sin(v: F32xN) F32xN -// cos(v: F32xN) F32xN -// sincos(v: F32xN) [2]F32xN -// asin(v: F32xN) F32xN -// acos(v: F32xN) F32xN -// atan(v: F32xN) F32xN -// atan2(vy: F32xN, vx: F32xN) F32xN -// cmulSoa(re0: F32xN, im0: F32xN, re1: F32xN, im1: F32xN) [2]F32xN -// -// ------------------------------------------------------------------------------ -// 3. 
2D, 3D, 4D vector functions -// ------------------------------------------------------------------------------ -// -// swizzle(v: Vec, c, c, c, c) Vec (comptime c = .x | .y | .z | .w) -// dot2(v0: Vec, v1: Vec) F32x4 -// dot3(v0: Vec, v1: Vec) F32x4 -// dot4(v0: Vec, v1: Vec) F32x4 -// cross3(v0: Vec, v1: Vec) Vec -// lengthSq2(v: Vec) F32x4 -// lengthSq3(v: Vec) F32x4 -// lengthSq4(v: Vec) F32x4 -// length2(v: Vec) F32x4 -// length3(v: Vec) F32x4 -// length4(v: Vec) F32x4 -// normalize2(v: Vec) Vec -// normalize3(v: Vec) Vec -// normalize4(v: Vec) Vec -// -// vecToArr2(v: Vec) [2]f32 -// vecToArr3(v: Vec) [3]f32 -// vecToArr4(v: Vec) [4]f32 -// -// ------------------------------------------------------------------------------ -// 4. Matrix functions -// ------------------------------------------------------------------------------ -// -// identity() Mat -// mul(m0: Mat, m1: Mat) Mat -// mul(s: f32, m: Mat) Mat -// mul(m: Mat, s: f32) Mat -// mul(v: Vec, m: Mat) Vec -// mul(m: Mat, v: Vec) Vec -// transpose(m: Mat) Mat -// rotationX(angle: f32) Mat -// rotationY(angle: f32) Mat -// rotationZ(angle: f32) Mat -// translation(x: f32, y: f32, z: f32) Mat -// translationV(v: Vec) Mat -// scaling(x: f32, y: f32, z: f32) Mat -// scalingV(v: Vec) Mat -// lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat -// lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat -// lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat -// lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat -// perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat -// perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat -// perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat -// perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat -// orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat -// orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat -// orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat -// orthographicRhGl(w: f32, h: f32, near: 
f32, far: f32) Mat -// orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat -// orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat -// orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat -// orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat -// determinant(m: Mat) F32x4 -// inverse(m: Mat) Mat -// inverseDet(m: Mat, det: ?*F32x4) Mat -// matToQuat(m: Mat) Quat -// matFromAxisAngle(axis: Vec, angle: f32) Mat -// matFromNormAxisAngle(axis: Vec, angle: f32) Mat -// matFromQuat(quat: Quat) Mat -// matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat -// matFromRollPitchYawV(angles: Vec) Mat -// -// loadMat(mem: []const f32) Mat -// loadMat43(mem: []const f32) Mat -// loadMat34(mem: []const f32) Mat -// storeMat(mem: []f32, m: Mat) void -// storeMat43(mem: []f32, m: Mat) void -// storeMat34(mem: []f32, m: Mat) void -// -// matToArr(m: Mat) [16]f32 -// matToArr43(m: Mat) [12]f32 -// matToArr34(m: Mat) [12]f32 -// -// ------------------------------------------------------------------------------ -// 5. Quaternion functions -// ------------------------------------------------------------------------------ -// -// qmul(q0: Quat, q1: Quat) Quat -// qidentity() Quat -// conjugate(quat: Quat) Quat -// inverse(q: Quat) Quat -// slerp(q0: Quat, q1: Quat, t: f32) Quat -// slerpV(q0: Quat, q1: Quat, t: F32x4) Quat -// quatToMat(quat: Quat) Mat -// quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void -// quatFromMat(m: Mat) Quat -// quatFromAxisAngle(axis: Vec, angle: f32) Quat -// quatFromNormAxisAngle(axis: Vec, angle: f32) Quat -// quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat -// quatFromRollPitchYawV(angles: Vec) Quat -// -// ------------------------------------------------------------------------------ -// 6. 
Color functions -// ------------------------------------------------------------------------------ -// -// adjustSaturation(color: F32x4, saturation: f32) F32x4 -// adjustContrast(color: F32x4, contrast: f32) F32x4 -// rgbToHsl(rgb: F32x4) F32x4 -// hslToRgb(hsl: F32x4) F32x4 -// rgbToHsv(rgb: F32x4) F32x4 -// hsvToRgb(hsv: F32x4) F32x4 -// rgbToSrgb(rgb: F32x4) F32x4 -// srgbToRgb(srgb: F32x4) F32x4 -// -// ------------------------------------------------------------------------------ -// X. Misc functions -// ------------------------------------------------------------------------------ -// -// linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 -// sin(v: f32) f32 -// cos(v: f32) f32 -// sincos(v: f32) [2]f32 -// asin(v: f32) f32 -// acos(v: f32) f32 -// -// fftInitUnityTable(unitytable: []F32x4) void -// fft(re: []F32x4, im: []F32x4, unitytable: []const F32x4) void -// ifft(re: []F32x4, im: []const F32x4, unitytable: []const F32x4) void -// -// ============================================================================== - -// Fundamental types -pub const F32x4 = @Vector(4, f32); -pub const F32x8 = @Vector(8, f32); -pub const F32x16 = @Vector(16, f32); -pub const Boolx4 = @Vector(4, bool); -pub const Boolx8 = @Vector(8, bool); -pub const Boolx16 = @Vector(16, bool); - -// "Higher-level" aliases -pub const Vec = F32x4; -pub const Mat = [4]F32x4; -pub const Quat = F32x4; - -const builtin = @import("builtin"); -const std = @import("std"); -const math = std.math; -const assert = std.debug.assert; -const expect = std.testing.expect; - -const cpu_arch = builtin.cpu.arch; -const has_avx = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx) else false; -const has_avx512f = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f) else false; -const has_fma = if (cpu_arch == .x86_64) std.Target.x86.featureSetHas(builtin.cpu.features, .fma) else false; -// 
------------------------------------------------------------------------------ -// -// 1. Initialization functions -// -// ------------------------------------------------------------------------------ -pub inline fn f32x4(e0: f32, e1: f32, e2: f32, e3: f32) F32x4 { - return .{ e0, e1, e2, e3 }; -} -pub inline fn f32x8(e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32) F32x8 { - return .{ e0, e1, e2, e3, e4, e5, e6, e7 }; -} -// zig fmt: off -pub inline fn f32x16( - e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32, - e8: f32, e9: f32, ea: f32, eb: f32, ec: f32, ed: f32, ee: f32, ef: f32) F32x16 { - return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef }; -} -// zig fmt: on - -pub inline fn f32x4s(e0: f32) F32x4 { - return splat(F32x4, e0); -} -pub inline fn f32x8s(e0: f32) F32x8 { - return splat(F32x8, e0); -} -pub inline fn f32x16s(e0: f32) F32x16 { - return splat(F32x16, e0); -} - -pub inline fn boolx4(e0: bool, e1: bool, e2: bool, e3: bool) Boolx4 { - return .{ e0, e1, e2, e3 }; -} -pub inline fn boolx8(e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool) Boolx8 { - return .{ e0, e1, e2, e3, e4, e5, e6, e7 }; -} -// zig fmt: off -pub inline fn boolx16( - e0: bool, e1: bool, e2: bool, e3: bool, e4: bool, e5: bool, e6: bool, e7: bool, - e8: bool, e9: bool, ea: bool, eb: bool, ec: bool, ed: bool, ee: bool, ef: bool) Boolx16 { - return .{ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, ea, eb, ec, ed, ee, ef }; -} -// zig fmt: on - -pub inline fn veclen(comptime T: type) comptime_int { - return @typeInfo(T).Vector.len; -} - -pub inline fn splat(comptime T: type, value: f32) T { - return @splat(veclen(T), value); -} -pub inline fn splatInt(comptime T: type, value: u32) T { - return @splat(veclen(T), @bitCast(f32, value)); -} - -pub fn load(mem: []const f32, comptime T: type, comptime len: u32) T { - var v = splat(T, 0.0); - comptime var loop_len = if (len == 0) veclen(T) else len; - 
comptime var i: u32 = 0; - inline while (i < loop_len) : (i += 1) { - v[i] = mem[i]; - } - return v; -} -test "zmath.load" { - const a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 }; - var ptr = &a; - var i: u32 = 0; - const v0 = load(a[i..], F32x4, 2); - try expect(approxEqAbs(v0, F32x4{ 1.0, 2.0, 0.0, 0.0 }, 0.0)); - i += 2; - const v1 = load(a[i .. i + 2], F32x4, 2); - try expect(approxEqAbs(v1, F32x4{ 3.0, 4.0, 0.0, 0.0 }, 0.0)); - const v2 = load(a[5..7], F32x4, 2); - try expect(approxEqAbs(v2, F32x4{ 6.0, 7.0, 0.0, 0.0 }, 0.0)); - const v3 = load(ptr[1..], F32x4, 2); - try expect(approxEqAbs(v3, F32x4{ 2.0, 3.0, 0.0, 0.0 }, 0.0)); - i += 1; - const v4 = load(ptr[i .. i + 2], F32x4, 2); - try expect(approxEqAbs(v4, F32x4{ 4.0, 5.0, 0.0, 0.0 }, 0.0)); -} - -pub fn store(mem: []f32, v: anytype, comptime len: u32) void { - const T = @TypeOf(v); - comptime var loop_len = if (len == 0) veclen(T) else len; - comptime var i: u32 = 0; - inline while (i < loop_len) : (i += 1) { - mem[i] = v[i]; - } -} -test "zmath.store" { - var a = [7]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 }; - const v = load(a[1..], F32x4, 3); - store(a[2..], v, 4); - try expect(a[0] == 1.0); - try expect(a[1] == 2.0); - try expect(a[2] == 2.0); - try expect(a[3] == 3.0); - try expect(a[4] == 4.0); - try expect(a[5] == 0.0); -} - -pub inline fn loadArr2(arr: [2]f32) F32x4 { - return f32x4(arr[0], arr[1], 0.0, 0.0); -} -pub inline fn loadArr2zw(arr: [2]f32, z: f32, w: f32) F32x4 { - return f32x4(arr[0], arr[1], z, w); -} -pub inline fn loadArr3(arr: [3]f32) F32x4 { - return f32x4(arr[0], arr[1], arr[2], 0.0); -} -pub inline fn loadArr3w(arr: [3]f32, w: f32) F32x4 { - return f32x4(arr[0], arr[1], arr[2], w); -} -pub inline fn loadArr4(arr: [4]f32) F32x4 { - return f32x4(arr[0], arr[1], arr[2], arr[3]); -} - -pub inline fn storeArr2(arr: *[2]f32, v: F32x4) void { - arr.* = .{ v[0], v[1] }; -} -pub inline fn storeArr3(arr: *[3]f32, v: F32x4) void { - arr.* = .{ v[0], v[1], v[2] }; -} -pub inline fn 
storeArr4(arr: *[4]f32, v: F32x4) void { - arr.* = .{ v[0], v[1], v[2], v[3] }; -} - -pub inline fn arr3Ptr(ptr: anytype) *const [3]f32 { - comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer); - const T = std.meta.Child(@TypeOf(ptr)); - comptime assert(T == F32x4); - return @ptrCast(*const [3]f32, ptr); -} - -pub inline fn arrNPtr(ptr: anytype) [*]const f32 { - comptime assert(@typeInfo(@TypeOf(ptr)) == .Pointer); - const T = std.meta.Child(@TypeOf(ptr)); - comptime assert(T == Mat or T == F32x4 or T == F32x8 or T == F32x16); - return @ptrCast([*]const f32, ptr); -} -test "zmath.arrNPtr" { - { - const mat = identity(); - const f32ptr = arrNPtr(&mat); - try expect(f32ptr[0] == 1.0); - try expect(f32ptr[5] == 1.0); - try expect(f32ptr[10] == 1.0); - try expect(f32ptr[15] == 1.0); - } - { - const v8 = f32x8s(1.0); - const f32ptr = arrNPtr(&v8); - try expect(f32ptr[1] == 1.0); - try expect(f32ptr[7] == 1.0); - } -} - -test "zmath.loadArr" { - { - const camera_position = [3]f32{ 1.0, 2.0, 3.0 }; - const simd_reg = loadArr3(camera_position); - try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 0.0), 0.0)); - } - { - const camera_position = [3]f32{ 1.0, 2.0, 3.0 }; - const simd_reg = loadArr3w(camera_position, 1.0); - try expect(approxEqAbs(simd_reg, f32x4(1.0, 2.0, 3.0, 1.0), 0.0)); - } -} - -pub inline fn vecToArr2(v: Vec) [2]f32 { - return .{ v[0], v[1] }; -} -pub inline fn vecToArr3(v: Vec) [3]f32 { - return .{ v[0], v[1], v[2] }; -} -pub inline fn vecToArr4(v: Vec) [4]f32 { - return .{ v[0], v[1], v[2], v[3] }; -} -// ------------------------------------------------------------------------------ -// -// 2. 
Functions that work on all vector components (F32xN = F32x4 or F32x8 or F32x16) -// -// ------------------------------------------------------------------------------ -pub fn all(vb: anytype, comptime len: u32) bool { - const T = @TypeOf(vb); - if (len > veclen(T)) { - @compileError("zmath.all(): 'len' is greater than vector len of type " ++ @typeName(T)); - } - comptime var loop_len = if (len == 0) veclen(T) else len; - const ab: [veclen(T)]bool = vb; - comptime var i: u32 = 0; - var result = true; - inline while (i < loop_len) : (i += 1) { - result = result and ab[i]; - } - return result; -} -test "zmath.all" { - try expect(all(boolx8(true, true, true, true, true, false, true, false), 5) == true); - try expect(all(boolx8(true, true, true, true, true, false, true, false), 6) == false); - try expect(all(boolx8(true, true, true, true, false, false, false, false), 4) == true); - try expect(all(boolx4(true, true, true, false), 3) == true); - try expect(all(boolx4(true, true, true, false), 1) == true); - try expect(all(boolx4(true, false, false, false), 1) == true); - try expect(all(boolx4(false, true, false, false), 1) == false); - try expect(all(boolx8(true, true, true, true, true, false, true, false), 0) == false); - try expect(all(boolx4(false, true, false, false), 0) == false); - try expect(all(boolx4(true, true, true, true), 0) == true); -} - -pub fn any(vb: anytype, comptime len: u32) bool { - const T = @TypeOf(vb); - if (len > veclen(T)) { - @compileError("zmath.any(): 'len' is greater than vector len of type " ++ @typeName(T)); - } - comptime var loop_len = if (len == 0) veclen(T) else len; - const ab: [veclen(T)]bool = vb; - comptime var i: u32 = 0; - var result = false; - inline while (i < loop_len) : (i += 1) { - result = result or ab[i]; - } - return result; -} -test "zmath.any" { - try expect(any(boolx8(true, true, true, true, true, false, true, false), 0) == true); - try expect(any(boolx8(false, false, false, true, true, false, true, false), 3) == 
false); - try expect(any(boolx8(false, false, false, false, false, true, false, false), 4) == false); -} - -pub inline fn isNearEqual( - v0: anytype, - v1: anytype, - epsilon: anytype, -) @Vector(veclen(@TypeOf(v0)), bool) { - const T = @TypeOf(v0, v1, epsilon); - const delta = v0 - v1; - const temp = maxFast(delta, splat(T, 0.0) - delta); - return temp <= epsilon; -} -test "zmath.isNearEqual" { - if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; - { - const v0 = f32x4(1.0, 2.0, -3.0, 4.001); - const v1 = f32x4(1.0, 2.1, 3.0, 4.0); - const b = isNearEqual(v0, v1, splat(F32x4, 0.01)); - try expect(@reduce(.And, b == boolx4(true, false, false, true))); - } - { - const v0 = f32x8(1.0, 2.0, -3.0, 4.001, 1.001, 2.3, -0.0, 0.0); - const v1 = f32x8(1.0, 2.1, 3.0, 4.0, -1.001, 2.1, 0.0, 0.0); - const b = isNearEqual(v0, v1, splat(F32x8, 0.01)); - try expect(@reduce(.And, b == boolx8(true, false, false, true, false, false, true, true))); - } - try expect(all(isNearEqual( - splat(F32x4, math.inf_f32), - splat(F32x4, math.inf_f32), - splat(F32x4, 0.0001), - ), 0) == false); - try expect(all(isNearEqual( - splat(F32x4, -math.inf_f32), - splat(F32x4, math.inf_f32), - splat(F32x4, 0.0001), - ), 0) == false); - try expect(all(isNearEqual( - splat(F32x4, -math.inf_f32), - splat(F32x4, -math.inf_f32), - splat(F32x4, 0.0001), - ), 0) == false); - try expect(all(isNearEqual( - splat(F32x4, -math.nan_f32), - splat(F32x4, math.inf_f32), - splat(F32x4, 0.0001), - ), 0) == false); -} - -pub inline fn isNan( - v: anytype, -) @Vector(veclen(@TypeOf(v)), bool) { - return v != v; -} -test "zmath.isNan" { - { - const v0 = f32x4(math.inf_f32, math.nan_f32, math.nan_f32, 7.0); - const b = isNan(v0); - try expect(@reduce(.And, b == boolx4(false, true, true, false))); - } - { - const v0 = f32x8(0, math.nan_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); - const b = isNan(v0); - try expect(@reduce(.And, b == boolx8(false, true, false, 
false, false, true, true, false))); - } -} - -pub inline fn isInf( - v: anytype, -) @Vector(veclen(@TypeOf(v)), bool) { - const T = @TypeOf(v); - return abs(v) == splat(T, math.inf_f32); -} -test "zmath.isInf" { - { - const v0 = f32x4(math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); - const b = isInf(v0); - try expect(@reduce(.And, b == boolx4(true, false, false, false))); - } - { - const v0 = f32x8(0, math.inf_f32, 0, 0, math.inf_f32, math.nan_f32, math.qnan_f32, 7.0); - const b = isInf(v0); - try expect(@reduce(.And, b == boolx8(false, true, false, false, true, false, false, false))); - } -} - -pub inline fn isInBounds( - v: anytype, - bounds: anytype, -) @Vector(veclen(@TypeOf(v)), bool) { - const T = @TypeOf(v, bounds); - const Tu = @Vector(veclen(T), u1); - const Tr = @Vector(veclen(T), bool); - - // 2 x cmpleps, xorps, load, andps - const b0 = v <= bounds; - const b1 = (bounds * splat(T, -1.0)) <= v; - const b0u = @bitCast(Tu, b0); - const b1u = @bitCast(Tu, b1); - return @bitCast(Tr, b0u & b1u); -} -test "zmath.isInBounds" { - { - const v0 = f32x4(0.5, -2.0, -1.0, 1.9); - const v1 = f32x4(-1.6, -2.001, -1.0, 1.9); - const bounds = f32x4(1.0, 2.0, 1.0, 2.0); - const b0 = isInBounds(v0, bounds); - const b1 = isInBounds(v1, bounds); - try expect(@reduce(.And, b0 == boolx4(true, true, true, true))); - try expect(@reduce(.And, b1 == boolx4(false, false, true, true))); - } - { - const v0 = f32x8(2.0, 1.0, 2.0, 1.0, 0.5, -2.0, -1.0, 1.9); - const bounds = f32x8(1.0, 1.0, 1.0, math.inf_f32, 1.0, math.nan_f32, 1.0, 2.0); - const b0 = isInBounds(v0, bounds); - try expect(@reduce(.And, b0 == boolx8(false, true, false, true, true, false, true, true))); - } -} - -pub inline fn andInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - const Tu = @Vector(veclen(T), u32); - const v0u = @bitCast(Tu, v0); - const v1u = @bitCast(Tu, v1); - return @bitCast(T, v0u & v1u); // andps -} -test "zmath.andInt" { - { - const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 
0)), 0, @bitCast(f32, ~@as(u32, 0))); - const v1 = f32x4(1.0, 2.0, 3.0, math.inf_f32); - const v = andInt(v0, v1); - try expect(v[3] == math.inf_f32); - try expect(approxEqAbs(v, f32x4(0.0, 2.0, 0.0, math.inf_f32), 0.0)); - } - { - const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); - const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, math.inf_f32); - const v = andInt(v0, v1); - try expect(v[7] == math.inf_f32); - try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 0.0, 2.0, 0.0, math.inf_f32), 0.0)); - } -} - -pub inline fn andNotInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - const Tu = @Vector(veclen(T), u32); - const v0u = @bitCast(Tu, v0); - const v1u = @bitCast(Tu, v1); - return @bitCast(T, ~v0u & v1u); // andnps -} -test "zmath.andNotInt" { - { - const v0 = f32x4(1.0, 2.0, 3.0, 4.0); - const v1 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); - const v = andNotInt(v1, v0); - try expect(approxEqAbs(v, f32x4(1.0, 0.0, 3.0, 0.0), 0.0)); - } - { - const v0 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0); - const v1 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, @bitCast(f32, ~@as(u32, 0))); - const v = andNotInt(v1, v0); - try expect(approxEqAbs(v, f32x8(0, 0, 0, 0, 1.0, 0.0, 3.0, 0.0), 0.0)); - } -} - -pub inline fn orInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - const Tu = @Vector(veclen(T), u32); - const v0u = @bitCast(Tu, v0); - const v1u = @bitCast(Tu, v1); - return @bitCast(T, v0u | v1u); // orps -} -test "zmath.orInt" { - { - const v0 = f32x4(0, @bitCast(f32, ~@as(u32, 0)), 0, 0); - const v1 = f32x4(1.0, 2.0, 3.0, 4.0); - const v = orInt(v0, v1); - try expect(v[0] == 1.0); - try expect(@bitCast(u32, v[1]) == ~@as(u32, 0)); - try expect(v[2] == 3.0); - try expect(v[3] == 4.0); - } - { - const v0 = f32x8(0, 0, 0, 0, 0, @bitCast(f32, ~@as(u32, 0)), 0, 0); - const v1 = f32x8(0, 0, 0, 0, 1.0, 2.0, 3.0, 4.0); - const v = orInt(v0, v1); - try 
expect(v[4] == 1.0); - try expect(@bitCast(u32, v[5]) == ~@as(u32, 0)); - try expect(v[6] == 3.0); - try expect(v[7] == 4.0); - } -} - -pub inline fn norInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - const Tu = @Vector(veclen(T), u32); - const v0u = @bitCast(Tu, v0); - const v1u = @bitCast(Tu, v1); - return @bitCast(T, ~(v0u | v1u)); // por, pcmpeqd, pxor -} - -pub inline fn xorInt(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - const Tu = @Vector(veclen(T), u32); - const v0u = @bitCast(Tu, v0); - const v1u = @bitCast(Tu, v1); - return @bitCast(T, v0u ^ v1u); // xorps -} -test "zmath.xorInt" { - { - const v0 = f32x4(1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0); - const v1 = f32x4(1.0, 0, 0, 0); - const v = xorInt(v0, v1); - try expect(v[0] == 0.0); - try expect(@bitCast(u32, v[1]) == ~@as(u32, 0)); - try expect(v[2] == 0.0); - try expect(v[3] == 0.0); - } - { - const v0 = f32x8(0, 0, 0, 0, 1.0, @bitCast(f32, ~@as(u32, 0)), 0, 0); - const v1 = f32x8(0, 0, 0, 0, 1.0, 0, 0, 0); - const v = xorInt(v0, v1); - try expect(v[4] == 0.0); - try expect(@bitCast(u32, v[5]) == ~@as(u32, 0)); - try expect(v[6] == 0.0); - try expect(v[7] == 0.0); - } -} - -pub inline fn minFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - return select(v0 < v1, v0, v1); // minps -} -test "zmath.minFast" { - { - const v0 = f32x4(1.0, 3.0, 2.0, 7.0); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = minFast(v0, v1); - try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0)); - } - { - const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = minFast(v0, v1); - try expect(v[0] == 1.0); - try expect(v[1] == 1.0); - try expect(!math.isNan(v[1])); - try expect(v[2] == 4.0); - try expect(v[3] == math.inf_f32); - try expect(!math.isNan(v[3])); - } -} - -pub inline fn maxFast(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - return select(v0 > v1, v0, v1); // maxps -} -test 
"zmath.maxFast" { - { - const v0 = f32x4(1.0, 3.0, 2.0, 7.0); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = maxFast(v0, v1); - try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0)); - } - { - const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = maxFast(v0, v1); - try expect(v[0] == 2.0); - try expect(v[1] == 1.0); - try expect(v[2] == 5.0); - try expect(v[3] == math.inf_f32); - try expect(!math.isNan(v[3])); - } -} - -pub inline fn min(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - // This will handle inf & nan - return @min(v0, v1); // minps, cmpunordps, andps, andnps, orps -} -test "zmath.min" { - if (builtin.target.os.tag == .macos) return error.SkipZigTest; - { - const v0 = f32x4(1.0, 3.0, 2.0, 7.0); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = min(v0, v1); - try expect(approxEqAbs(v, f32x4(1.0, 1.0, 2.0, 7.0), 0.0)); - } - { - const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0); - const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32); - const v = min(v0, v1); - try expect(approxEqAbs(v, f32x8(0.0, 0.0, -2.0, 0.0, 1.0, 1.0, 2.0, 7.0), 0.0)); - } - { - const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = min(v0, v1); - try expect(v[0] == 1.0); - try expect(v[1] == 1.0); - try expect(!math.isNan(v[1])); - try expect(v[2] == 4.0); - try expect(v[3] == math.inf_f32); - try expect(!math.isNan(v[3])); - } - { - const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32); - const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32); - const v = min(v0, v1); - try expect(v[0] == -math.inf_f32); - try expect(v[1] == -math.inf_f32); - try expect(v[2] == math.inf_f32); - try expect(!math.isNan(v[2])); - try expect(math.isNan(v[3])); - try expect(!math.isInf(v[3])); - } -} - -pub inline fn max(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - // This will handle inf & 
nan - return @max(v0, v1); // maxps, cmpunordps, andps, andnps, orps -} -test "zmath.max" { - if (builtin.target.os.tag == .macos) return error.SkipZigTest; - { - const v0 = f32x4(1.0, 3.0, 2.0, 7.0); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = max(v0, v1); - try expect(approxEqAbs(v, f32x4(2.0, 3.0, 4.0, math.inf_f32), 0.0)); - } - { - const v0 = f32x8(0, 0, -2.0, 0, 1.0, 3.0, 2.0, 7.0); - const v1 = f32x8(0, 1.0, 0, 0, 2.0, 1.0, 4.0, math.inf_f32); - const v = max(v0, v1); - try expect(approxEqAbs(v, f32x8(0.0, 1.0, 0.0, 0.0, 2.0, 3.0, 4.0, math.inf_f32), 0.0)); - } - { - const v0 = f32x4(1.0, math.nan_f32, 5.0, math.qnan_f32); - const v1 = f32x4(2.0, 1.0, 4.0, math.inf_f32); - const v = max(v0, v1); - try expect(v[0] == 2.0); - try expect(v[1] == 1.0); - try expect(v[2] == 5.0); - try expect(v[3] == math.inf_f32); - try expect(!math.isNan(v[3])); - } - { - const v0 = f32x4(-math.inf_f32, math.inf_f32, math.inf_f32, math.qnan_f32); - const v1 = f32x4(math.qnan_f32, -math.inf_f32, math.qnan_f32, math.nan_f32); - const v = max(v0, v1); - try expect(v[0] == -math.inf_f32); - try expect(v[1] == math.inf_f32); - try expect(v[2] == math.inf_f32); - try expect(!math.isNan(v[2])); - try expect(math.isNan(v[3])); - try expect(!math.isInf(v[3])); - } -} - -pub fn round(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - if (cpu_arch == .x86_64 and has_avx) { - if (T == F32x4) { - return asm ("vroundps $0, %%xmm0, %%xmm0" - : [ret] "={xmm0}" (-> T), - : [v] "{xmm0}" (v), - ); - } else if (T == F32x8) { - return asm ("vroundps $0, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> T), - : [v] "{ymm0}" (v), - ); - } else if (T == F32x16 and has_avx512f) { - return asm ("vrndscaleps $0, %%zmm0, %%zmm0" - : [ret] "={zmm0}" (-> T), - : [v] "{zmm0}" (v), - ); - } else if (T == F32x16 and !has_avx512f) { - const arr: [16]f32 = v; - var ymm0 = @as(F32x8, arr[0..8].*); - var ymm1 = @as(F32x8, arr[8..16].*); - ymm0 = asm ("vroundps $0, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> 
F32x8), - : [v] "{ymm0}" (ymm0), - ); - ymm1 = asm ("vroundps $0, %%ymm1, %%ymm1" - : [ret] "={ymm1}" (-> F32x8), - : [v] "{ymm1}" (ymm1), - ); - return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); - } - } else { - const sign = andInt(v, splatNegativeZero(T)); - const magic = orInt(splatNoFraction(T), sign); - var r1 = v + magic; - r1 = r1 - magic; - const r2 = abs(v); - const mask = r2 <= splatNoFraction(T); - return select(mask, r1, v); - } -} -test "zmath.round" { - { - try expect(all(round(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); - try expect(all(round(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); - try expect(all(isNan(round(splat(F32x4, math.nan_f32))), 0)); - try expect(all(isNan(round(splat(F32x4, -math.nan_f32))), 0)); - try expect(all(isNan(round(splat(F32x4, math.qnan_f32))), 0)); - try expect(all(isNan(round(splat(F32x4, -math.qnan_f32))), 0)); - } - { - var v = round(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); - try expect(approxEqAbs( - v, - f32x16(1.0, -1.0, -2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 6.0, 8.0, 9.0, 10.0, 11.0, 13.0, 13.0), - 0.0, - )); - } - var v = round(f32x4(1.1, -1.1, -1.5, 1.5)); - try expect(approxEqAbs(v, f32x4(1.0, -1.0, -2.0, 2.0), 0.0)); - - const v1 = f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32); - v = round(v1); - try expect(v[3] == math.inf_f32); - try expect(approxEqAbs(v, f32x4(-10_000_000.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); - - const v2 = f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32); - v = round(v2); - try expect(math.isNan(v2[0])); - try expect(math.isNan(v2[1])); - try expect(math.isNan(v2[2])); - try expect(v2[3] == -math.inf_f32); - - const v3 = f32x4(1001.5, -201.499, -10000.99, -101.5); - v = round(v3); - try expect(approxEqAbs(v, f32x4(1002.0, -201.0, -10001.0, -102.0), 0.0)); - - const v4 = f32x4(-1_388_609.9, 
1_388_609.5, 1_388_109.01, 2_388_609.5); - v = round(v4); - try expect(approxEqAbs(v, f32x4(-1_388_610.0, 1_388_610.0, 1_388_109.0, 2_388_610.0), 0.0)); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = round(splat(F32x4, f)); - const fr = @round(splat(F32x4, f)); - const vr8 = round(splat(F32x8, f)); - const fr8 = @round(splat(F32x8, f)); - const vr16 = round(splat(F32x16, f)); - const fr16 = @round(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, 0.0)); - try expect(approxEqAbs(vr8, fr8, 0.0)); - try expect(approxEqAbs(vr16, fr16, 0.0)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -pub fn trunc(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - if (cpu_arch == .x86_64 and has_avx) { - if (T == F32x4) { - return asm ("vroundps $3, %%xmm0, %%xmm0" - : [ret] "={xmm0}" (-> T), - : [v] "{xmm0}" (v), - ); - } else if (T == F32x8) { - return asm ("vroundps $3, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> T), - : [v] "{ymm0}" (v), - ); - } else if (T == F32x16 and has_avx512f) { - return asm ("vrndscaleps $3, %%zmm0, %%zmm0" - : [ret] "={zmm0}" (-> T), - : [v] "{zmm0}" (v), - ); - } else if (T == F32x16 and !has_avx512f) { - const arr: [16]f32 = v; - var ymm0 = @as(F32x8, arr[0..8].*); - var ymm1 = @as(F32x8, arr[8..16].*); - ymm0 = asm ("vroundps $3, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> F32x8), - : [v] "{ymm0}" (ymm0), - ); - ymm1 = asm ("vroundps $3, %%ymm1, %%ymm1" - : [ret] "={ymm1}" (-> F32x8), - : [v] "{ymm1}" (ymm1), - ); - return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); - } - } else { - const mask = abs(v) < splatNoFraction(T); - const result = floatToIntAndBack(v); - return select(mask, result, v); - } -} -test "zmath.trunc" { - { - try expect(all(trunc(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); - try expect(all(trunc(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); - try expect(all(isNan(trunc(splat(F32x4, math.nan_f32))), 0)); - 
try expect(all(isNan(trunc(splat(F32x4, -math.nan_f32))), 0)); - try expect(all(isNan(trunc(splat(F32x4, math.qnan_f32))), 0)); - try expect(all(isNan(trunc(splat(F32x4, -math.qnan_f32))), 0)); - } - { - var v = trunc(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); - try expect(approxEqAbs( - v, - f32x16(1.0, -1.0, -1.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0), - 0.0, - )); - } - var v = trunc(f32x4(1.1, -1.1, -1.5, 1.5)); - try expect(approxEqAbs(v, f32x4(1.0, -1.0, -1.0, 1.0), 0.0)); - - v = trunc(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); - try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); - - v = trunc(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); - try expect(math.isNan(v[0])); - try expect(math.isNan(v[1])); - try expect(math.isNan(v[2])); - try expect(v[3] == -math.inf_f32); - - v = trunc(f32x4(1000.5001, -201.499, -10000.99, 100.750001)); - try expect(approxEqAbs(v, f32x4(1000.0, -201.0, -10000.0, 100.0), 0.0)); - - v = trunc(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5)); - try expect(approxEqAbs(v, f32x4(-7_388_609.0, 7_388_609.0, 8_388_109.0, -8_388_509.0), 0.0)); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = trunc(splat(F32x4, f)); - const fr = @trunc(splat(F32x4, f)); - const vr8 = trunc(splat(F32x8, f)); - const fr8 = @trunc(splat(F32x8, f)); - const vr16 = trunc(splat(F32x16, f)); - const fr16 = @trunc(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, 0.0)); - try expect(approxEqAbs(vr8, fr8, 0.0)); - try expect(approxEqAbs(vr16, fr16, 0.0)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -pub fn floor(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - if (cpu_arch == .x86_64 and has_avx) { - if (T == F32x4) { - return asm ("vroundps $1, %%xmm0, %%xmm0" - : [ret] "={xmm0}" (-> T), - : [v] "{xmm0}" (v), - ); - } else if (T == 
F32x8) { - return asm ("vroundps $1, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> T), - : [v] "{ymm0}" (v), - ); - } else if (T == F32x16 and has_avx512f) { - return asm ("vrndscaleps $1, %%zmm0, %%zmm0" - : [ret] "={zmm0}" (-> T), - : [v] "{zmm0}" (v), - ); - } else if (T == F32x16 and !has_avx512f) { - const arr: [16]f32 = v; - var ymm0 = @as(F32x8, arr[0..8].*); - var ymm1 = @as(F32x8, arr[8..16].*); - ymm0 = asm ("vroundps $1, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> F32x8), - : [v] "{ymm0}" (ymm0), - ); - ymm1 = asm ("vroundps $1, %%ymm1, %%ymm1" - : [ret] "={ymm1}" (-> F32x8), - : [v] "{ymm1}" (ymm1), - ); - return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); - } - } else { - const mask = abs(v) < splatNoFraction(T); - var result = floatToIntAndBack(v); - const larger_mask = result > v; - const larger = select(larger_mask, splat(T, -1.0), splat(T, 0.0)); - result = result + larger; - return select(mask, result, v); - } -} -test "zmath.floor" { - { - try expect(all(floor(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); - try expect(all(floor(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); - try expect(all(isNan(floor(splat(F32x4, math.nan_f32))), 0)); - try expect(all(isNan(floor(splat(F32x4, -math.nan_f32))), 0)); - try expect(all(isNan(floor(splat(F32x4, math.qnan_f32))), 0)); - try expect(all(isNan(floor(splat(F32x4, -math.qnan_f32))), 0)); - } - { - var v = floor(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); - try expect(approxEqAbs( - v, - f32x16(1.0, -2.0, -2.0, 1.0, 2.0, 2.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0), - 0.0, - )); - } - var v = floor(f32x4(1.5, -1.5, -1.7, -2.1)); - try expect(approxEqAbs(v, f32x4(1.0, -2.0, -2.0, -3.0), 0.0)); - - v = floor(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); - try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 
0.0)); - - v = floor(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); - try expect(math.isNan(v[0])); - try expect(math.isNan(v[1])); - try expect(math.isNan(v[2])); - try expect(v[3] == -math.inf_f32); - - v = floor(f32x4(1000.5001, -201.499, -10000.99, 100.75001)); - try expect(approxEqAbs(v, f32x4(1000.0, -202.0, -10001.0, 100.0), 0.0)); - - v = floor(f32x4(-7_388_609.5, 7_388_609.1, 8_388_109.5, -8_388_509.5)); - try expect(approxEqAbs(v, f32x4(-7_388_610.0, 7_388_609.0, 8_388_109.0, -8_388_510.0), 0.0)); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = floor(splat(F32x4, f)); - const fr = @floor(splat(F32x4, f)); - const vr8 = floor(splat(F32x8, f)); - const fr8 = @floor(splat(F32x8, f)); - const vr16 = floor(splat(F32x16, f)); - const fr16 = @floor(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, 0.0)); - try expect(approxEqAbs(vr8, fr8, 0.0)); - try expect(approxEqAbs(vr16, fr16, 0.0)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -pub fn ceil(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - if (cpu_arch == .x86_64 and has_avx) { - if (T == F32x4) { - return asm ("vroundps $2, %%xmm0, %%xmm0" - : [ret] "={xmm0}" (-> T), - : [v] "{xmm0}" (v), - ); - } else if (T == F32x8) { - return asm ("vroundps $2, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> T), - : [v] "{ymm0}" (v), - ); - } else if (T == F32x16 and has_avx512f) { - return asm ("vrndscaleps $2, %%zmm0, %%zmm0" - : [ret] "={zmm0}" (-> T), - : [v] "{zmm0}" (v), - ); - } else if (T == F32x16 and !has_avx512f) { - const arr: [16]f32 = v; - var ymm0 = @as(F32x8, arr[0..8].*); - var ymm1 = @as(F32x8, arr[8..16].*); - ymm0 = asm ("vroundps $2, %%ymm0, %%ymm0" - : [ret] "={ymm0}" (-> F32x8), - : [v] "{ymm0}" (ymm0), - ); - ymm1 = asm ("vroundps $2, %%ymm1, %%ymm1" - : [ret] "={ymm1}" (-> F32x8), - : [v] "{ymm1}" (ymm1), - ); - return @shuffle(f32, ymm0, ymm1, [16]i32{ 0, 1, 2, 3, 4, 5, 6, 7, -1, -2, -3, -4, -5, -6, -7, -8 }); - } - } else { - const mask 
= abs(v) < splatNoFraction(T); - var result = floatToIntAndBack(v); - const smaller_mask = result < v; - const smaller = select(smaller_mask, splat(T, -1.0), splat(T, 0.0)); - result = result - smaller; - return select(mask, result, v); - } -} -test "zmath.ceil" { - { - try expect(all(ceil(splat(F32x4, math.inf_f32)) == splat(F32x4, math.inf_f32), 0)); - try expect(all(ceil(splat(F32x4, -math.inf_f32)) == splat(F32x4, -math.inf_f32), 0)); - try expect(all(isNan(ceil(splat(F32x4, math.nan_f32))), 0)); - try expect(all(isNan(ceil(splat(F32x4, -math.nan_f32))), 0)); - try expect(all(isNan(ceil(splat(F32x4, math.qnan_f32))), 0)); - try expect(all(isNan(ceil(splat(F32x4, -math.qnan_f32))), 0)); - } - { - var v = ceil(f32x16(1.1, -1.1, -1.5, 1.5, 2.1, 2.8, 2.9, 4.1, 5.8, 6.1, 7.9, 8.9, 10.1, 11.2, 12.7, 13.1)); - try expect(approxEqAbs( - v, - f32x16(2.0, -1.0, -1.0, 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 11.0, 12.0, 13.0, 14.0), - 0.0, - )); - } - var v = ceil(f32x4(1.5, -1.5, -1.7, -2.1)); - try expect(approxEqAbs(v, f32x4(2.0, -1.0, -1.0, -2.0), 0.0)); - - v = ceil(f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32)); - try expect(approxEqAbs(v, f32x4(-10_000_002.1, -math.inf_f32, 10_000_001.5, math.inf_f32), 0.0)); - - v = ceil(f32x4(-math.qnan_f32, math.qnan_f32, math.nan_f32, -math.inf_f32)); - try expect(math.isNan(v[0])); - try expect(math.isNan(v[1])); - try expect(math.isNan(v[2])); - try expect(v[3] == -math.inf_f32); - - v = ceil(f32x4(1000.5001, -201.499, -10000.99, 100.75001)); - try expect(approxEqAbs(v, f32x4(1001.0, -201.0, -10000.0, 101.0), 0.0)); - - v = ceil(f32x4(-1_388_609.5, 1_388_609.1, 1_388_109.9, -1_388_509.9)); - try expect(approxEqAbs(v, f32x4(-1_388_609.0, 1_388_610.0, 1_388_110.0, -1_388_509.0), 0.0)); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = ceil(splat(F32x4, f)); - const fr = @ceil(splat(F32x4, f)); - const vr8 = ceil(splat(F32x8, f)); - const fr8 = @ceil(splat(F32x8, f)); - 
const vr16 = ceil(splat(F32x16, f)); - const fr16 = @ceil(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, 0.0)); - try expect(approxEqAbs(vr8, fr8, 0.0)); - try expect(approxEqAbs(vr16, fr16, 0.0)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -pub inline fn clamp(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) { - var result = max(vmin, v); - result = min(vmax, result); - return result; -} -test "zmath.clamp" { - if (builtin.target.os.tag == .macos) return error.SkipZigTest; - { - const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); - const v = clamp(v0, splat(F32x4, -0.5), splat(F32x4, 0.5)); - try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001)); - } - { - const v0 = f32x8(-2.0, 0.25, -0.25, 100.0, -1.0, 0.2, 1.1, -0.3); - const v = clamp(v0, splat(F32x8, -0.5), splat(F32x8, 0.5)); - try expect(approxEqAbs(v, f32x8(-0.5, 0.25, -0.25, 0.5, -0.5, 0.2, 0.5, -0.3), 0.0001)); - } - { - const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); - const v = clamp(v0, f32x4(-100.0, 0.0, -100.0, 0.0), f32x4(0.0, 100.0, 0.0, 100.0)); - try expect(approxEqAbs(v, f32x4(-100.0, 100.0, -100.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); - const v = clamp(v0, splat(F32x4, -1.0), splat(F32x4, 1.0)); - try expect(approxEqAbs(v, f32x4(1.0, 1.0, -1.0, -1.0), 0.0001)); - } -} - -pub inline fn clampFast(v: anytype, vmin: anytype, vmax: anytype) @TypeOf(v, vmin, vmax) { - var result = maxFast(vmin, v); - result = minFast(vmax, result); - return result; -} -test "zmath.clampFast" { - { - const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); - const v = clampFast(v0, splat(F32x4, -0.5), splat(F32x4, 0.5)); - try expect(approxEqAbs(v, f32x4(-0.5, 0.2, 0.5, -0.3), 0.0001)); - } -} - -pub inline fn saturate(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - var result = max(v, splat(T, 0.0)); - result = min(result, splat(T, 1.0)); - return result; -} -test "zmath.saturate" { - if (builtin.target.os.tag == 
.macos) return error.SkipZigTest; - { - const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); - const v = saturate(v0); - try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001)); - } - { - const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3); - const v = saturate(v0); - try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); - const v = saturate(v0); - try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); - const v = saturate(v0); - try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001)); - } -} - -pub inline fn saturateFast(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - var result = maxFast(v, splat(T, 0.0)); - result = minFast(result, splat(T, 1.0)); - return result; -} -test "zmath.saturateFast" { - { - const v0 = f32x4(-1.0, 0.2, 1.1, -0.3); - const v = saturateFast(v0); - try expect(approxEqAbs(v, f32x4(0.0, 0.2, 1.0, 0.0), 0.0001)); - } - { - const v0 = f32x8(0.0, 0.0, 2.0, -2.0, -1.0, 0.2, 1.1, -0.3); - const v = saturateFast(v0); - try expect(approxEqAbs(v, f32x8(0.0, 0.0, 1.0, 0.0, 0.0, 0.2, 1.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(-math.inf_f32, math.inf_f32, math.nan_f32, math.qnan_f32); - const v = saturateFast(v0); - try expect(approxEqAbs(v, f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(math.inf_f32, math.inf_f32, -math.nan_f32, -math.qnan_f32); - const v = saturateFast(v0); - try expect(approxEqAbs(v, f32x4(1.0, 1.0, 0.0, 0.0), 0.0001)); - } -} - -pub inline fn sqrt(v: anytype) @TypeOf(v) { - return @sqrt(v); // sqrtps -} - -pub inline fn abs(v: anytype) @TypeOf(v) { - return @fabs(v); // load, andps -} - -pub inline fn select(mask: anytype, v0: anytype, v1: anytype) @TypeOf(v0, v1) { - return @select(f32, mask, v0, v1); -} - -pub inline fn lerp(v0: anytype, v1: anytype, t: f32) @TypeOf(v0, v1) { - const T = 
@TypeOf(v0, v1); - return v0 + (v1 - v0) * splat(T, t); // subps, shufps, addps, mulps -} - -pub inline fn lerpV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) { - return v0 + (v1 - v0) * t; // subps, addps, mulps -} - -pub inline fn lerpInverse(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1) { - const T = @TypeOf(v0, v1); - return (splat(T, t) - v0) / (v1 - v0); -} - -pub inline fn lerpInverseV(v0: anytype, v1: anytype, t: anytype) @TypeOf(v0, v1, t) { - return (t - v0) / (v1 - v0); -} -test "zmath.lerpInverse" { - try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 10.0), 0, 0.0005)); - try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 100.0), 1, 0.0005)); - try expect(math.approxEqAbs(f32, lerpInverseV(10.0, 100.0, 55.0), 0.5, 0.05)); - try expect(approxEqAbs(lerpInverse(f32x4(0, 0, 10, 10), f32x4(100, 200, 100, 100), 10.0), f32x4(0.1, 0.05, 0, 0), 0.0005)); -} - -/// To transform a vector of values from one range to another. -pub inline fn mapLinear(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v) { - const T = @TypeOf(v); - const min1V = splat(T, min1); - const max1V = splat(T, max1); - const min2V = splat(T, min2); - const max2V = splat(T, max2); - const dV = max1V - min1V; - return min2V + (v - min1V) * (max2V - min2V) / dV; -} - -pub inline fn mapLinearV(v: anytype, min1: anytype, max1: anytype, min2: anytype, max2: anytype) @TypeOf(v, min1, max1, min2, max2) { - const d = max1 - min1; - return min2 + (v - min1) * (max2 - min2) / d; -} -test "zmath.mapLinear" { - try expect(math.approxEqAbs(f32, mapLinearV(0, 0, 1.2, 10, 100), 10, 0.0005)); - try expect(math.approxEqAbs(f32, mapLinearV(1.2, 0, 1.2, 10, 100), 100, 0.0005)); - try expect(math.approxEqAbs(f32, mapLinearV(0.6, 0, 1.2, 10, 100), 55, 0.0005)); - try expect(approxEqAbs(mapLinearV(splat(F32x4, 0), splat(F32x4, 0), splat(F32x4, 1.2), splat(F32x4, 10), splat(F32x4, 100)), splat(F32x4, 10), 0.0005)); - try 
expect(approxEqAbs(mapLinear(f32x4(0, 0, 0.6, 1.2), 0, 1.2, 10, 100), f32x4(10, 10, 55, 100), 0.0005)); -} - -pub const F32x4Component = enum { x, y, z, w }; - -pub inline fn swizzle( - v: F32x4, - comptime x: F32x4Component, - comptime y: F32x4Component, - comptime z: F32x4Component, - comptime w: F32x4Component, -) F32x4 { - return @shuffle(f32, v, undefined, [4]i32{ @enumToInt(x), @enumToInt(y), @enumToInt(z), @enumToInt(w) }); -} - -pub inline fn mod(v0: anytype, v1: anytype) @TypeOf(v0, v1) { - // vdivps, vroundps, vmulps, vsubps - return v0 - v1 * trunc(v0 / v1); -} -test "zmath.mod" { - if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; - try expect(approxEqAbs(mod(splat(F32x4, 3.1), splat(F32x4, 1.7)), splat(F32x4, 1.4), 0.0005)); - try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, 2.0)), splat(F32x4, -1.0), 0.0005)); - try expect(approxEqAbs(mod(splat(F32x4, -3.0), splat(F32x4, -2.0)), splat(F32x4, -1.0), 0.0005)); - try expect(approxEqAbs(mod(splat(F32x4, 3.0), splat(F32x4, -2.0)), splat(F32x4, 1.0), 0.0005)); - try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, 1.0))), 0)); - try expect(all(isNan(mod(splat(F32x4, -math.inf_f32), splat(F32x4, 123.456))), 0)); - try expect(all(isNan(mod(splat(F32x4, math.nan_f32), splat(F32x4, 123.456))), 0)); - try expect(all(isNan(mod(splat(F32x4, math.qnan_f32), splat(F32x4, 123.456))), 0)); - try expect(all(isNan(mod(splat(F32x4, -math.qnan_f32), splat(F32x4, 123.456))), 0)); - try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.inf_f32))), 0)); - try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, -math.inf_f32))), 0)); - try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32))), 0)); - try expect(all(isNan(mod(splat(F32x4, 123.456), splat(F32x4, math.nan_f32))), 0)); - try expect(all(isNan(mod(splat(F32x4, math.inf_f32), splat(F32x4, math.nan_f32))), 0)); -} - -pub fn modAngle(v: anytype) @TypeOf(v) 
{ - const T = @TypeOf(v); - return switch (T) { - f32 => modAngle32(v), - F32x4, F32x8, F32x16 => modAngle32xN(v), - else => @compileError("zmath.modAngle() not implemented for " ++ @typeName(T)), - }; -} - -pub inline fn modAngle32xN(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - return v - splat(T, math.tau) * round(v * splat(T, 1.0 / math.tau)); // 2 x vmulps, 2 x load, vroundps, vaddps -} -test "zmath.modAngle" { - try expect(approxEqAbs(modAngle(splat(F32x4, math.tau)), splat(F32x4, 0.0), 0.0005)); - try expect(approxEqAbs(modAngle(splat(F32x4, 0.0)), splat(F32x4, 0.0), 0.0005)); - try expect(approxEqAbs(modAngle(splat(F32x4, math.pi)), splat(F32x4, math.pi), 0.0005)); - try expect(approxEqAbs(modAngle(splat(F32x4, 11 * math.pi)), splat(F32x4, math.pi), 0.0005)); - try expect(approxEqAbs(modAngle(splat(F32x4, 3.5 * math.pi)), splat(F32x4, -0.5 * math.pi), 0.0005)); - try expect(approxEqAbs(modAngle(splat(F32x4, 2.5 * math.pi)), splat(F32x4, 0.5 * math.pi), 0.0005)); -} - -pub inline fn mulAdd(v0: anytype, v1: anytype, v2: anytype) @TypeOf(v0, v1, v2) { - const T = @TypeOf(v0, v1, v2); - if (@import("zmath_options").enable_cross_platform_determinism) { - return v0 * v1 + v2; // Compiler will generate mul, add sequence (no fma even if the target supports it). - } else { - if (cpu_arch == .x86_64 and has_avx and has_fma) { - return @mulAdd(T, v0, v1, v2); - } else { - // NOTE(mziulek): On .x86_64 without HW fma instructions @mulAdd maps to really slow code! 
- return v0 * v1 + v2; - } - } -} - -fn sin32xN(v: anytype) @TypeOf(v) { - // 11-degree minimax approximation - const T = @TypeOf(v); - - var x = modAngle(v); - const sign = andInt(x, splatNegativeZero(T)); - const c = orInt(sign, splat(T, math.pi)); - const absx = andNotInt(sign, x); - const rflx = c - x; - const comp = absx <= splat(T, 0.5 * math.pi); - x = select(comp, x, rflx); - const x2 = x * x; - - var result = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06)); - result = mulAdd(result, x2, splat(T, -0.00019840874)); - result = mulAdd(result, x2, splat(T, 0.0083333310)); - result = mulAdd(result, x2, splat(T, -0.16666667)); - result = mulAdd(result, x2, splat(T, 1.0)); - return x * result; -} -test "zmath.sin" { - const epsilon = 0.0001; - - try expect(approxEqAbs(sin(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 1.0), epsilon)); - try expect(approxEqAbs(sin(splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); - try expect(approxEqAbs(sin(splat(F32x4, -0.0)), splat(F32x4, -0.0), epsilon)); - try expect(approxEqAbs(sin(splat(F32x4, 89.123)), splat(F32x4, 0.916166), epsilon)); - try expect(approxEqAbs(sin(splat(F32x8, 89.123)), splat(F32x8, 0.916166), epsilon)); - try expect(approxEqAbs(sin(splat(F32x16, 89.123)), splat(F32x16, 0.916166), epsilon)); - try expect(all(isNan(sin(splat(F32x4, math.inf_f32))), 0) == true); - try expect(all(isNan(sin(splat(F32x4, -math.inf_f32))), 0) == true); - try expect(all(isNan(sin(splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(sin(splat(F32x4, math.qnan_f32))), 0) == true); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = sin(splat(F32x4, f)); - const fr = @sin(splat(F32x4, f)); - const vr8 = sin(splat(F32x8, f)); - const fr8 = @sin(splat(F32x8, f)); - const vr16 = sin(splat(F32x16, f)); - const fr16 = @sin(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, epsilon)); - try expect(approxEqAbs(vr8, fr8, epsilon)); - try expect(approxEqAbs(vr16, fr16, epsilon)); - f 
+= 0.12345 * @intToFloat(f32, i); - } -} - -fn cos32xN(v: anytype) @TypeOf(v) { - // 10-degree minimax approximation - const T = @TypeOf(v); - - var x = modAngle(v); - var sign = andInt(x, splatNegativeZero(T)); - const c = orInt(sign, splat(T, math.pi)); - const absx = andNotInt(sign, x); - const rflx = c - x; - const comp = absx <= splat(T, 0.5 * math.pi); - x = select(comp, x, rflx); - sign = select(comp, splat(T, 1.0), splat(T, -1.0)); - const x2 = x * x; - - var result = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05)); - result = mulAdd(result, x2, splat(T, -0.0013888378)); - result = mulAdd(result, x2, splat(T, 0.041666638)); - result = mulAdd(result, x2, splat(T, -0.5)); - result = mulAdd(result, x2, splat(T, 1.0)); - return sign * result; -} -test "zmath.cos" { - const epsilon = 0.0001; - - try expect(approxEqAbs(cos(splat(F32x4, 0.5 * math.pi)), splat(F32x4, 0.0), epsilon)); - try expect(approxEqAbs(cos(splat(F32x4, 0.0)), splat(F32x4, 1.0), epsilon)); - try expect(approxEqAbs(cos(splat(F32x4, -0.0)), splat(F32x4, 1.0), epsilon)); - try expect(all(isNan(cos(splat(F32x4, math.inf_f32))), 0) == true); - try expect(all(isNan(cos(splat(F32x4, -math.inf_f32))), 0) == true); - try expect(all(isNan(cos(splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(cos(splat(F32x4, math.qnan_f32))), 0) == true); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const vr = cos(splat(F32x4, f)); - const fr = @cos(splat(F32x4, f)); - const vr8 = cos(splat(F32x8, f)); - const fr8 = @cos(splat(F32x8, f)); - const vr16 = cos(splat(F32x16, f)); - const fr16 = @cos(splat(F32x16, f)); - try expect(approxEqAbs(vr, fr, epsilon)); - try expect(approxEqAbs(vr8, fr8, epsilon)); - try expect(approxEqAbs(vr16, fr16, epsilon)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -pub fn sin(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - return switch (T) { - f32 => sin32(v), - F32x4, F32x8, F32x16 => sin32xN(v), - else => 
@compileError("zmath.sin() not implemented for " ++ @typeName(T)), - }; -} - -pub fn cos(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - return switch (T) { - f32 => cos32(v), - F32x4, F32x8, F32x16 => cos32xN(v), - else => @compileError("zmath.cos() not implemented for " ++ @typeName(T)), - }; -} - -pub fn sincos(v: anytype) [2]@TypeOf(v) { - const T = @TypeOf(v); - return switch (T) { - f32 => sincos32(v), - F32x4, F32x8, F32x16 => sincos32xN(v), - else => @compileError("zmath.sincos() not implemented for " ++ @typeName(T)), - }; -} - -pub fn asin(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - return switch (T) { - f32 => asin32(v), - F32x4, F32x8, F32x16 => asin32xN(v), - else => @compileError("zmath.asin() not implemented for " ++ @typeName(T)), - }; -} - -pub fn acos(v: anytype) @TypeOf(v) { - const T = @TypeOf(v); - return switch (T) { - f32 => acos32(v), - F32x4, F32x8, F32x16 => acos32xN(v), - else => @compileError("zmath.acos() not implemented for " ++ @typeName(T)), - }; -} - -fn sincos32xN(v: anytype) [2]@TypeOf(v) { - const T = @TypeOf(v); - - var x = modAngle(v); - var sign = andInt(x, splatNegativeZero(T)); - const c = orInt(sign, splat(T, math.pi)); - const absx = andNotInt(sign, x); - const rflx = c - x; - const comp = absx <= splat(T, 0.5 * math.pi); - x = select(comp, x, rflx); - sign = select(comp, splat(T, 1.0), splat(T, -1.0)); - const x2 = x * x; - - var sresult = mulAdd(splat(T, -2.3889859e-08), x2, splat(T, 2.7525562e-06)); - sresult = mulAdd(sresult, x2, splat(T, -0.00019840874)); - sresult = mulAdd(sresult, x2, splat(T, 0.0083333310)); - sresult = mulAdd(sresult, x2, splat(T, -0.16666667)); - sresult = x * mulAdd(sresult, x2, splat(T, 1.0)); - - var cresult = mulAdd(splat(T, -2.6051615e-07), x2, splat(T, 2.4760495e-05)); - cresult = mulAdd(cresult, x2, splat(T, -0.0013888378)); - cresult = mulAdd(cresult, x2, splat(T, 0.041666638)); - cresult = mulAdd(cresult, x2, splat(T, -0.5)); - cresult = sign * mulAdd(cresult, x2, splat(T, 
1.0)); - - return .{ sresult, cresult }; -} -test "zmath.sincos32xN" { - const epsilon = 0.0001; - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const sc = sincos(splat(F32x4, f)); - const sc8 = sincos(splat(F32x8, f)); - const sc16 = sincos(splat(F32x16, f)); - const s4 = @sin(splat(F32x4, f)); - const s8 = @sin(splat(F32x8, f)); - const s16 = @sin(splat(F32x16, f)); - const c4 = @cos(splat(F32x4, f)); - const c8 = @cos(splat(F32x8, f)); - const c16 = @cos(splat(F32x16, f)); - try expect(approxEqAbs(sc[0], s4, epsilon)); - try expect(approxEqAbs(sc8[0], s8, epsilon)); - try expect(approxEqAbs(sc16[0], s16, epsilon)); - try expect(approxEqAbs(sc[1], c4, epsilon)); - try expect(approxEqAbs(sc8[1], c8, epsilon)); - try expect(approxEqAbs(sc16[1], c16, epsilon)); - f += 0.12345 * @intToFloat(f32, i); - } -} - -fn asin32xN(v: anytype) @TypeOf(v) { - // 7-degree minimax approximation - const T = @TypeOf(v); - - const x = abs(v); - const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x)); - - var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901)); - t0 = mulAdd(t0, x, splat(T, -0.0170881256)); - t0 = mulAdd(t0, x, splat(T, 0.0308918810)); - t0 = mulAdd(t0, x, splat(T, -0.0501743046)); - t0 = mulAdd(t0, x, splat(T, 0.0889789874)); - t0 = mulAdd(t0, x, splat(T, -0.2145988016)); - t0 = root * mulAdd(t0, x, splat(T, 1.5707963050)); - - const t1 = splat(T, math.pi) - t0; - return splat(T, 0.5 * math.pi) - select(v >= splat(T, 0.0), t0, t1); -} - -fn acos32xN(v: anytype) @TypeOf(v) { - // 7-degree minimax approximation - const T = @TypeOf(v); - - const x = abs(v); - const root = sqrt(maxFast(splat(T, 0.0), splat(T, 1.0) - x)); - - var t0 = mulAdd(splat(T, -0.0012624911), x, splat(T, 0.0066700901)); - t0 = mulAdd(t0, x, splat(T, -0.0170881256)); - t0 = mulAdd(t0, x, splat(T, 0.0308918810)); - t0 = mulAdd(t0, x, splat(T, -0.0501743046)); - t0 = mulAdd(t0, x, splat(T, 0.0889789874)); - t0 = mulAdd(t0, x, splat(T, -0.2145988016)); - t0 
= root * mulAdd(t0, x, splat(T, 1.5707963050)); - - const t1 = splat(T, math.pi) - t0; - return select(v >= splat(T, 0.0), t0, t1); -} - -pub fn atan(v: anytype) @TypeOf(v) { - // 17-degree minimax approximation - const T = @TypeOf(v); - - const vabs = abs(v); - const vinv = splat(T, 1.0) / v; - var sign = select(v > splat(T, 1.0), splat(T, 1.0), splat(T, -1.0)); - const comp = vabs <= splat(T, 1.0); - sign = select(comp, splat(T, 0.0), sign); - const x = select(comp, v, vinv); - const x2 = x * x; - - var result = mulAdd(splat(T, 0.0028662257), x2, splat(T, -0.0161657367)); - result = mulAdd(result, x2, splat(T, 0.0429096138)); - result = mulAdd(result, x2, splat(T, -0.0752896400)); - result = mulAdd(result, x2, splat(T, 0.1065626393)); - result = mulAdd(result, x2, splat(T, -0.1420889944)); - result = mulAdd(result, x2, splat(T, 0.1999355085)); - result = mulAdd(result, x2, splat(T, -0.3333314528)); - result = x * mulAdd(result, x2, splat(T, 1.0)); - - const result1 = sign * splat(T, 0.5 * math.pi) - result; - return select(sign == splat(T, 0.0), result, result1); -} -test "zmath.atan" { - const epsilon = 0.0001; - { - const v = f32x4(0.25, 0.5, 1.0, 1.25); - const e = f32x4(math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3])); - try expect(approxEqAbs(e, atan(v), epsilon)); - } - { - const v = f32x8(-0.25, 0.5, -1.0, 1.25, 100.0, -200.0, 300.0, 400.0); - // zig fmt: off - const e = f32x8( - math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]), - math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]), - ); - // zig fmt: on - try expect(approxEqAbs(e, atan(v), epsilon)); - } - { - // zig fmt: off - const v = f32x16( - -0.25, 0.5, -1.0, 0.0, 0.1, -0.2, 30.0, 400.0, - -0.25, 0.5, -1.0, -0.0, -0.05, -0.125, 0.0625, 4000.0 - ); - const e = f32x16( - math.atan(v[0]), math.atan(v[1]), math.atan(v[2]), math.atan(v[3]), - math.atan(v[4]), math.atan(v[5]), math.atan(v[6]), math.atan(v[7]), - math.atan(v[8]), math.atan(v[9]), 
math.atan(v[10]), math.atan(v[11]), - math.atan(v[12]), math.atan(v[13]), math.atan(v[14]), math.atan(v[15]), - ); - // zig fmt: on - try expect(approxEqAbs(e, atan(v), epsilon)); - } - { - try expect(approxEqAbs(atan(splat(F32x4, math.inf_f32)), splat(F32x4, 0.5 * math.pi), epsilon)); - try expect(approxEqAbs(atan(splat(F32x4, -math.inf_f32)), splat(F32x4, -0.5 * math.pi), epsilon)); - try expect(all(isNan(atan(splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(atan(splat(F32x4, -math.nan_f32))), 0) == true); - } -} - -pub fn atan2(vy: anytype, vx: anytype) @TypeOf(vx, vy) { - const T = @TypeOf(vx, vy); - const Tu = @Vector(veclen(T), u32); - - const vx_is_positive = - (@bitCast(Tu, vx) & @splat(veclen(T), @as(u32, 0x8000_0000))) == @splat(veclen(T), @as(u32, 0)); - - const vy_sign = andInt(vy, splatNegativeZero(T)); - const c0_25pi = orInt(vy_sign, splat(T, 0.25 * math.pi)); - const c0_50pi = orInt(vy_sign, splat(T, 0.50 * math.pi)); - const c0_75pi = orInt(vy_sign, splat(T, 0.75 * math.pi)); - const c1_00pi = orInt(vy_sign, splat(T, 1.00 * math.pi)); - - var r1 = select(vx_is_positive, vy_sign, c1_00pi); - var r2 = select(vx == splat(T, 0.0), c0_50pi, splatInt(T, 0xffff_ffff)); - const r3 = select(vy == splat(T, 0.0), r1, r2); - const r4 = select(vx_is_positive, c0_25pi, c0_75pi); - const r5 = select(isInf(vx), r4, c0_50pi); - const result = select(isInf(vy), r5, r3); - const result_valid = @bitCast(Tu, result) == @splat(veclen(T), @as(u32, 0xffff_ffff)); - - const v = vy / vx; - const r0 = atan(v); - - r1 = select(vx_is_positive, splatNegativeZero(T), c1_00pi); - r2 = r0 + r1; - - return select(result_valid, r2, result); -} -test "zmath.atan2" { - // From DirectXMath XMVectorATan2(): - // - // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions: - - // Y == 0 and X is Negative -> Pi with the sign of Y - // y == 0 and x is positive -> 0 with the sign of y - // Y != 0 and X == 0 -> Pi / 2 with the sign of Y - 
// Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of Y) - // X == -Infinity and Finite Y -> Pi with the sign of Y - // X == +Infinity and Finite Y -> 0 with the sign of Y - // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y - // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y - // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y - - const epsilon = 0.0001; - try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, -1.0)), splat(F32x4, math.pi), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, -1.0)), splat(F32x4, -math.pi), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, 0.0)), splat(F32x4, 0.5 * math.pi), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, 0.0)), splat(F32x4, -0.5 * math.pi), epsilon)); - try expect(approxEqAbs( - atan2(splat(F32x4, 1.0), splat(F32x4, -1.0)), - splat(F32x4, math.atan(@as(f32, -1.0)) + math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2(splat(F32x4, -10.0), splat(F32x4, -2.0)), - splat(F32x4, math.atan(@as(f32, 5.0)) - math.pi), - epsilon, - )); - try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, math.pi), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, -math.inf_f32)), splat(F32x4, -math.pi), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, 1.0), splat(F32x4, math.inf_f32)), splat(F32x4, 0.0), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, -1.0), splat(F32x4, math.inf_f32)), splat(F32x4, -0.0), epsilon)); - try expect(approxEqAbs( - atan2(splat(F32x4, math.inf_f32), splat(F32x4, 2.0)), - splat(F32x4, 0.5 * math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2(splat(F32x4, -math.inf_f32), splat(F32x4, 2.0)), - splat(F32x4, -0.5 * math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2(splat(F32x4, math.inf_f32), splat(F32x4, -math.inf_f32)), - splat(F32x4, 0.75 * math.pi), - epsilon, - )); - try 
expect(approxEqAbs( - atan2(splat(F32x4, -math.inf_f32), splat(F32x4, -math.inf_f32)), - splat(F32x4, -0.75 * math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2(splat(F32x4, math.inf_f32), splat(F32x4, math.inf_f32)), - splat(F32x4, 0.25 * math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2(splat(F32x4, -math.inf_f32), splat(F32x4, math.inf_f32)), - splat(F32x4, -0.25 * math.pi), - epsilon, - )); - try expect(approxEqAbs( - atan2( - f32x8(0.0, -math.inf_f32, -0.0, 2.0, math.inf_f32, math.inf_f32, 1.0, -math.inf_f32), - f32x8(-2.0, math.inf_f32, 1.0, 0.0, 10.0, -math.inf_f32, 1.0, -math.inf_f32), - ), - f32x8( - math.pi, - -0.25 * math.pi, - -0.0, - 0.5 * math.pi, - 0.5 * math.pi, - 0.75 * math.pi, - math.atan(@as(f32, 1.0)), - -0.75 * math.pi, - ), - epsilon, - )); - try expect(approxEqAbs(atan2(splat(F32x4, 0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); - try expect(approxEqAbs(atan2(splat(F32x4, -0.0), splat(F32x4, 0.0)), splat(F32x4, 0.0), epsilon)); - try expect(all(isNan(atan2(splat(F32x4, 1.0), splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(atan2(splat(F32x4, -1.0), splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(atan2(splat(F32x4, math.nan_f32), splat(F32x4, -1.0))), 0) == true); - try expect(all(isNan(atan2(splat(F32x4, -math.nan_f32), splat(F32x4, 1.0))), 0) == true); -} -// ------------------------------------------------------------------------------ -// -// 3. 
2D, 3D, 4D vector functions -// -// ------------------------------------------------------------------------------ -pub inline fn dot2(v0: Vec, v1: Vec) F32x4 { - var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | -- | -- | - var xmm1 = swizzle(xmm0, .y, .x, .x, .x); // | y0*y1 | -- | -- | -- | - xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[3]); // | x0*x1 + y0*y1 | -- | -- | -- | - return swizzle(xmm0, .x, .x, .x, .x); -} -test "zmath.dot2" { - const v0 = f32x4(-1.0, 2.0, 300.0, -2.0); - const v1 = f32x4(4.0, 5.0, 600.0, 2.0); - var v = dot2(v0, v1); - try expect(approxEqAbs(v, splat(F32x4, 6.0), 0.0001)); -} - -pub inline fn dot3(v0: Vec, v1: Vec) F32x4 { - const dot = v0 * v1; - return f32x4s(dot[0] + dot[1] + dot[2]); -} -test "zmath.dot3" { - const v0 = f32x4(-1.0, 2.0, 3.0, 1.0); - const v1 = f32x4(4.0, 5.0, 6.0, 1.0); - var v = dot3(v0, v1); - try expect(approxEqAbs(v, splat(F32x4, 24.0), 0.0001)); -} - -pub inline fn dot4(v0: Vec, v1: Vec) F32x4 { - var xmm0 = v0 * v1; // | x0*x1 | y0*y1 | z0*z1 | w0*w1 | - var xmm1 = swizzle(xmm0, .y, .x, .w, .x); // | y0*y1 | -- | w0*w1 | -- | - xmm1 = xmm0 + xmm1; // | x0*x1 + y0*y1 | -- | z0*z1 + w0*w1 | -- | - xmm0 = swizzle(xmm1, .z, .x, .x, .x); // | z0*z1 + w0*w1 | -- | -- | -- | - xmm0 = f32x4(xmm0[0] + xmm1[0], xmm0[1], xmm0[2], xmm0[2]); // addss - return swizzle(xmm0, .x, .x, .x, .x); -} -test "zmath.dot4" { - const v0 = f32x4(-1.0, 2.0, 3.0, -2.0); - const v1 = f32x4(4.0, 5.0, 6.0, 2.0); - var v = dot4(v0, v1); - try expect(approxEqAbs(v, splat(F32x4, 20.0), 0.0001)); -} - -pub inline fn cross3(v0: Vec, v1: Vec) Vec { - var xmm0 = swizzle(v0, .y, .z, .x, .w); - var xmm1 = swizzle(v1, .z, .x, .y, .w); - var result = xmm0 * xmm1; - xmm0 = swizzle(xmm0, .y, .z, .x, .w); - xmm1 = swizzle(xmm1, .z, .x, .y, .w); - result = result - xmm0 * xmm1; - return andInt(result, f32x4_mask3); -} -test "zmath.cross3" { - { - const v0 = f32x4(1.0, 0.0, 0.0, 1.0); - const v1 = f32x4(0.0, 1.0, 0.0, 1.0); - var v = cross3(v0, v1); - 
try expect(approxEqAbs(v, f32x4(0.0, 0.0, 1.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(1.0, 0.0, 0.0, 1.0); - const v1 = f32x4(0.0, -1.0, 0.0, 1.0); - var v = cross3(v0, v1); - try expect(approxEqAbs(v, f32x4(0.0, 0.0, -1.0, 0.0), 0.0001)); - } - { - const v0 = f32x4(-3.0, 0, -2.0, 1.0); - const v1 = f32x4(5.0, -1.0, 2.0, 1.0); - var v = cross3(v0, v1); - try expect(approxEqAbs(v, f32x4(-2.0, -4.0, 3.0, 0.0), 0.0001)); - } -} - -pub inline fn lengthSq2(v: Vec) F32x4 { - return dot2(v, v); -} -pub inline fn lengthSq3(v: Vec) F32x4 { - return dot3(v, v); -} -pub inline fn lengthSq4(v: Vec) F32x4 { - return dot4(v, v); -} - -pub inline fn length2(v: Vec) F32x4 { - return sqrt(dot2(v, v)); -} -pub inline fn length3(v: Vec) F32x4 { - return sqrt(dot3(v, v)); -} -pub inline fn length4(v: Vec) F32x4 { - return sqrt(dot4(v, v)); -} -test "zmath.length3" { - if (builtin.target.os.tag == .macos and builtin.zig_backend != .stage1) return error.SkipZigTest; - { - const v = length3(f32x4(1.0, -2.0, 3.0, 1000.0)); - try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001)); - } - { - const v = length3(f32x4(1.0, math.nan_f32, math.nan_f32, 1000.0)); - try expect(all(isNan(v), 0)); - } - { - const v = length3(f32x4(1.0, math.inf_f32, 3.0, 1000.0)); - try expect(all(isInf(v), 0)); - } - { - const v = length3(f32x4(3.0, 2.0, 1.0, math.nan_f32)); - try expect(approxEqAbs(v, splat(F32x4, math.sqrt(14.0)), 0.001)); - } -} - -pub inline fn normalize2(v: Vec) Vec { - return v * splat(F32x4, 1.0) / sqrt(dot2(v, v)); -} -pub inline fn normalize3(v: Vec) Vec { - return v * splat(F32x4, 1.0) / sqrt(dot3(v, v)); -} -pub inline fn normalize4(v: Vec) Vec { - return v * splat(F32x4, 1.0) / sqrt(dot4(v, v)); -} -test "zmath.normalize3" { - { - const v0 = f32x4(1.0, -2.0, 3.0, 1000.0); - var v = normalize3(v0); - try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(14.0)), 0.0005)); - } - { - try expect(any(isNan(normalize3(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0)); - try 
expect(any(isNan(normalize3(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0)); - try expect(any(isNan(normalize3(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0)); - try expect(any(isNan(normalize3(f32x4(0, 0, 0, 0))), 0)); - } -} -test "zmath.normalize4" { - { - const v0 = f32x4(1.0, -2.0, 3.0, 10.0); - var v = normalize4(v0); - try expect(approxEqAbs(v, v0 * splat(F32x4, 1.0 / math.sqrt(114.0)), 0.0005)); - } - { - try expect(any(isNan(normalize4(f32x4(1.0, math.inf_f32, 1.0, 1.0))), 0)); - try expect(any(isNan(normalize4(f32x4(-math.inf_f32, math.inf_f32, 0.0, 0.0))), 0)); - try expect(any(isNan(normalize4(f32x4(-math.nan_f32, math.qnan_f32, 0.0, 0.0))), 0)); - try expect(any(isNan(normalize4(f32x4(0, 0, 0, 0))), 0)); - } -} - -fn vecMulMat(v: Vec, m: Mat) Vec { - var vx = @shuffle(f32, v, undefined, [4]i32{ 0, 0, 0, 0 }); - var vy = @shuffle(f32, v, undefined, [4]i32{ 1, 1, 1, 1 }); - var vz = @shuffle(f32, v, undefined, [4]i32{ 2, 2, 2, 2 }); - var vw = @shuffle(f32, v, undefined, [4]i32{ 3, 3, 3, 3 }); - return vx * m[0] + vy * m[1] + vz * m[2] + vw * m[3]; -} -fn matMulVec(m: Mat, v: Vec) Vec { - return .{ dot4(m[0], v)[0], dot4(m[1], v)[0], dot4(m[2], v)[0], dot4(m[3], v)[0] }; -} -test "zmath.vecMulMat" { - const m = Mat{ - f32x4(1.0, 0.0, 0.0, 0.0), - f32x4(0.0, 1.0, 0.0, 0.0), - f32x4(0.0, 0.0, 1.0, 0.0), - f32x4(2.0, 3.0, 4.0, 1.0), - }; - const vm = mul(f32x4(1.0, 2.0, 3.0, 1.0), m); - const mv = mul(m, f32x4(1.0, 2.0, 3.0, 1.0)); - const v = mul(transpose(m), f32x4(1.0, 2.0, 3.0, 1.0)); - try expect(approxEqAbs(vm, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001)); - try expect(approxEqAbs(mv, f32x4(1.0, 2.0, 3.0, 21.0), 0.0001)); - try expect(approxEqAbs(v, f32x4(3.0, 5.0, 7.0, 1.0), 0.0001)); -} -// ------------------------------------------------------------------------------ -// -// 4. 
Matrix functions -// -// ------------------------------------------------------------------------------ -pub fn identity() Mat { - const static = struct { - const identity = Mat{ - f32x4(1.0, 0.0, 0.0, 0.0), - f32x4(0.0, 1.0, 0.0, 0.0), - f32x4(0.0, 0.0, 1.0, 0.0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; - }; - return static.identity; -} - -fn mulRetType(comptime Ta: type, comptime Tb: type) type { - if (Ta == Mat and Tb == Mat) { - return Mat; - } else if ((Ta == f32 and Tb == Mat) or (Ta == Mat and Tb == f32)) { - return Mat; - } else if ((Ta == Vec and Tb == Mat) or (Ta == Mat and Tb == Vec)) { - return Vec; - } - @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ @typeName(Tb)); -} - -pub fn mul(a: anytype, b: anytype) mulRetType(@TypeOf(a), @TypeOf(b)) { - const Ta = @TypeOf(a); - const Tb = @TypeOf(b); - if (Ta == Mat and Tb == Mat) { - return mulMat(a, b); - } else if (Ta == f32 and Tb == Mat) { - const va = splat(F32x4, a); - return Mat{ va * b[0], va * b[1], va * b[2], va * b[3] }; - } else if (Ta == Mat and Tb == f32) { - const vb = splat(F32x4, b); - return Mat{ a[0] * vb, a[1] * vb, a[2] * vb, a[3] * vb }; - } else if (Ta == Vec and Tb == Mat) { - return vecMulMat(a, b); - } else if (Ta == Mat and Tb == Vec) { - return matMulVec(a, b); - } else { - @compileError("zmath.mul() not implemented for types: " ++ @typeName(Ta) ++ ", " ++ @typeName(Tb)); - } -} -test "zmath.mul" { - { - const m = Mat{ - f32x4(0.1, 0.2, 0.3, 0.4), - f32x4(0.5, 0.6, 0.7, 0.8), - f32x4(0.9, 1.0, 1.1, 1.2), - f32x4(1.3, 1.4, 1.5, 1.6), - }; - const ms = mul(@as(f32, 2.0), m); - try expect(approxEqAbs(ms[0], f32x4(0.2, 0.4, 0.6, 0.8), 0.0001)); - try expect(approxEqAbs(ms[1], f32x4(1.0, 1.2, 1.4, 1.6), 0.0001)); - try expect(approxEqAbs(ms[2], f32x4(1.8, 2.0, 2.2, 2.4), 0.0001)); - try expect(approxEqAbs(ms[3], f32x4(2.6, 2.8, 3.0, 3.2), 0.0001)); - } -} - -fn mulMat(m0: Mat, m1: Mat) Mat { - var result: Mat = undefined; - comptime var row: u32 = 0; - inline while 
(row < 4) : (row += 1) { - const vx = swizzle(m0[row], .x, .x, .x, .x); - const vy = swizzle(m0[row], .y, .y, .y, .y); - const vz = swizzle(m0[row], .z, .z, .z, .z); - const vw = swizzle(m0[row], .w, .w, .w, .w); - result[row] = mulAdd(vx, m1[0], vz * m1[2]) + mulAdd(vy, m1[1], vw * m1[3]); - } - return result; -} -test "zmath.matrix.mul" { - const a = Mat{ - f32x4(0.1, 0.2, 0.3, 0.4), - f32x4(0.5, 0.6, 0.7, 0.8), - f32x4(0.9, 1.0, 1.1, 1.2), - f32x4(1.3, 1.4, 1.5, 1.6), - }; - const b = Mat{ - f32x4(1.7, 1.8, 1.9, 2.0), - f32x4(2.1, 2.2, 2.3, 2.4), - f32x4(2.5, 2.6, 2.7, 2.8), - f32x4(2.9, 3.0, 3.1, 3.2), - }; - const c = mul(a, b); - try expect(approxEqAbs(c[0], f32x4(2.5, 2.6, 2.7, 2.8), 0.0001)); - try expect(approxEqAbs(c[1], f32x4(6.18, 6.44, 6.7, 6.96), 0.0001)); - try expect(approxEqAbs(c[2], f32x4(9.86, 10.28, 10.7, 11.12), 0.0001)); - try expect(approxEqAbs(c[3], f32x4(13.54, 14.12, 14.7, 15.28), 0.0001)); -} - -pub fn transpose(m: Mat) Mat { - const temp1 = @shuffle(f32, m[0], m[1], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) }); - const temp3 = @shuffle(f32, m[0], m[1], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); - const temp2 = @shuffle(f32, m[2], m[3], [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 1) }); - const temp4 = @shuffle(f32, m[2], m[3], [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); - return .{ - @shuffle(f32, temp1, temp2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }), - @shuffle(f32, temp1, temp2, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }), - @shuffle(f32, temp3, temp4, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }), - @shuffle(f32, temp3, temp4, [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }), - }; -} -test "zmath.matrix.transpose" { - const m = Mat{ - f32x4(1.0, 2.0, 3.0, 4.0), - f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), - f32x4(13.0, 14.0, 15.0, 16.0), - }; - const mt = transpose(m); - try expect(approxEqAbs(mt[0], f32x4(1.0, 5.0, 9.0, 13.0), 0.0001)); - try expect(approxEqAbs(mt[1], f32x4(2.0, 6.0, 10.0, 14.0), 0.0001)); - try 
expect(approxEqAbs(mt[2], f32x4(3.0, 7.0, 11.0, 15.0), 0.0001)); - try expect(approxEqAbs(mt[3], f32x4(4.0, 8.0, 12.0, 16.0), 0.0001)); -} - -pub fn rotationX(angle: f32) Mat { - const sc = sincos(angle); - return .{ - f32x4(1.0, 0.0, 0.0, 0.0), - f32x4(0.0, sc[1], sc[0], 0.0), - f32x4(0.0, -sc[0], sc[1], 0.0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; -} - -pub fn rotationY(angle: f32) Mat { - const sc = sincos(angle); - return .{ - f32x4(sc[1], 0.0, -sc[0], 0.0), - f32x4(0.0, 1.0, 0.0, 0.0), - f32x4(sc[0], 0.0, sc[1], 0.0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; -} - -pub fn rotationZ(angle: f32) Mat { - const sc = sincos(angle); - return .{ - f32x4(sc[1], sc[0], 0.0, 0.0), - f32x4(-sc[0], sc[1], 0.0, 0.0), - f32x4(0.0, 0.0, 1.0, 0.0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; -} - -pub fn translation(x: f32, y: f32, z: f32) Mat { - return .{ - f32x4(1.0, 0.0, 0.0, 0.0), - f32x4(0.0, 1.0, 0.0, 0.0), - f32x4(0.0, 0.0, 1.0, 0.0), - f32x4(x, y, z, 1.0), - }; -} -pub fn translationV(v: Vec) Mat { - return translation(v[0], v[1], v[2]); -} - -pub fn scaling(x: f32, y: f32, z: f32) Mat { - return .{ - f32x4(x, 0.0, 0.0, 0.0), - f32x4(0.0, y, 0.0, 0.0), - f32x4(0.0, 0.0, z, 0.0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; -} -pub fn scalingV(v: Vec) Mat { - return scaling(v[0], v[1], v[2]); -} - -pub fn lookToLh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat { - const az = normalize3(eyedir); - const ax = normalize3(cross3(updir, az)); - const ay = normalize3(cross3(az, ax)); - return transpose(.{ - f32x4(ax[0], ax[1], ax[2], -dot3(ax, eyepos)[0]), - f32x4(ay[0], ay[1], ay[2], -dot3(ay, eyepos)[0]), - f32x4(az[0], az[1], az[2], -dot3(az, eyepos)[0]), - f32x4(0.0, 0.0, 0.0, 1.0), - }); -} -pub fn lookToRh(eyepos: Vec, eyedir: Vec, updir: Vec) Mat { - return lookToLh(eyepos, -eyedir, updir); -} -pub fn lookAtLh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat { - return lookToLh(eyepos, focuspos - eyepos, updir); -} -pub fn lookAtRh(eyepos: Vec, focuspos: Vec, updir: Vec) Mat { - return lookToLh(eyepos, eyepos 
- focuspos, updir); -} -test "zmath.matrix.lookToLh" { - const m = lookToLh(f32x4(0.0, 0.0, -3.0, 1.0), f32x4(0.0, 0.0, 1.0, 0.0), f32x4(0.0, 1.0, 0.0, 0.0)); - try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.001)); - try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.001)); - try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.001)); - try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 3.0, 1.0), 0.001)); -} - -pub fn perspectiveFovLh(fovy: f32, aspect: f32, near: f32, far: f32) Mat { - const scfov = sincos(0.5 * fovy); - - assert(near > 0.0 and far > 0.0 and far > near); - assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); - - const h = scfov[1] / scfov[0]; - const w = h / aspect; - const r = far / (far - near); - return .{ - f32x4(w, 0.0, 0.0, 0.0), - f32x4(0.0, h, 0.0, 0.0), - f32x4(0.0, 0.0, r, 1.0), - f32x4(0.0, 0.0, -r * near, 0.0), - }; -} -pub fn perspectiveFovRh(fovy: f32, aspect: f32, near: f32, far: f32) Mat { - const scfov = sincos(0.5 * fovy); - - assert(near > 0.0 and far > 0.0 and far > near); - assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); - - const h = scfov[1] / scfov[0]; - const w = h / aspect; - const r = far / (near - far); - return .{ - f32x4(w, 0.0, 0.0, 0.0), - f32x4(0.0, h, 0.0, 0.0), - f32x4(0.0, 0.0, r, -1.0), - f32x4(0.0, 0.0, r * near, 0.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn perspectiveFovLhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat { - const scfov = sincos(0.5 * fovy); - - assert(near > 0.0 and far > 0.0 and far > near); - assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); - - const h = scfov[1] / scfov[0]; - const w = h / aspect; - const r = 
far - near; - return .{ - f32x4(w, 0.0, 0.0, 0.0), - f32x4(0.0, h, 0.0, 0.0), - f32x4(0.0, 0.0, (near + far) / r, 1.0), - f32x4(0.0, 0.0, 2.0 * near * far / -r, 0.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn perspectiveFovRhGl(fovy: f32, aspect: f32, near: f32, far: f32) Mat { - const scfov = sincos(0.5 * fovy); - - assert(near > 0.0 and far > 0.0 and far > near); - assert(!math.approxEqAbs(f32, scfov[0], 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - assert(!math.approxEqAbs(f32, aspect, 0.0, 0.01)); - - const h = scfov[1] / scfov[0]; - const w = h / aspect; - const r = near - far; - return .{ - f32x4(w, 0.0, 0.0, 0.0), - f32x4(0.0, h, 0.0, 0.0), - f32x4(0.0, 0.0, (near + far) / r, -1.0), - f32x4(0.0, 0.0, 2.0 * near * far / r, 0.0), - }; -} - -pub fn orthographicLh(w: f32, h: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = 1 / (far - near); - return .{ - f32x4(2 / w, 0.0, 0.0, 0.0), - f32x4(0.0, 2 / h, 0.0, 0.0), - f32x4(0.0, 0.0, r, 0.0), - f32x4(0.0, 0.0, -r * near, 1.0), - }; -} - -pub fn orthographicRh(w: f32, h: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = 1 / (near - far); - return .{ - f32x4(2 / w, 0.0, 0.0, 0.0), - f32x4(0.0, 2 / h, 0.0, 0.0), - f32x4(0.0, 0.0, r, 0.0), - f32x4(0.0, 0.0, r * near, 1.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn orthographicLhGl(w: f32, h: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = far - near; - return .{ - f32x4(2 / w, 0.0, 0.0, 0.0), - f32x4(0.0, 2 / h, 0.0, 0.0), - f32x4(0.0, 0.0, 2 
/ r, 0.0), - f32x4(0.0, 0.0, (near + far) / -r, 1.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn orthographicRhGl(w: f32, h: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, w, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, h, 0.0, 0.001)); - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = near - far; - return .{ - f32x4(2 / w, 0.0, 0.0, 0.0), - f32x4(0.0, 2 / h, 0.0, 0.0), - f32x4(0.0, 0.0, 2 / r, 0.0), - f32x4(0.0, 0.0, (near + far) / r, 1.0), - }; -} - -pub fn orthographicOffCenterLh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = 1 / (far - near); - return .{ - f32x4(2 / (right - left), 0.0, 0.0, 0.0), - f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), - f32x4(0.0, 0.0, r, 0.0), - f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), -r * near, 1.0), - }; -} - -pub fn orthographicOffCenterRh(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = 1 / (near - far); - return .{ - f32x4(2 / (right - left), 0.0, 0.0, 0.0), - f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), - f32x4(0.0, 0.0, r, 0.0), - f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), r * near, 1.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn orthographicOffCenterLhGl(left: f32, right: f32, top: f32, bottom: f32, near: f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = far - near; - return .{ - f32x4(2 / (right - left), 0.0, 0.0, 0.0), - f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), - f32x4(0.0, 0.0, 2 / r, 0.0), - f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / -r, 1.0), - }; -} - -// Produces Z values in [-1.0, 1.0] range (OpenGL defaults) -pub fn orthographicOffCenterRhGl(left: f32, right: f32, top: f32, bottom: f32, near: 
f32, far: f32) Mat { - assert(!math.approxEqAbs(f32, far, near, 0.001)); - - const r = near - far; - return .{ - f32x4(2 / (right - left), 0.0, 0.0, 0.0), - f32x4(0.0, 2 / (top - bottom), 0.0, 0.0), - f32x4(0.0, 0.0, 2 / r, 0.0), - f32x4(-(right + left) / (right - left), -(top + bottom) / (top - bottom), (near + far) / r, 1.0), - }; -} - -pub fn determinant(m: Mat) F32x4 { - var v0 = swizzle(m[2], .y, .x, .x, .x); - var v1 = swizzle(m[3], .z, .z, .y, .y); - var v2 = swizzle(m[2], .y, .x, .x, .x); - var v3 = swizzle(m[3], .w, .w, .w, .z); - var v4 = swizzle(m[2], .z, .z, .y, .y); - var v5 = swizzle(m[3], .w, .w, .w, .z); - - var p0 = v0 * v1; - var p1 = v2 * v3; - var p2 = v4 * v5; - - v0 = swizzle(m[2], .z, .z, .y, .y); - v1 = swizzle(m[3], .y, .x, .x, .x); - v2 = swizzle(m[2], .w, .w, .w, .z); - v3 = swizzle(m[3], .y, .x, .x, .x); - v4 = swizzle(m[2], .w, .w, .w, .z); - v5 = swizzle(m[3], .z, .z, .y, .y); - - p0 = mulAdd(-v0, v1, p0); - p1 = mulAdd(-v2, v3, p1); - p2 = mulAdd(-v4, v5, p2); - - v0 = swizzle(m[1], .w, .w, .w, .z); - v1 = swizzle(m[1], .z, .z, .y, .y); - v2 = swizzle(m[1], .y, .x, .x, .x); - - var s = m[0] * f32x4(1.0, -1.0, 1.0, -1.0); - var r = v0 * p0; - r = mulAdd(-v1, p1, r); - r = mulAdd(v2, p2, r); - return dot4(s, r); -} -test "zmath.matrix.determinant" { - const m = Mat{ - f32x4(10.0, -9.0, -12.0, 1.0), - f32x4(7.0, -12.0, 11.0, 1.0), - f32x4(-10.0, 10.0, 3.0, 1.0), - f32x4(1.0, 2.0, 3.0, 4.0), - }; - try expect(approxEqAbs(determinant(m), splat(F32x4, 2939.0), 0.0001)); -} - -pub fn inverse(a: anytype) @TypeOf(a) { - const T = @TypeOf(a); - return switch (T) { - Mat => inverseMat(a), - Quat => inverseQuat(a), - else => @compileError("zmath.inverse() not implemented for " ++ @typeName(T)), - }; -} - -fn inverseMat(m: Mat) Mat { - return inverseDet(m, null); -} - -pub fn inverseDet(m: Mat, out_det: ?*F32x4) Mat { - const mt = transpose(m); - var v0: [4]F32x4 = undefined; - var v1: [4]F32x4 = undefined; - - v0[0] = swizzle(mt[2], .x, .x, .y, 
.y); - v1[0] = swizzle(mt[3], .z, .w, .z, .w); - v0[1] = swizzle(mt[0], .x, .x, .y, .y); - v1[1] = swizzle(mt[1], .z, .w, .z, .w); - v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); - v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); - - var d0 = v0[0] * v1[0]; - var d1 = v0[1] * v1[1]; - var d2 = v0[2] * v1[2]; - - v0[0] = swizzle(mt[2], .z, .w, .z, .w); - v1[0] = swizzle(mt[3], .x, .x, .y, .y); - v0[1] = swizzle(mt[0], .z, .w, .z, .w); - v1[1] = swizzle(mt[1], .x, .x, .y, .y); - v0[2] = @shuffle(f32, mt[2], mt[0], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); - v1[2] = @shuffle(f32, mt[3], mt[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); - - d0 = mulAdd(-v0[0], v1[0], d0); - d1 = mulAdd(-v0[1], v1[1], d1); - d2 = mulAdd(-v0[2], v1[2], d2); - - v0[0] = swizzle(mt[1], .y, .z, .x, .y); - v1[0] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 1, 3, 0 }); - v0[1] = swizzle(mt[0], .z, .x, .y, .x); - v1[1] = @shuffle(f32, d0, d2, [4]i32{ 3, ~@as(i32, 1), 1, 2 }); - v0[2] = swizzle(mt[3], .y, .z, .x, .y); - v1[2] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 1, 3, 0 }); - v0[3] = swizzle(mt[2], .z, .x, .y, .x); - v1[3] = @shuffle(f32, d1, d2, [4]i32{ 3, ~@as(i32, 3), 1, 2 }); - - var c0 = v0[0] * v1[0]; - var c2 = v0[1] * v1[1]; - var c4 = v0[2] * v1[2]; - var c6 = v0[3] * v1[3]; - - v0[0] = swizzle(mt[1], .z, .w, .y, .z); - v1[0] = @shuffle(f32, d0, d2, [4]i32{ 3, 0, 1, ~@as(i32, 0) }); - v0[1] = swizzle(mt[0], .w, .z, .w, .y); - v1[1] = @shuffle(f32, d0, d2, [4]i32{ 2, 1, ~@as(i32, 0), 0 }); - v0[2] = swizzle(mt[3], .z, .w, .y, .z); - v1[2] = @shuffle(f32, d1, d2, [4]i32{ 3, 0, 1, ~@as(i32, 2) }); - v0[3] = swizzle(mt[2], .w, .z, .w, .y); - v1[3] = @shuffle(f32, d1, d2, [4]i32{ 2, 1, ~@as(i32, 2), 0 }); - - c0 = mulAdd(-v0[0], v1[0], c0); - c2 = mulAdd(-v0[1], v1[1], c2); - c4 = mulAdd(-v0[2], v1[2], c4); - c6 = mulAdd(-v0[3], v1[3], c6); - - v0[0] = swizzle(mt[1], .w, .x, .w, .x); - v1[0] = @shuffle(f32, 
d0, d2, [4]i32{ 2, ~@as(i32, 1), ~@as(i32, 0), 2 }); - v0[1] = swizzle(mt[0], .y, .w, .x, .z); - v1[1] = @shuffle(f32, d0, d2, [4]i32{ ~@as(i32, 1), 0, 3, ~@as(i32, 0) }); - v0[2] = swizzle(mt[3], .w, .x, .w, .x); - v1[2] = @shuffle(f32, d1, d2, [4]i32{ 2, ~@as(i32, 3), ~@as(i32, 2), 2 }); - v0[3] = swizzle(mt[2], .y, .w, .x, .z); - v1[3] = @shuffle(f32, d1, d2, [4]i32{ ~@as(i32, 3), 0, 3, ~@as(i32, 2) }); - - const c1 = mulAdd(-v0[0], v1[0], c0); - const c3 = mulAdd(v0[1], v1[1], c2); - const c5 = mulAdd(-v0[2], v1[2], c4); - const c7 = mulAdd(v0[3], v1[3], c6); - - c0 = mulAdd(v0[0], v1[0], c0); - c2 = mulAdd(-v0[1], v1[1], c2); - c4 = mulAdd(v0[2], v1[2], c4); - c6 = mulAdd(-v0[3], v1[3], c6); - - var mr = Mat{ - f32x4(c0[0], c1[1], c0[2], c1[3]), - f32x4(c2[0], c3[1], c2[2], c3[3]), - f32x4(c4[0], c5[1], c4[2], c5[3]), - f32x4(c6[0], c7[1], c6[2], c7[3]), - }; - - const det = dot4(mr[0], mt[0]); - if (out_det != null) { - out_det.?.* = det; - } - - if (math.approxEqAbs(f32, det[0], 0.0, math.f32_epsilon)) { - return .{ - f32x4(0.0, 0.0, 0.0, 0.0), - f32x4(0.0, 0.0, 0.0, 0.0), - f32x4(0.0, 0.0, 0.0, 0.0), - f32x4(0.0, 0.0, 0.0, 0.0), - }; - } - - const scale = splat(F32x4, 1.0) / det; - mr[0] *= scale; - mr[1] *= scale; - mr[2] *= scale; - mr[3] *= scale; - return mr; -} -test "zmath.matrix.inverse" { - const m = Mat{ - f32x4(10.0, -9.0, -12.0, 1.0), - f32x4(7.0, -12.0, 11.0, 1.0), - f32x4(-10.0, 10.0, 3.0, 1.0), - f32x4(1.0, 2.0, 3.0, 4.0), - }; - var det: F32x4 = undefined; - const mi = inverseDet(m, &det); - try expect(approxEqAbs(det, splat(F32x4, 2939.0), 0.0001)); - - try expect(approxEqAbs(mi[0], f32x4(-0.170806, -0.13576, -0.349439, 0.164001), 0.0001)); - try expect(approxEqAbs(mi[1], f32x4(-0.163661, -0.14801, -0.253147, 0.141204), 0.0001)); - try expect(approxEqAbs(mi[2], f32x4(-0.0871045, 0.00646478, -0.0785982, 0.0398095), 0.0001)); - try expect(approxEqAbs(mi[3], f32x4(0.18986, 0.103096, 0.272882, 0.10854), 0.0001)); -} - -pub fn 
matFromNormAxisAngle(axis: Vec, angle: f32) Mat { - const sincos_angle = sincos(angle); - - const c2 = splat(F32x4, 1.0 - sincos_angle[1]); - const c1 = splat(F32x4, sincos_angle[1]); - const c0 = splat(F32x4, sincos_angle[0]); - - const n0 = swizzle(axis, .y, .z, .x, .w); - const n1 = swizzle(axis, .z, .x, .y, .w); - - var v0 = c2 * n0 * n1; - const r0 = c2 * axis * axis + c1; - const r1 = c0 * axis + v0; - var r2 = v0 - c0 * axis; - - v0 = andInt(r0, f32x4_mask3); - - var v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 2, ~@as(i32, 1), ~@as(i32, 2) }); - v1 = swizzle(v1, .y, .z, .w, .x); - - var v2 = @shuffle(f32, r1, r2, [4]i32{ 1, 1, ~@as(i32, 0), ~@as(i32, 0) }); - v2 = swizzle(v2, .x, .z, .x, .z); - - r2 = @shuffle(f32, v0, v1, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) }); - r2 = swizzle(r2, .x, .z, .w, .y); - - var m: Mat = undefined; - m[0] = r2; - - r2 = @shuffle(f32, v0, v1, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) }); - r2 = swizzle(r2, .z, .x, .w, .y); - m[1] = r2; - - v2 = @shuffle(f32, v2, v0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) }); - m[2] = v2; - m[3] = f32x4(0.0, 0.0, 0.0, 1.0); - return m; -} -pub fn matFromAxisAngle(axis: Vec, angle: f32) Mat { - assert(!all(axis == splat(F32x4, 0.0), 3)); - assert(!all(isInf(axis), 3)); - const normal = normalize3(axis); - return matFromNormAxisAngle(normal, angle); -} -test "zmath.matrix.matFromAxisAngle" { - { - const m0 = matFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), math.pi * 0.25); - const m1 = rotationX(math.pi * 0.25); - try expect(approxEqAbs(m0[0], m1[0], 0.001)); - try expect(approxEqAbs(m0[1], m1[1], 0.001)); - try expect(approxEqAbs(m0[2], m1[2], 0.001)); - try expect(approxEqAbs(m0[3], m1[3], 0.001)); - } - { - const m0 = matFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), math.pi * 0.125); - const m1 = rotationY(math.pi * 0.125); - try expect(approxEqAbs(m0[0], m1[0], 0.001)); - try expect(approxEqAbs(m0[1], m1[1], 0.001)); - try expect(approxEqAbs(m0[2], m1[2], 0.001)); - try expect(approxEqAbs(m0[3], m1[3], 
0.001)); - } - { - const m0 = matFromAxisAngle(f32x4(0.0, 0.0, 1.0, 0.0), math.pi * 0.333); - const m1 = rotationZ(math.pi * 0.333); - try expect(approxEqAbs(m0[0], m1[0], 0.001)); - try expect(approxEqAbs(m0[1], m1[1], 0.001)); - try expect(approxEqAbs(m0[2], m1[2], 0.001)); - try expect(approxEqAbs(m0[3], m1[3], 0.001)); - } -} - -pub fn matFromQuat(quat: Quat) Mat { - var q0 = quat + quat; - var q1 = quat * q0; - - var v0 = swizzle(q1, .y, .x, .x, .w); - v0 = andInt(v0, f32x4_mask3); - - var v1 = swizzle(q1, .z, .z, .y, .w); - v1 = andInt(v1, f32x4_mask3); - - var r0 = (f32x4(1.0, 1.0, 1.0, 0.0) - v0) - v1; - - v0 = swizzle(quat, .x, .x, .y, .w); - v1 = swizzle(q0, .z, .y, .z, .w); - v0 = v0 * v1; - - v1 = swizzle(quat, .w, .w, .w, .w); - var v2 = swizzle(q0, .y, .z, .x, .w); - v1 = v1 * v2; - - var r1 = v0 + v1; - var r2 = v0 - v1; - - v0 = @shuffle(f32, r1, r2, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) }); - v0 = swizzle(v0, .x, .z, .w, .y); - v1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 2), ~@as(i32, 2) }); - v1 = swizzle(v1, .x, .z, .x, .z); - - q1 = @shuffle(f32, r0, v0, [4]i32{ 0, 3, ~@as(i32, 0), ~@as(i32, 1) }); - q1 = swizzle(q1, .x, .z, .w, .y); - - var m: Mat = undefined; - m[0] = q1; - - q1 = @shuffle(f32, r0, v0, [4]i32{ 1, 3, ~@as(i32, 2), ~@as(i32, 3) }); - q1 = swizzle(q1, .z, .x, .w, .y); - m[1] = q1; - - q1 = @shuffle(f32, v1, r0, [4]i32{ 0, 1, ~@as(i32, 2), ~@as(i32, 3) }); - m[2] = q1; - m[3] = f32x4(0.0, 0.0, 0.0, 1.0); - return m; -} -test "zmath.matrix.matFromQuat" { - { - const m = matFromQuat(f32x4(0.0, 0.0, 0.0, 1.0)); - try expect(approxEqAbs(m[0], f32x4(1.0, 0.0, 0.0, 0.0), 0.0001)); - try expect(approxEqAbs(m[1], f32x4(0.0, 1.0, 0.0, 0.0), 0.0001)); - try expect(approxEqAbs(m[2], f32x4(0.0, 0.0, 1.0, 0.0), 0.0001)); - try expect(approxEqAbs(m[3], f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); - } -} - -pub fn matFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Mat { - return matFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); -} -pub fn 
matFromRollPitchYawV(angles: Vec) Mat { - return matFromQuat(quatFromRollPitchYawV(angles)); -} - -pub fn matToQuat(m: Mat) Quat { - return quatFromMat(m); -} - -pub inline fn loadMat(mem: []const f32) Mat { - return .{ - load(mem[0..4], F32x4, 0), - load(mem[4..8], F32x4, 0), - load(mem[8..12], F32x4, 0), - load(mem[12..16], F32x4, 0), - }; -} -test "zmath.loadMat" { - const a = [18]f32{ - 1.0, 2.0, 3.0, 4.0, - 5.0, 6.0, 7.0, 8.0, - 9.0, 10.0, 11.0, 12.0, - 13.0, 14.0, 15.0, 16.0, - 17.0, 18.0, - }; - const m = loadMat(a[1..]); - try expect(approxEqAbs(m[0], f32x4(2.0, 3.0, 4.0, 5.0), 0.0)); - try expect(approxEqAbs(m[1], f32x4(6.0, 7.0, 8.0, 9.0), 0.0)); - try expect(approxEqAbs(m[2], f32x4(10.0, 11.0, 12.0, 13.0), 0.0)); - try expect(approxEqAbs(m[3], f32x4(14.0, 15.0, 16.0, 17.0), 0.0)); -} - -pub inline fn storeMat(mem: []f32, m: Mat) void { - store(mem[0..4], m[0], 0); - store(mem[4..8], m[1], 0); - store(mem[8..12], m[2], 0); - store(mem[12..16], m[3], 0); -} - -pub inline fn loadMat43(mem: []const f32) Mat { - return .{ - f32x4(mem[0], mem[1], mem[2], 0.0), - f32x4(mem[3], mem[4], mem[5], 0.0), - f32x4(mem[6], mem[7], mem[8], 0.0), - f32x4(mem[9], mem[10], mem[11], 1.0), - }; -} - -pub inline fn storeMat43(mem: []f32, m: Mat) void { - store(mem[0..3], m[0], 3); - store(mem[3..6], m[1], 3); - store(mem[6..9], m[2], 3); - store(mem[9..12], m[3], 3); -} - -pub inline fn loadMat34(mem: []const f32) Mat { - return .{ - load(mem[0..4], F32x4, 0), - load(mem[4..8], F32x4, 0), - load(mem[8..12], F32x4, 0), - f32x4(0.0, 0.0, 0.0, 1.0), - }; -} - -pub inline fn storeMat34(mem: []f32, m: Mat) void { - store(mem[0..4], m[0], 0); - store(mem[4..8], m[1], 0); - store(mem[8..12], m[2], 0); -} - -pub inline fn matToArr(m: Mat) [16]f32 { - var array: [16]f32 = undefined; - storeMat(array[0..], m); - return array; -} - -pub inline fn matToArr43(m: Mat) [12]f32 { - var array: [12]f32 = undefined; - storeMat43(array[0..], m); - return array; -} - -pub inline fn matToArr34(m: 
Mat) [12]f32 { - var array: [12]f32 = undefined; - storeMat34(array[0..], m); - return array; -} -// ------------------------------------------------------------------------------ -// -// 5. Quaternion functions -// -// ------------------------------------------------------------------------------ -pub fn qmul(q0: Quat, q1: Quat) Quat { - var result = swizzle(q1, .w, .w, .w, .w); - var q1x = swizzle(q1, .x, .x, .x, .x); - var q1y = swizzle(q1, .y, .y, .y, .y); - var q1z = swizzle(q1, .z, .z, .z, .z); - result = result * q0; - var q0_shuf = swizzle(q0, .w, .z, .y, .x); - q1x = q1x * q0_shuf; - q0_shuf = swizzle(q0_shuf, .y, .x, .w, .z); - result = mulAdd(q1x, f32x4(1.0, -1.0, 1.0, -1.0), result); - q1y = q1y * q0_shuf; - q0_shuf = swizzle(q0_shuf, .w, .z, .y, .x); - q1y = q1y * f32x4(1.0, 1.0, -1.0, -1.0); - q1z = q1z * q0_shuf; - q1y = mulAdd(q1z, f32x4(-1.0, 1.0, 1.0, -1.0), q1y); - return result + q1y; -} -test "zmath.quaternion.mul" { - { - const q0 = f32x4(2.0, 3.0, 4.0, 1.0); - const q1 = f32x4(3.0, 2.0, 1.0, 4.0); - try expect(approxEqAbs(qmul(q0, q1), f32x4(16.0, 4.0, 22.0, -12.0), 0.0001)); - } -} - -pub fn quatToMat(quat: Quat) Mat { - return matFromQuat(quat); -} - -pub fn quatToAxisAngle(quat: Quat, axis: *Vec, angle: *f32) void { - axis.* = quat; - angle.* = 2.0 * acos(quat[3]); -} -test "zmath.quaternion.quatToAxisAngle" { - { - const q0 = quatFromNormAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); - var axis: Vec = f32x4(4.0, 3.0, 2.0, 1.0); - var angle: f32 = 10.0; - quatToAxisAngle(q0, &axis, &angle); - try expect(math.approxEqAbs(f32, axis[0], @sin(@as(f32, 0.25) * math.pi * 0.5), 0.0001)); - try expect(axis[1] == 0.0); - try expect(axis[2] == 0.0); - try expect(math.approxEqAbs(f32, angle, 0.25 * math.pi, 0.0001)); - } -} - -pub fn quatFromMat(m: Mat) Quat { - const r0 = m[0]; - const r1 = m[1]; - const r2 = m[2]; - const r00 = swizzle(r0, .x, .x, .x, .x); - const r11 = swizzle(r1, .y, .y, .y, .y); - const r22 = swizzle(r2, .z, .z, .z, .z); 
- - const x2gey2 = (r11 - r00) <= splat(F32x4, 0.0); - const z2gew2 = (r11 + r00) <= splat(F32x4, 0.0); - const x2py2gez2pw2 = r22 <= splat(F32x4, 0.0); - - var t0 = mulAdd(r00, f32x4(1.0, -1.0, -1.0, 1.0), splat(F32x4, 1.0)); - var t1 = r11 * f32x4(-1.0, 1.0, -1.0, 1.0); - var t2 = mulAdd(r22, f32x4(-1.0, -1.0, 1.0, 1.0), t0); - const x2y2z2w2 = t1 + t2; - - t0 = @shuffle(f32, r0, r1, [4]i32{ 1, 2, ~@as(i32, 2), ~@as(i32, 1) }); - t1 = @shuffle(f32, r1, r2, [4]i32{ 0, 0, ~@as(i32, 0), ~@as(i32, 1) }); - t1 = swizzle(t1, .x, .z, .w, .y); - const xyxzyz = t0 + t1; - - t0 = @shuffle(f32, r2, r1, [4]i32{ 1, 0, ~@as(i32, 0), ~@as(i32, 0) }); - t1 = @shuffle(f32, r1, r0, [4]i32{ 2, 2, ~@as(i32, 2), ~@as(i32, 1) }); - t1 = swizzle(t1, .x, .z, .w, .y); - const xwywzw = (t0 - t1) * f32x4(-1.0, 1.0, -1.0, 1.0); - - t0 = @shuffle(f32, x2y2z2w2, xyxzyz, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 0) }); - t1 = @shuffle(f32, x2y2z2w2, xwywzw, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 0) }); - t2 = @shuffle(f32, xyxzyz, xwywzw, [4]i32{ 1, 2, ~@as(i32, 0), ~@as(i32, 1) }); - - const tensor0 = @shuffle(f32, t0, t2, [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); - const tensor1 = @shuffle(f32, t0, t2, [4]i32{ 2, 1, ~@as(i32, 1), ~@as(i32, 3) }); - const tensor2 = @shuffle(f32, t2, t1, [4]i32{ 0, 1, ~@as(i32, 0), ~@as(i32, 2) }); - const tensor3 = @shuffle(f32, t2, t1, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 1) }); - - t0 = select(x2gey2, tensor0, tensor1); - t1 = select(z2gew2, tensor2, tensor3); - t2 = select(x2py2gez2pw2, t0, t1); - - return t2 / length4(t2); -} -test "zmath.quatFromMat" { - { - const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); - const q1 = quatFromMat(rotationX(0.25 * math.pi)); - try expect(approxEqAbs(q0, q1, 0.0001)); - } - { - const q0 = quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi); - const q1 = quatFromMat(matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi)); - try expect(approxEqAbs(q0, q1, 0.0001)); - } - { - const q0 = 
quatFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi); - const q1 = quatFromMat(matFromRollPitchYaw(0.1 * math.pi, -0.2 * math.pi, 0.3 * math.pi)); - try expect(approxEqAbs(q0, q1, 0.0001)); - } -} - -pub fn quatFromNormAxisAngle(axis: Vec, angle: f32) Quat { - var n = f32x4(axis[0], axis[1], axis[2], 1.0); - const sc = sincos(0.5 * angle); - return n * f32x4(sc[0], sc[0], sc[0], sc[1]); -} -pub fn quatFromAxisAngle(axis: Vec, angle: f32) Quat { - assert(!all(axis == splat(F32x4, 0.0), 3)); - assert(!all(isInf(axis), 3)); - const normal = normalize3(axis); - return quatFromNormAxisAngle(normal, angle); -} -test "zmath.quaternion.quatFromNormAxisAngle" { - { - const q0 = quatFromAxisAngle(f32x4(1.0, 0.0, 0.0, 0.0), 0.25 * math.pi); - const q1 = quatFromAxisAngle(f32x4(0.0, 1.0, 0.0, 0.0), 0.125 * math.pi); - const m0 = rotationX(0.25 * math.pi); - const m1 = rotationY(0.125 * math.pi); - const mr0 = quatToMat(qmul(q0, q1)); - const mr1 = mul(m0, m1); - try expect(approxEqAbs(mr0[0], mr1[0], 0.0001)); - try expect(approxEqAbs(mr0[1], mr1[1], 0.0001)); - try expect(approxEqAbs(mr0[2], mr1[2], 0.0001)); - try expect(approxEqAbs(mr0[3], mr1[3], 0.0001)); - } - { - const m0 = quatToMat(quatFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi)); - const m1 = matFromAxisAngle(f32x4(1.0, 2.0, 0.5, 0.0), 0.25 * math.pi); - try expect(approxEqAbs(m0[0], m1[0], 0.0001)); - try expect(approxEqAbs(m0[1], m1[1], 0.0001)); - try expect(approxEqAbs(m0[2], m1[2], 0.0001)); - try expect(approxEqAbs(m0[3], m1[3], 0.0001)); - } -} - -pub inline fn qidentity() Quat { - return f32x4(@as(f32, 0.0), @as(f32, 0.0), @as(f32, 0.0), @as(f32, 1.0)); -} - -pub inline fn conjugate(quat: Quat) Quat { - return quat * f32x4(-1.0, -1.0, -1.0, 1.0); -} - -fn inverseQuat(quat: Quat) Quat { - const l = lengthSq4(quat); - const conj = conjugate(quat); - return select(l <= splat(F32x4, math.f32_epsilon), splat(F32x4, 0.0), conj / l); -} -test "zmath.quaternion.inverseQuat" { - try 
expect(approxEqAbs( - inverse(f32x4(2.0, 3.0, 4.0, 1.0)), - f32x4(-1.0 / 15.0, -1.0 / 10.0, -2.0 / 15.0, 1.0 / 30.0), - 0.0001, - )); - try expect(approxEqAbs(inverse(qidentity()), qidentity(), 0.0001)); -} - -pub fn slerp(q0: Quat, q1: Quat, t: f32) Quat { - return slerpV(q0, q1, splat(F32x4, t)); -} -pub fn slerpV(q0: Quat, q1: Quat, t: F32x4) Quat { - var cos_omega = dot4(q0, q1); - const sign = select(cos_omega < splat(F32x4, 0.0), splat(F32x4, -1.0), splat(F32x4, 1.0)); - - cos_omega = cos_omega * sign; - const sin_omega = sqrt(splat(F32x4, 1.0) - cos_omega * cos_omega); - - const omega = atan2(sin_omega, cos_omega); - - var v01 = t; - v01 = xorInt(andInt(v01, f32x4_mask2), f32x4_sign_mask1); - v01 = f32x4(1.0, 0.0, 0.0, 0.0) + v01; - - var s0 = sin(v01 * omega) / sin_omega; - s0 = select(cos_omega < splat(F32x4, 1.0 - 0.00001), s0, v01); - - var s1 = swizzle(s0, .y, .y, .y, .y); - s0 = swizzle(s0, .x, .x, .x, .x); - - return q0 * s0 + sign * q1 * s1; -} -test "zmath.quaternion.slerp" { - const from = f32x4(0.0, 0.0, 0.0, 1.0); - const to = f32x4(0.5, 0.5, -0.5, 0.5); - const result = slerp(from, to, 0.5); - try expect(approxEqAbs(result, f32x4(0.28867513, 0.28867513, -0.28867513, 0.86602540), 0.0001)); -} - -pub fn quatFromRollPitchYaw(pitch: f32, yaw: f32, roll: f32) Quat { - return quatFromRollPitchYawV(f32x4(pitch, yaw, roll, 0.0)); -} -pub fn quatFromRollPitchYawV(angles: Vec) Quat { // | pitch | yaw | roll | 0 | - const sc = sincos(splat(Vec, 0.5) * angles); - const p0 = @shuffle(f32, sc[1], sc[0], [4]i32{ ~@as(i32, 0), 0, 0, 0 }); - const p1 = @shuffle(f32, sc[0], sc[1], [4]i32{ ~@as(i32, 0), 0, 0, 0 }); - const y0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 1, ~@as(i32, 1), 1, 1 }); - const y1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 1, ~@as(i32, 1), 1, 1 }); - const r0 = @shuffle(f32, sc[1], sc[0], [4]i32{ 2, 2, ~@as(i32, 2), 2 }); - const r1 = @shuffle(f32, sc[0], sc[1], [4]i32{ 2, 2, ~@as(i32, 2), 2 }); - const q1 = p1 * f32x4(1.0, -1.0, -1.0, 1.0) * y1; - 
const q0 = p0 * y0 * r0; - return mulAdd(q1, r1, q0); -} -test "zmath.quaternion.quatFromRollPitchYawV" { - { - const m0 = quatToMat(quatFromRollPitchYawV(f32x4(0.25 * math.pi, 0.0, 0.0, 0.0))); - const m1 = rotationX(0.25 * math.pi); - try expect(approxEqAbs(m0[0], m1[0], 0.0001)); - try expect(approxEqAbs(m0[1], m1[1], 0.0001)); - try expect(approxEqAbs(m0[2], m1[2], 0.0001)); - try expect(approxEqAbs(m0[3], m1[3], 0.0001)); - } - { - const m0 = quatToMat(quatFromRollPitchYaw(0.1 * math.pi, 0.2 * math.pi, 0.3 * math.pi)); - const m1 = mul( - rotationZ(0.3 * math.pi), - mul(rotationX(0.1 * math.pi), rotationY(0.2 * math.pi)), - ); - try expect(approxEqAbs(m0[0], m1[0], 0.0001)); - try expect(approxEqAbs(m0[1], m1[1], 0.0001)); - try expect(approxEqAbs(m0[2], m1[2], 0.0001)); - try expect(approxEqAbs(m0[3], m1[3], 0.0001)); - } -} -// ------------------------------------------------------------------------------ -// -// 6. Color functions -// -// ------------------------------------------------------------------------------ -pub fn adjustSaturation(color: F32x4, saturation: f32) F32x4 { - const luminance = dot3(f32x4(0.2125, 0.7154, 0.0721, 0.0), color); - var result = mulAdd(color - luminance, f32x4s(saturation), luminance); - result[3] = color[3]; - return result; -} - -pub fn adjustContrast(color: F32x4, contrast: f32) F32x4 { - var result = mulAdd(color - f32x4s(0.5), f32x4s(contrast), f32x4s(0.5)); - result[3] = color[3]; - return result; -} - -pub fn rgbToHsl(rgb: F32x4) F32x4 { - const r = swizzle(rgb, .x, .x, .x, .x); - const g = swizzle(rgb, .y, .y, .y, .y); - const b = swizzle(rgb, .z, .z, .z, .z); - - const minv = min(r, min(g, b)); - const maxv = max(r, max(g, b)); - - const l = (minv + maxv) * f32x4s(0.5); - const d = maxv - minv; - const la = select(boolx4(true, true, true, false), l, rgb); - - if (all(d < f32x4s(math.f32_epsilon), 3)) { - return select(boolx4(true, true, false, false), f32x4s(0.0), la); - } else { - var s: F32x4 = undefined; - var h: 
F32x4 = undefined; - - const d2 = minv + maxv; - - if (all(l > f32x4s(0.5), 3)) { - s = d / (f32x4s(2.0) - d2); - } else { - s = d / d2; - } - - if (all(r == maxv, 3)) { - h = (g - b) / d; - } else if (all(g == maxv, 3)) { - h = f32x4s(2.0) + (b - r) / d; - } else { - h = f32x4s(4.0) + (r - g) / d; - } - - h /= f32x4s(6.0); - - if (all(h < f32x4s(0.0), 3)) { - h += f32x4s(1.0); - } - - const lha = select(boolx4(true, true, false, false), h, la); - return select(boolx4(true, false, true, true), lha, s); - } -} -test "zmath.color.rgbToHsl" { - try expect(approxEqAbs(rgbToHsl(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.6, 0.5, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 0.5, 0.5), 0.0001)); - try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 0.5, 0.25), 0.0001)); - try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 0.5, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsl(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); -} - -fn hueToClr(p: F32x4, q: F32x4, h: F32x4) F32x4 { - var t = h; - - if (all(t < f32x4s(0.0), 3)) - t += f32x4s(1.0); - - if (all(t > f32x4s(1.0), 3)) - t -= f32x4s(1.0); - - if (all(t < f32x4s(1.0 / 6.0), 3)) - return mulAdd(q - p, f32x4s(6.0) * t, p); - - if (all(t < f32x4s(0.5), 3)) - return q; - - if (all(t < f32x4s(2.0 / 3.0), 3)) - return mulAdd(q - p, f32x4s(6.0) * (f32x4s(2.0 / 3.0) - t), p); - - return p; -} - -pub fn hslToRgb(hsl: F32x4) F32x4 { - const s = swizzle(hsl, .y, .y, .y, .y); - const l = swizzle(hsl, .z, .z, .z, .z); - - if (all(isNearEqual(s, f32x4s(0.0), f32x4s(math.f32_epsilon)), 3)) { - return select(boolx4(true, true, true, false), l, hsl); - } else { - const h = swizzle(hsl, .x, .x, .x, .x); - var q: F32x4 = undefined; - if (all(l < f32x4s(0.5), 3)) { - q = l * (f32x4s(1.0) + s); - } else { - q = (l + s) - 
(l * s); - } - - const p = f32x4s(2.0) * l - q; - - const r = hueToClr(p, q, h + f32x4s(1.0 / 3.0)); - const g = hueToClr(p, q, h); - const b = hueToClr(p, q, h - f32x4s(1.0 / 3.0)); - - const rg = select(boolx4(true, false, false, false), r, g); - const ba = select(boolx4(true, true, true, false), b, hsl); - return select(boolx4(true, true, false, false), rg, ba); - } -} -test "zmath.color.hslToRgb" { - try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hslToRgb(f32x4(0.6111, 0.6, 0.5, 1.0)), 0.0001)); - try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hslToRgb(f32x4(0.0, 1.0, 0.5, 0.5)), 0.0001)); - try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hslToRgb(f32x4(0.3333, 1.0, 0.5, 0.25)), 0.0005)); - try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hslToRgb(f32x4(0.6666, 1.0, 0.5, 1.0)), 0.0005)); - try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), 0.0001)); - try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hslToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), 0.0001)); - try expect(approxEqAbs(hslToRgb(rgbToHsl(f32x4(1.0, 1.0, 1.0, 1.0))), f32x4(1.0, 1.0, 1.0, 1.0), 0.0005)); - try expect(approxEqAbs( - hslToRgb(rgbToHsl(f32x4(0.82198, 0.1839, 0.632, 1.0))), - f32x4(0.82198, 0.1839, 0.632, 1.0), - 0.0005, - )); - try expect(approxEqAbs( - rgbToHsl(hslToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), - f32x4(0.82198, 0.1839, 0.632, 1.0), - 0.0005, - )); - try expect(approxEqAbs( - rgbToHsl(hslToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), - f32x4(0.1839, 0.82198, 0.632, 1.0), - 0.0005, - )); - try expect(approxEqAbs( - hslToRgb(rgbToHsl(f32x4(0.1839, 0.632, 0.82198, 1.0))), - f32x4(0.1839, 0.632, 0.82198, 1.0), - 0.0005, - )); -} - -pub fn rgbToHsv(rgb: F32x4) F32x4 { - const r = swizzle(rgb, .x, .x, .x, .x); - const g = swizzle(rgb, .y, .y, .y, .y); - const b = swizzle(rgb, .z, .z, .z, .z); - - const minv = min(r, min(g, b)); - const v = max(r, max(g, b)); - const d = v - minv; - const s = if (all(isNearEqual(v, f32x4s(0.0), 
f32x4s(math.f32_epsilon)), 3)) f32x4s(0.0) else d / v; - - if (all(d < f32x4s(math.f32_epsilon), 3)) { - const hv = select(boolx4(true, false, false, false), f32x4s(0.0), v); - const hva = select(boolx4(true, true, true, false), hv, rgb); - return select(boolx4(true, false, true, true), hva, s); - } else { - var h: F32x4 = undefined; - if (all(r == v, 3)) { - h = (g - b) / d; - if (all(g < b, 3)) - h += f32x4s(6.0); - } else if (all(g == v, 3)) { - h = f32x4s(2.0) + (b - r) / d; - } else { - h = f32x4s(4.0) + (r - g) / d; - } - - h /= f32x4s(6.0); - const hv = select(boolx4(true, false, false, false), h, v); - const hva = select(boolx4(true, true, true, false), hv, rgb); - return select(boolx4(true, false, true, true), hva, s); - } -} -test "zmath.color.rgbToHsv" { - try expect(approxEqAbs(rgbToHsv(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.6111, 0.75, 0.8, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.2, 0.8, 1.0)), f32x4(0.7222, 0.75, 0.8, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(0.4, 0.8, 0.2, 1.0)), f32x4(0.2777, 0.75, 0.8, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 0.0, 0.0, 0.5)), f32x4(0.0, 1.0, 1.0, 0.5), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 1.0, 0.0, 0.25)), f32x4(0.3333, 1.0, 1.0, 0.25), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 1.0, 1.0)), f32x4(0.6666, 1.0, 1.0, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(0.0, 0.0, 0.0, 1.0)), f32x4(0.0, 0.0, 0.0, 1.0), 0.0001)); - try expect(approxEqAbs(rgbToHsv(f32x4(1.0, 1.0, 1.0, 1.0)), f32x4(0.0, 0.0, 1.0, 1.0), 0.0001)); -} - -pub fn hsvToRgb(hsv: F32x4) F32x4 { - const h = swizzle(hsv, .x, .x, .x, .x); - const s = swizzle(hsv, .y, .y, .y, .y); - const v = swizzle(hsv, .z, .z, .z, .z); - - const h6 = h * f32x4s(6.0); - const i = floor(h6); - const f = h6 - i; - - const p = v * (f32x4s(1.0) - s); - const q = v * (f32x4s(1.0) - f * s); - const t = v * (f32x4s(1.0) - (f32x4s(1.0) - f) * s); - - const ii = @floatToInt(i32, 
mod(i, f32x4s(6.0))[0]); - const rgb = switch (ii) { - 0 => blk: { - const vt = select(boolx4(true, false, false, false), v, t); - break :blk select(boolx4(true, true, false, false), vt, p); - }, - 1 => blk: { - const qv = select(boolx4(true, false, false, false), q, v); - break :blk select(boolx4(true, true, false, false), qv, p); - }, - 2 => blk: { - const pv = select(boolx4(true, false, false, false), p, v); - break :blk select(boolx4(true, true, false, false), pv, t); - }, - 3 => blk: { - const pq = select(boolx4(true, false, false, false), p, q); - break :blk select(boolx4(true, true, false, false), pq, v); - }, - 4 => blk: { - const tp = select(boolx4(true, false, false, false), t, p); - break :blk select(boolx4(true, true, false, false), tp, v); - }, - 5 => blk: { - const vp = select(boolx4(true, false, false, false), v, p); - break :blk select(boolx4(true, true, false, false), vp, q); - }, - else => unreachable, - }; - return select(boolx4(true, true, true, false), rgb, hsv); -} -test "zmath.color.hsvToRgb" { - const epsilon = 0.0005; - try expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), hsvToRgb(f32x4(0.6111, 0.75, 0.8, 1.0)), epsilon)); - try expect(approxEqAbs(f32x4(0.4, 0.2, 0.8, 1.0), hsvToRgb(f32x4(0.7222, 0.75, 0.8, 1.0)), epsilon)); - try expect(approxEqAbs(f32x4(0.4, 0.8, 0.2, 1.0), hsvToRgb(f32x4(0.2777, 0.75, 0.8, 1.0)), epsilon)); - try expect(approxEqAbs(f32x4(1.0, 0.0, 0.0, 0.5), hsvToRgb(f32x4(0.0, 1.0, 1.0, 0.5)), epsilon)); - try expect(approxEqAbs(f32x4(0.0, 1.0, 0.0, 0.25), hsvToRgb(f32x4(0.3333, 1.0, 1.0, 0.25)), epsilon)); - try expect(approxEqAbs(f32x4(0.0, 0.0, 1.0, 1.0), hsvToRgb(f32x4(0.6666, 1.0, 1.0, 1.0)), epsilon)); - try expect(approxEqAbs(f32x4(0.0, 0.0, 0.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 0.0, 1.0)), epsilon)); - try expect(approxEqAbs(f32x4(1.0, 1.0, 1.0, 1.0), hsvToRgb(f32x4(0.0, 0.0, 1.0, 1.0)), epsilon)); - try expect(approxEqAbs( - hsvToRgb(rgbToHsv(f32x4(0.1839, 0.632, 0.82198, 1.0))), - f32x4(0.1839, 0.632, 0.82198, 
1.0), - epsilon, - )); - try expect(approxEqAbs( - hsvToRgb(rgbToHsv(f32x4(0.82198, 0.1839, 0.632, 1.0))), - f32x4(0.82198, 0.1839, 0.632, 1.0), - epsilon, - )); - try expect(approxEqAbs( - rgbToHsv(hsvToRgb(f32x4(0.82198, 0.1839, 0.632, 1.0))), - f32x4(0.82198, 0.1839, 0.632, 1.0), - epsilon, - )); - try expect(approxEqAbs( - rgbToHsv(hsvToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), - f32x4(0.1839, 0.82198, 0.632, 1.0), - epsilon, - )); -} - -pub fn rgbToSrgb(rgb: F32x4) F32x4 { - const static = struct { - const cutoff = f32x4(0.0031308, 0.0031308, 0.0031308, 1.0); - const linear = f32x4(12.92, 12.92, 12.92, 1.0); - const scale = f32x4(1.055, 1.055, 1.055, 1.0); - const bias = f32x4(0.055, 0.055, 0.055, 1.0); - const rgamma = 1.0 / 2.4; - }; - var v = saturate(rgb); - const v0 = v * static.linear; - const v1 = static.scale * f32x4( - math.pow(f32, v[0], static.rgamma), - math.pow(f32, v[1], static.rgamma), - math.pow(f32, v[2], static.rgamma), - v[3], - ) - static.bias; - v = select(v < static.cutoff, v0, v1); - return select(boolx4(true, true, true, false), v, rgb); -} -test "zmath.color.rgbToSrgb" { - const epsilon = 0.001; - try expect(approxEqAbs(rgbToSrgb(f32x4(0.2, 0.4, 0.8, 1.0)), f32x4(0.484, 0.665, 0.906, 1.0), epsilon)); -} - -pub fn srgbToRgb(srgb: F32x4) F32x4 { - const static = struct { - const cutoff = f32x4(0.04045, 0.04045, 0.04045, 1.0); - const rlinear = f32x4(1.0 / 12.92, 1.0 / 12.92, 1.0 / 12.92, 1.0); - const scale = f32x4(1.0 / 1.055, 1.0 / 1.055, 1.0 / 1.055, 1.0); - const bias = f32x4(0.055, 0.055, 0.055, 1.0); - const gamma = 2.4; - }; - var v = saturate(srgb); - const v0 = v * static.rlinear; - var v1 = static.scale * (v + static.bias); - v1 = f32x4( - math.pow(f32, v1[0], static.gamma), - math.pow(f32, v1[1], static.gamma), - math.pow(f32, v1[2], static.gamma), - v1[3], - ); - v = select(v > static.cutoff, v1, v0); - return select(boolx4(true, true, true, false), v, srgb); -} -test "zmath.color.srgbToRgb" { - const epsilon = 0.0007; - try 
expect(approxEqAbs(f32x4(0.2, 0.4, 0.8, 1.0), srgbToRgb(f32x4(0.484, 0.665, 0.906, 1.0)), epsilon)); - try expect(approxEqAbs( - rgbToSrgb(srgbToRgb(f32x4(0.1839, 0.82198, 0.632, 1.0))), - f32x4(0.1839, 0.82198, 0.632, 1.0), - epsilon, - )); -} -// ------------------------------------------------------------------------------ -// -// X. Misc functions -// -// ------------------------------------------------------------------------------ -pub fn linePointDistance(linept0: Vec, linept1: Vec, pt: Vec) F32x4 { - const ptvec = pt - linept0; - const linevec = linept1 - linept0; - const scale = dot3(ptvec, linevec) / lengthSq3(linevec); - return length3(ptvec - linevec * scale); -} -test "zmath.linePointDistance" { - { - const linept0 = f32x4(-1.0, -2.0, -3.0, 1.0); - const linept1 = f32x4(1.0, 2.0, 3.0, 1.0); - const pt = f32x4(1.0, 1.0, 1.0, 1.0); - var v = linePointDistance(linept0, linept1, pt); - try expect(approxEqAbs(v, splat(F32x4, 0.654), 0.001)); - } -} - -fn sin32(v: f32) f32 { - var y = v - math.tau * @round(v * 1.0 / math.tau); - - if (y > 0.5 * math.pi) { - y = math.pi - y; - } else if (y < -math.pi * 0.5) { - y = -math.pi - y; - } - const y2 = y * y; - - // 11-degree minimax approximation - var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); - sinv = mulAdd(sinv, y2, -0.00019840874); - sinv = mulAdd(sinv, y2, 0.0083333310); - sinv = mulAdd(sinv, y2, -0.16666667); - return y * mulAdd(sinv, y2, 1.0); -} -fn cos32(v: f32) f32 { - var y = v - math.tau * @round(v * 1.0 / math.tau); - - const sign = blk: { - if (y > 0.5 * math.pi) { - y = math.pi - y; - break :blk @as(f32, -1.0); - } else if (y < -math.pi * 0.5) { - y = -math.pi - y; - break :blk @as(f32, -1.0); - } else { - break :blk @as(f32, 1.0); - } - }; - const y2 = y * y; - - // 10-degree minimax approximation - var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); - cosv = mulAdd(cosv, y2, -0.0013888378); - cosv = mulAdd(cosv, y2, 0.041666638); - cosv = mulAdd(cosv, y2, -0.5); - 
return sign * mulAdd(cosv, y2, 1.0); -} -fn sincos32(v: f32) [2]f32 { - var y = v - math.tau * @round(v * 1.0 / math.tau); - - const sign = blk: { - if (y > 0.5 * math.pi) { - y = math.pi - y; - break :blk @as(f32, -1.0); - } else if (y < -math.pi * 0.5) { - y = -math.pi - y; - break :blk @as(f32, -1.0); - } else { - break :blk @as(f32, 1.0); - } - }; - const y2 = y * y; - - // 11-degree minimax approximation - var sinv = mulAdd(@as(f32, -2.3889859e-08), y2, 2.7525562e-06); - sinv = mulAdd(sinv, y2, -0.00019840874); - sinv = mulAdd(sinv, y2, 0.0083333310); - sinv = mulAdd(sinv, y2, -0.16666667); - sinv = y * mulAdd(sinv, y2, 1.0); - - // 10-degree minimax approximation - var cosv = mulAdd(@as(f32, -2.6051615e-07), y2, 2.4760495e-05); - cosv = mulAdd(cosv, y2, -0.0013888378); - cosv = mulAdd(cosv, y2, 0.041666638); - cosv = mulAdd(cosv, y2, -0.5); - cosv = sign * mulAdd(cosv, y2, 1.0); - - return .{ sinv, cosv }; -} -test "zmath.sincos32" { - const epsilon = 0.0001; - - try expect(math.isNan(sincos32(math.inf_f32)[0])); - try expect(math.isNan(sincos32(math.inf_f32)[1])); - try expect(math.isNan(sincos32(-math.inf_f32)[0])); - try expect(math.isNan(sincos32(-math.inf_f32)[1])); - try expect(math.isNan(sincos32(math.nan_f32)[0])); - try expect(math.isNan(sincos32(-math.nan_f32)[1])); - - try expect(math.isNan(sin32(math.inf_f32))); - try expect(math.isNan(cos32(math.inf_f32))); - try expect(math.isNan(sin32(-math.inf_f32))); - try expect(math.isNan(cos32(-math.inf_f32))); - try expect(math.isNan(sin32(math.nan_f32))); - try expect(math.isNan(cos32(-math.nan_f32))); - - var f: f32 = -100.0; - var i: u32 = 0; - while (i < 100) : (i += 1) { - const sc = sincos32(f); - const s0 = sin32(f); - const c0 = cos32(f); - const s = @sin(f); - const c = @cos(f); - try expect(math.approxEqAbs(f32, sc[0], s, epsilon)); - try expect(math.approxEqAbs(f32, sc[1], c, epsilon)); - try expect(math.approxEqAbs(f32, s0, s, epsilon)); - try expect(math.approxEqAbs(f32, c0, c, epsilon)); - f 
+= 0.12345 * @intToFloat(f32, i); - } -} - -fn asin32(v: f32) f32 { - const x = @fabs(v); - var omx = 1.0 - x; - if (omx < 0.0) { - omx = 0.0; - } - const root = @sqrt(omx); - - // 7-degree minimax approximation - var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); - result = mulAdd(result, x, -0.0170881256); - result = mulAdd(result, x, 0.0308918810); - result = mulAdd(result, x, -0.0501743046); - result = mulAdd(result, x, 0.0889789874); - result = mulAdd(result, x, -0.2145988016); - result = root * mulAdd(result, x, 1.5707963050); - - return if (v >= 0.0) 0.5 * math.pi - result else result - 0.5 * math.pi; -} -test "zmath.asin32" { - const epsilon = 0.0001; - - try expect(math.approxEqAbs(f32, asin(@as(f32, -1.1)), -0.5 * math.pi, epsilon)); - try expect(math.approxEqAbs(f32, asin(@as(f32, 1.1)), 0.5 * math.pi, epsilon)); - try expect(math.approxEqAbs(f32, asin(@as(f32, -1000.1)), -0.5 * math.pi, epsilon)); - try expect(math.approxEqAbs(f32, asin(@as(f32, 100000.1)), 0.5 * math.pi, epsilon)); - try expect(math.isNan(asin(math.inf_f32))); - try expect(math.isNan(asin(-math.inf_f32))); - try expect(math.isNan(asin(math.nan_f32))); - try expect(math.isNan(asin(-math.nan_f32))); - - try expect(approxEqAbs(asin(splat(F32x8, -100.0)), splat(F32x8, -0.5 * math.pi), epsilon)); - try expect(approxEqAbs(asin(splat(F32x16, 100.0)), splat(F32x16, 0.5 * math.pi), epsilon)); - try expect(all(isNan(asin(splat(F32x4, math.inf_f32))), 0) == true); - try expect(all(isNan(asin(splat(F32x4, -math.inf_f32))), 0) == true); - try expect(all(isNan(asin(splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(asin(splat(F32x4, math.qnan_f32))), 0) == true); - - var f: f32 = -1.0; - var i: u32 = 0; - while (i < 8) : (i += 1) { - const r0 = asin32(f); - const r1 = math.asin(f); - const r4 = asin(splat(F32x4, f)); - const r8 = asin(splat(F32x8, f)); - const r16 = asin(splat(F32x16, f)); - try expect(math.approxEqAbs(f32, r0, r1, epsilon)); - try expect(approxEqAbs(r4, 
splat(F32x4, r1), epsilon)); - try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon)); - try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon)); - f += 0.09 * @intToFloat(f32, i); - } -} - -fn acos32(v: f32) f32 { - const x = @fabs(v); - var omx = 1.0 - x; - if (omx < 0.0) { - omx = 0.0; - } - const root = @sqrt(omx); - - // 7-degree minimax approximation - var result = mulAdd(@as(f32, -0.0012624911), x, 0.0066700901); - result = mulAdd(result, x, -0.0170881256); - result = mulAdd(result, x, 0.0308918810); - result = mulAdd(result, x, -0.0501743046); - result = mulAdd(result, x, 0.0889789874); - result = mulAdd(result, x, -0.2145988016); - result = root * mulAdd(result, x, 1.5707963050); - - return if (v >= 0.0) result else math.pi - result; -} -test "zmath.acos32" { - const epsilon = 0.1; - - try expect(math.approxEqAbs(f32, acos(@as(f32, -1.1)), math.pi, epsilon)); - try expect(math.approxEqAbs(f32, acos(@as(f32, -10000.1)), math.pi, epsilon)); - try expect(math.approxEqAbs(f32, acos(@as(f32, 1.1)), 0.0, epsilon)); - try expect(math.approxEqAbs(f32, acos(@as(f32, 1000.1)), 0.0, epsilon)); - try expect(math.isNan(acos(math.inf_f32))); - try expect(math.isNan(acos(-math.inf_f32))); - try expect(math.isNan(acos(math.nan_f32))); - try expect(math.isNan(acos(-math.nan_f32))); - - try expect(approxEqAbs(acos(splat(F32x8, -100.0)), splat(F32x8, math.pi), epsilon)); - try expect(approxEqAbs(acos(splat(F32x16, 100.0)), splat(F32x16, 0.0), epsilon)); - try expect(all(isNan(acos(splat(F32x4, math.inf_f32))), 0) == true); - try expect(all(isNan(acos(splat(F32x4, -math.inf_f32))), 0) == true); - try expect(all(isNan(acos(splat(F32x4, math.nan_f32))), 0) == true); - try expect(all(isNan(acos(splat(F32x4, math.qnan_f32))), 0) == true); - - var f: f32 = -1.0; - var i: u32 = 0; - while (i < 8) : (i += 1) { - const r0 = acos32(f); - const r1 = math.acos(f); - const r4 = acos(splat(F32x4, f)); - const r8 = acos(splat(F32x8, f)); - const r16 = acos(splat(F32x16, f)); - try 
expect(math.approxEqAbs(f32, r0, r1, epsilon)); - try expect(approxEqAbs(r4, splat(F32x4, r1), epsilon)); - try expect(approxEqAbs(r8, splat(F32x8, r1), epsilon)); - try expect(approxEqAbs(r16, splat(F32x16, r1), epsilon)); - f += 0.09 * @intToFloat(f32, i); - } -} - -pub fn modAngle32(in_angle: f32) f32 { - const angle = in_angle + math.pi; - var temp: f32 = @fabs(angle); - temp = temp - (2.0 * math.pi * @intToFloat(f32, @floatToInt(i32, temp / math.pi))); - temp = temp - math.pi; - if (angle < 0.0) { - temp = -temp; - } - return temp; -} - -pub fn cmulSoa(re0: anytype, im0: anytype, re1: anytype, im1: anytype) [2]@TypeOf(re0, im0, re1, im1) { - const re0_re1 = re0 * re1; - const re0_im1 = re0 * im1; - return .{ - mulAdd(-im0, im1, re0_re1), // re - mulAdd(re1, im0, re0_im1), // im - }; -} -// ------------------------------------------------------------------------------ -// -// FFT (implementation based on xdsp.h from DirectXMath) -// -// ------------------------------------------------------------------------------ -fn fftButterflyDit4_1(re0: *F32x4, im0: *F32x4) void { - const re0l = swizzle(re0.*, .x, .x, .y, .y); - const re0h = swizzle(re0.*, .z, .z, .w, .w); - - const im0l = swizzle(im0.*, .x, .x, .y, .y); - const im0h = swizzle(im0.*, .z, .z, .w, .w); - - const re_temp = mulAdd(re0h, f32x4(1.0, -1.0, 1.0, -1.0), re0l); - const im_temp = mulAdd(im0h, f32x4(1.0, -1.0, 1.0, -1.0), im0l); - - const re_shuf0 = @shuffle(f32, re_temp, im_temp, [4]i32{ 2, 3, ~@as(i32, 2), ~@as(i32, 3) }); - const re_shuf = swizzle(re_shuf0, .x, .w, .x, .w); - const im_shuf = swizzle(re_shuf0, .z, .y, .z, .y); - - const re_templ = swizzle(re_temp, .x, .y, .x, .y); - const im_templ = swizzle(im_temp, .x, .y, .x, .y); - - re0.* = mulAdd(re_shuf, f32x4(1.0, 1.0, -1.0, -1.0), re_templ); - im0.* = mulAdd(im_shuf, f32x4(1.0, -1.0, -1.0, 1.0), im_templ); -} - -fn fftButterflyDit4_4( - re0: *F32x4, - re1: *F32x4, - re2: *F32x4, - re3: *F32x4, - im0: *F32x4, - im1: *F32x4, - im2: *F32x4, - 
im3: *F32x4, - unity_table_re: []const F32x4, - unity_table_im: []const F32x4, - stride: u32, - last: bool, -) void { - const re_temp0 = re0.* + re2.*; - const im_temp0 = im0.* + im2.*; - - const re_temp2 = re1.* + re3.*; - const im_temp2 = im1.* + im3.*; - - const re_temp1 = re0.* - re2.*; - const im_temp1 = im0.* - im2.*; - - const re_temp3 = re1.* - re3.*; - const im_temp3 = im1.* - im3.*; - - var re_temp4 = re_temp0 + re_temp2; - var im_temp4 = im_temp0 + im_temp2; - - var re_temp5 = re_temp1 + im_temp3; - var im_temp5 = im_temp1 - re_temp3; - - var re_temp6 = re_temp0 - re_temp2; - var im_temp6 = im_temp0 - im_temp2; - - var re_temp7 = re_temp1 - im_temp3; - var im_temp7 = im_temp1 + re_temp3; - - { - const re_im = cmulSoa(re_temp5, im_temp5, unity_table_re[stride], unity_table_im[stride]); - re_temp5 = re_im[0]; - im_temp5 = re_im[1]; - } - { - const re_im = cmulSoa(re_temp6, im_temp6, unity_table_re[stride * 2], unity_table_im[stride * 2]); - re_temp6 = re_im[0]; - im_temp6 = re_im[1]; - } - { - const re_im = cmulSoa(re_temp7, im_temp7, unity_table_re[stride * 3], unity_table_im[stride * 3]); - re_temp7 = re_im[0]; - im_temp7 = re_im[1]; - } - - if (last) { - fftButterflyDit4_1(&re_temp4, &im_temp4); - fftButterflyDit4_1(&re_temp5, &im_temp5); - fftButterflyDit4_1(&re_temp6, &im_temp6); - fftButterflyDit4_1(&re_temp7, &im_temp7); - } - - re0.* = re_temp4; - im0.* = im_temp4; - - re1.* = re_temp5; - im1.* = im_temp5; - - re2.* = re_temp6; - im2.* = im_temp6; - - re3.* = re_temp7; - im3.* = im_temp7; -} - -fn fft4(re: []F32x4, im: []F32x4, count: u32) void { - assert(std.math.isPowerOfTwo(count)); - assert(re.len >= count); - assert(im.len >= count); - - var index: u32 = 0; - while (index < count) : (index += 1) { - fftButterflyDit4_1(&re[index], &im[index]); - } -} -test "zmath.fft4" { - const epsilon = 0.0001; - var re = [_]F32x4{f32x4(1.0, 2.0, 3.0, 4.0)}; - var im = [_]F32x4{f32x4s(0.0)}; - fft4(re[0..], im[0..], 1); - - var re_uns: [1]F32x4 = undefined; - 
var im_uns: [1]F32x4 = undefined; - fftUnswizzle(re[0..], re_uns[0..]); - fftUnswizzle(im[0..], im_uns[0..]); - - try expect(approxEqAbs(re_uns[0], f32x4(10.0, -2.0, -2.0, -2.0), epsilon)); - try expect(approxEqAbs(im_uns[0], f32x4(0.0, 2.0, 0.0, -2.0), epsilon)); -} - -fn fft8(re: []F32x4, im: []F32x4, count: u32) void { - assert(std.math.isPowerOfTwo(count)); - assert(re.len >= 2 * count); - assert(im.len >= 2 * count); - - var index: u32 = 0; - while (index < count) : (index += 1) { - var pre = re[index * 2 ..]; - var pim = im[index * 2 ..]; - - var odds_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); - var evens_re = @shuffle(f32, pre[0], pre[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); - var odds_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 1, 3, ~@as(i32, 1), ~@as(i32, 3) }); - var evens_im = @shuffle(f32, pim[0], pim[1], [4]i32{ 0, 2, ~@as(i32, 0), ~@as(i32, 2) }); - fftButterflyDit4_1(&odds_re, &odds_im); - fftButterflyDit4_1(&evens_re, &evens_im); - - { - const re_im = cmulSoa( - odds_re, - odds_im, - f32x4(1.0, 0.70710677, 0.0, -0.70710677), - f32x4(0.0, -0.70710677, -1.0, -0.70710677), - ); - pre[0] = evens_re + re_im[0]; - pim[0] = evens_im + re_im[1]; - } - { - const re_im = cmulSoa( - odds_re, - odds_im, - f32x4(-1.0, -0.70710677, 0.0, 0.70710677), - f32x4(0.0, 0.70710677, 1.0, 0.70710677), - ); - pre[1] = evens_re + re_im[0]; - pim[1] = evens_im + re_im[1]; - } - } -} -test "zmath.fft8" { - const epsilon = 0.0001; - var re = [_]F32x4{ f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0) }; - var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0) }; - fft8(re[0..], im[0..], 1); - - var re_uns: [2]F32x4 = undefined; - var im_uns: [2]F32x4 = undefined; - fftUnswizzle(re[0..], re_uns[0..]); - fftUnswizzle(im[0..], im_uns[0..]); - - try expect(approxEqAbs(re_uns[0], f32x4(36.0, -4.0, -4.0, -4.0), epsilon)); - try expect(approxEqAbs(re_uns[1], f32x4(-4.0, -4.0, -4.0, -4.0), epsilon)); - try expect(approxEqAbs(im_uns[0], f32x4(0.0, 
9.656854, 4.0, 1.656854), epsilon)); - try expect(approxEqAbs(im_uns[1], f32x4(0.0, -1.656854, -4.0, -9.656854), epsilon)); -} - -fn fft16(re: []F32x4, im: []F32x4, count: u32) void { - assert(std.math.isPowerOfTwo(count)); - assert(re.len >= 4 * count); - assert(im.len >= 4 * count); - - const static = struct { - const unity_table_re = [4]F32x4{ - f32x4(1.0, 1.0, 1.0, 1.0), - f32x4(1.0, 0.92387950, 0.70710677, 0.38268343), - f32x4(1.0, 0.70710677, -4.3711388e-008, -0.70710677), - f32x4(1.0, 0.38268343, -0.70710677, -0.92387950), - }; - const unity_table_im = [4]F32x4{ - f32x4(-0.0, -0.0, -0.0, -0.0), - f32x4(-0.0, -0.38268343, -0.70710677, -0.92387950), - f32x4(-0.0, -0.70710677, -1.0, -0.70710677), - f32x4(-0.0, -0.92387950, -0.70710677, 0.38268343), - }; - }; - - var index: u32 = 0; - while (index < count) : (index += 1) { - fftButterflyDit4_4( - &re[index * 4], - &re[index * 4 + 1], - &re[index * 4 + 2], - &re[index * 4 + 3], - &im[index * 4], - &im[index * 4 + 1], - &im[index * 4 + 2], - &im[index * 4 + 3], - static.unity_table_re[0..], - static.unity_table_im[0..], - 1, - true, - ); - } -} -test "zmath.fft16" { - const epsilon = 0.0001; - var re = [_]F32x4{ - f32x4(1.0, 2.0, 3.0, 4.0), - f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), - f32x4(13.0, 14.0, 15.0, 16.0), - }; - var im = [_]F32x4{ f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0) }; - fft16(re[0..], im[0..], 1); - - var re_uns: [4]F32x4 = undefined; - var im_uns: [4]F32x4 = undefined; - fftUnswizzle(re[0..], re_uns[0..]); - fftUnswizzle(im[0..], im_uns[0..]); - - try expect(approxEqAbs(re_uns[0], f32x4(136.0, -8.0, -8.0, -8.0), epsilon)); - try expect(approxEqAbs(re_uns[1], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); - try expect(approxEqAbs(re_uns[2], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); - try expect(approxEqAbs(re_uns[3], f32x4(-8.0, -8.0, -8.0, -8.0), epsilon)); - try expect(approxEqAbs(im_uns[0], f32x4(0.0, 40.218716, 19.313708, 11.972846), epsilon)); - try 
expect(approxEqAbs(im_uns[1], f32x4(8.0, 5.345429, 3.313708, 1.591299), epsilon)); - try expect(approxEqAbs(im_uns[2], f32x4(0.0, -1.591299, -3.313708, -5.345429), epsilon)); - try expect(approxEqAbs(im_uns[3], f32x4(-8.0, -11.972846, -19.313708, -40.218716), epsilon)); -} - -fn fftN(re: []F32x4, im: []F32x4, unity_table: []const F32x4, length: u32, count: u32) void { - assert(length > 16); - assert(std.math.isPowerOfTwo(length)); - assert(std.math.isPowerOfTwo(count)); - assert(re.len >= length * count / 4); - assert(re.len == im.len); - - const total = count * length; - const total_vectors = total / 4; - const stage_vectors = length / 4; - const stage_vectors_mask = stage_vectors - 1; - const stride = length / 16; - const stride_mask = stride - 1; - const stride_inv_mask = ~stride_mask; - - var unity_table_re = unity_table; - var unity_table_im = unity_table[length / 4 ..]; - - var index: u32 = 0; - while (index < total_vectors / 4) : (index += 1) { - const n = (index & stride_inv_mask) * 4 + (index & stride_mask); - fftButterflyDit4_4( - &re[n], - &re[n + stride], - &re[n + stride * 2], - &re[n + stride * 3], - &im[n], - &im[n + stride], - &im[n + stride * 2], - &im[n + stride * 3], - unity_table_re[(n & stage_vectors_mask)..], - unity_table_im[(n & stage_vectors_mask)..], - stride, - false, - ); - } - - if (length > 16 * 4) { - fftN(re, im, unity_table[(length / 2)..], length / 4, count * 4); - } else if (length == 16 * 4) { - fft16(re, im, count * 4); - } else if (length == 8 * 4) { - fft8(re, im, count * 4); - } else if (length == 4 * 4) { - fft4(re, im, count * 4); - } -} -test "zmath.fftN" { - var unity_table: [128]F32x4 = undefined; - const epsilon = 0.0001; - - // 32 samples - { - var re = [_]F32x4{ - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - }; - var im 
= [_]F32x4{ - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - }; - - fftInitUnityTable(unity_table[0..32]); - fft(re[0..], im[0..], unity_table[0..32]); - - try expect(approxEqAbs(re[0], f32x4(528.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[1], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[2], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[3], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[4], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[5], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[6], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(re[7], f32x4(-16.0, -16.0, -16.0, -16.0), epsilon)); - try expect(approxEqAbs(im[0], f32x4(0.0, 162.450726, 80.437432, 52.744931), epsilon)); - try expect(approxEqAbs(im[1], f32x4(38.627417, 29.933895, 23.945692, 19.496056), epsilon)); - try expect(approxEqAbs(im[2], f32x4(16.0, 13.130861, 10.690858, 8.552178), epsilon)); - try expect(approxEqAbs(im[3], f32x4(6.627417, 4.853547, 3.182598, 1.575862), epsilon)); - try expect(approxEqAbs(im[4], f32x4(0.0, -1.575862, -3.182598, -4.853547), epsilon)); - try expect(approxEqAbs(im[5], f32x4(-6.627417, -8.552178, -10.690858, -13.130861), epsilon)); - try expect(approxEqAbs(im[6], f32x4(-16.0, -19.496056, -23.945692, -29.933895), epsilon)); - try expect(approxEqAbs(im[7], f32x4(-38.627417, -52.744931, -80.437432, -162.450726), epsilon)); - } - - // 64 samples - { - var re = [_]F32x4{ - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - 
f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - }; - var im = [_]F32x4{ - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - }; - - fftInitUnityTable(unity_table[0..64]); - fft(re[0..], im[0..], unity_table[0..64]); - - try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon)); - var i: u32 = 1; - while (i < 16) : (i += 1) { - try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon)); - } - - const expected = [_]f32{ - 0.0, 0.0, 324.901452, 0.000000, 160.874864, 0.0, 105.489863, 0.000000, - 77.254834, 0.0, 59.867789, 0.0, 47.891384, 0.0, 38.992113, 0.0, - 32.000000, 0.000000, 26.261721, 0.000000, 21.381716, 0.000000, 17.104356, 0.000000, - 13.254834, 0.000000, 9.707094, 0.000000, 6.365196, 0.000000, 3.151725, 0.000000, - 0.000000, 0.000000, -3.151725, 0.000000, -6.365196, 0.000000, -9.707094, 0.000000, - -13.254834, 0.000000, -17.104356, 0.000000, -21.381716, 0.000000, -26.261721, 0.000000, - -32.000000, 0.000000, -38.992113, 0.000000, -47.891384, 0.000000, -59.867789, 0.000000, - -77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000, - }; - for (expected, 0..) 
|e, ie| { - try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); - } - } - - // 128 samples - { - var re = [_]F32x4{ - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - }; - var im = [_]F32x4{ - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - }; - - fftInitUnityTable(unity_table[0..128]); - fft(re[0..], im[0..], unity_table[0..128]); - - try expect(approxEqAbs(re[0], f32x4(2112.0, 0.0, 0.0, 0.0), epsilon)); - var i: u32 = 1; - while (i < 32) : (i += 1) { - try expect(approxEqAbs(re[i], f32x4(-64.0, 0.0, 0.0, 0.0), epsilon)); - } - - const expected = [_]f32{ - 0.000000, 0.000000, 0.000000, 0.000000, 649.802905, 0.000000, 0.000000, 0.000000, - 
321.749727, 0.000000, 0.000000, 0.000000, 210.979725, 0.000000, 0.000000, 0.000000, - 154.509668, 0.000000, 0.000000, 0.000000, 119.735578, 0.000000, 0.000000, 0.000000, - 95.782769, 0.000000, 0.000000, 0.000000, 77.984226, 0.000000, 0.000000, 0.000000, - 64.000000, 0.000000, 0.000000, 0.000000, 52.523443, 0.000000, 0.000000, 0.000000, - 42.763433, 0.000000, 0.000000, 0.000000, 34.208713, 0.000000, 0.000000, 0.000000, - 26.509668, 0.000000, 0.000000, 0.000000, 19.414188, 0.000000, 0.000000, 0.000000, - 12.730392, 0.000000, 0.000000, 0.000000, 6.303450, 0.000000, 0.000000, 0.000000, - 0.000000, 0.000000, 0.000000, 0.000000, -6.303450, 0.000000, 0.000000, 0.000000, - -12.730392, 0.000000, 0.000000, 0.000000, -19.414188, 0.000000, 0.000000, 0.000000, - -26.509668, 0.000000, 0.000000, 0.000000, -34.208713, 0.000000, 0.000000, 0.000000, - -42.763433, 0.000000, 0.000000, 0.000000, -52.523443, 0.000000, 0.000000, 0.000000, - -64.000000, 0.000000, 0.000000, 0.000000, -77.984226, 0.000000, 0.000000, 0.000000, - -95.782769, 0.000000, 0.000000, 0.000000, -119.735578, 0.000000, 0.000000, 0.000000, - -154.509668, 0.000000, 0.000000, 0.000000, -210.979725, 0.000000, 0.000000, 0.000000, - -321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000, - }; - for (expected, 0..) |e, ie| { - try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); - } - } -} - -fn fftUnswizzle(input: []const F32x4, output: []F32x4) void { - assert(std.math.isPowerOfTwo(input.len)); - assert(input.len == output.len); - assert(input.ptr != output.ptr); - - const log2_length = std.math.log2_int(usize, input.len * 4); - assert(log2_length >= 2); - - const length = input.len; - - const f32_output = @ptrCast([*]f32, output.ptr)[0 .. 
output.len * 4]; - - const static = struct { - const swizzle_table = [256]u8{ - 0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0, - 0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, 0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4, - 0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8, 0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8, - 0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC, 0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC, - 0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1, 0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1, - 0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5, 0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5, - 0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9, 0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9, - 0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD, 0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD, - 0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2, 0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2, - 0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6, 0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6, - 0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA, 0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA, - 0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE, 0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE, - 0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3, 0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3, - 0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7, 0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7, - 0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB, 0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB, - 0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF, 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF, - }; - }; - - if ((log2_length & 1) == 0) { - const rev32 = @intCast(u6, 32 - log2_length); - var index: usize = 0; - while (index < length) : (index += 1) { - const n = index * 4; - const addr = - (@intCast(usize, static.swizzle_table[n & 0xff]) << 24) | - (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) | - (@intCast(usize, 
static.swizzle_table[(n >> 16) & 0xff]) << 8) | - @intCast(usize, static.swizzle_table[(n >> 24) & 0xff]); - f32_output[addr >> rev32] = input[index][0]; - f32_output[(0x40000000 | addr) >> rev32] = input[index][1]; - f32_output[(0x80000000 | addr) >> rev32] = input[index][2]; - f32_output[(0xC0000000 | addr) >> rev32] = input[index][3]; - } - } else { - const rev7 = @as(usize, 1) << @intCast(u6, log2_length - 3); - const rev32 = @intCast(u6, 32 - (log2_length - 3)); - var index: usize = 0; - while (index < length) : (index += 1) { - const n = index / 2; - var addr = - (((@intCast(usize, static.swizzle_table[n & 0xff]) << 24) | - (@intCast(usize, static.swizzle_table[(n >> 8) & 0xff]) << 16) | - (@intCast(usize, static.swizzle_table[(n >> 16) & 0xff]) << 8) | - (@intCast(usize, static.swizzle_table[(n >> 24) & 0xff]))) >> rev32) | - ((index & 1) * rev7 * 4); - f32_output[addr] = input[index][0]; - addr += rev7; - f32_output[addr] = input[index][1]; - addr += rev7; - f32_output[addr] = input[index][2]; - addr += rev7; - f32_output[addr] = input[index][3]; - } - } -} - -pub fn fftInitUnityTable(out_unity_table: []F32x4) void { - assert(std.math.isPowerOfTwo(out_unity_table.len)); - assert(out_unity_table.len >= 32 and out_unity_table.len <= 512); - - var unity_table = out_unity_table; - - const v0123 = f32x4(0.0, 1.0, 2.0, 3.0); - var length = out_unity_table.len / 4; - var vlstep = f32x4s(0.5 * math.pi / @intToFloat(f32, length)); - - while (true) { - length /= 4; - var vjp = v0123; - - var j: u32 = 0; - while (j < length) : (j += 1) { - unity_table[j] = f32x4s(1.0); - unity_table[j + length * 4] = f32x4s(0.0); - - var vls = vjp * vlstep; - var sin_cos = sincos(vls); - unity_table[j + length] = sin_cos[1]; - unity_table[j + length * 5] = sin_cos[0] * f32x4s(-1.0); - - var vijp = vjp + vjp; - vls = vijp * vlstep; - sin_cos = sincos(vls); - unity_table[j + length * 2] = sin_cos[1]; - unity_table[j + length * 6] = sin_cos[0] * f32x4s(-1.0); - - vijp = vijp + vjp; - vls 
= vijp * vlstep; - sin_cos = sincos(vls); - unity_table[j + length * 3] = sin_cos[1]; - unity_table[j + length * 7] = sin_cos[0] * f32x4s(-1.0); - - vjp += f32x4s(4.0); - } - vlstep *= f32x4s(4.0); - unity_table = unity_table[8 * length ..]; - - if (length <= 4) - break; - } -} - -pub fn fft(re: []F32x4, im: []F32x4, unity_table: []const F32x4) void { - const length = @intCast(u32, re.len * 4); - assert(std.math.isPowerOfTwo(length)); - assert(length >= 4 and length <= 512); - assert(re.len == im.len); - - var re_temp_storage: [128]F32x4 = undefined; - var im_temp_storage: [128]F32x4 = undefined; - var re_temp = re_temp_storage[0..re.len]; - var im_temp = im_temp_storage[0..im.len]; - - std.mem.copy(F32x4, re_temp, re); - std.mem.copy(F32x4, im_temp, im); - - if (length > 16) { - assert(unity_table.len == length); - fftN(re_temp, im_temp, unity_table, length, 1); - } else if (length == 16) { - fft16(re_temp, im_temp, 1); - } else if (length == 8) { - fft8(re_temp, im_temp, 1); - } else if (length == 4) { - fft4(re_temp, im_temp, 1); - } - - fftUnswizzle(re_temp, re); - fftUnswizzle(im_temp, im); -} - -pub fn ifft(re: []F32x4, im: []const F32x4, unity_table: []const F32x4) void { - const length = @intCast(u32, re.len * 4); - assert(std.math.isPowerOfTwo(length)); - assert(length >= 4 and length <= 512); - assert(re.len == im.len); - - var re_temp_storage: [128]F32x4 = undefined; - var im_temp_storage: [128]F32x4 = undefined; - var re_temp = re_temp_storage[0..re.len]; - var im_temp = im_temp_storage[0..im.len]; - - const rnp = f32x4s(1.0 / @intToFloat(f32, length)); - const rnm = f32x4s(-1.0 / @intToFloat(f32, length)); - - for (re, 0..) 
|_, i| { - re_temp[i] = re[i] * rnp; - im_temp[i] = im[i] * rnm; - } - - if (length > 16) { - assert(unity_table.len == length); - fftN(re_temp, im_temp, unity_table, length, 1); - } else if (length == 16) { - fft16(re_temp, im_temp, 1); - } else if (length == 8) { - fft8(re_temp, im_temp, 1); - } else if (length == 4) { - fft4(re_temp, im_temp, 1); - } - - fftUnswizzle(re_temp, re); -} -test "zmath.ifft" { - var unity_table: [512]F32x4 = undefined; - const epsilon = 0.0001; - - // 64 samples - { - var re = [_]F32x4{ - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - f32x4(1.0, 2.0, 3.0, 4.0), f32x4(5.0, 6.0, 7.0, 8.0), - f32x4(9.0, 10.0, 11.0, 12.0), f32x4(13.0, 14.0, 15.0, 16.0), - f32x4(17.0, 18.0, 19.0, 20.0), f32x4(21.0, 22.0, 23.0, 24.0), - f32x4(25.0, 26.0, 27.0, 28.0), f32x4(29.0, 30.0, 31.0, 32.0), - }; - var im = [_]F32x4{ - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), f32x4s(0.0), - }; - - fftInitUnityTable(unity_table[0..64]); - fft(re[0..], im[0..], unity_table[0..64]); - - try expect(approxEqAbs(re[0], f32x4(1056.0, 0.0, -32.0, 0.0), epsilon)); - var i: u32 = 1; - while (i < 16) : (i += 1) { - try expect(approxEqAbs(re[i], f32x4(-32.0, 0.0, -32.0, 0.0), epsilon)); - } - - ifft(re[0..], im[0..], unity_table[0..64]); - - try expect(approxEqAbs(re[0], f32x4(1.0, 2.0, 3.0, 4.0), epsilon)); - try expect(approxEqAbs(re[1], f32x4(5.0, 6.0, 7.0, 8.0), epsilon)); - try expect(approxEqAbs(re[2], f32x4(9.0, 10.0, 11.0, 12.0), epsilon)); - try expect(approxEqAbs(re[3], f32x4(13.0, 14.0, 15.0, 16.0), epsilon)); - try expect(approxEqAbs(re[4], f32x4(17.0, 18.0, 19.0, 20.0), epsilon)); - try expect(approxEqAbs(re[5], 
f32x4(21.0, 22.0, 23.0, 24.0), epsilon)); - try expect(approxEqAbs(re[6], f32x4(25.0, 26.0, 27.0, 28.0), epsilon)); - try expect(approxEqAbs(re[7], f32x4(29.0, 30.0, 31.0, 32.0), epsilon)); - } - - // 512 samples - { - var re: [128]F32x4 = undefined; - var im = [_]F32x4{f32x4s(0.0)} ** 128; - - for (&re, 0..) |*v, i| { - const f = @intToFloat(f32, i * 4); - v.* = f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0); - } - - fftInitUnityTable(unity_table[0..512]); - fft(re[0..], im[0..], unity_table[0..512]); - - for (re, 0..) |v, i| { - const f = @intToFloat(f32, i * 4); - try expect(!approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon)); - } - - ifft(re[0..], im[0..], unity_table[0..512]); - - for (re, 0..) |v, i| { - const f = @intToFloat(f32, i * 4); - try expect(approxEqAbs(v, f32x4(f + 1.0, f + 2.0, f + 3.0, f + 4.0), epsilon)); - } - } -} -// ------------------------------------------------------------------------------ -// -// Private functions and constants -// -// ------------------------------------------------------------------------------ -const f32x4_sign_mask1: F32x4 = F32x4{ @bitCast(f32, @as(u32, 0x8000_0000)), 0, 0, 0 }; -const f32x4_mask2: F32x4 = F32x4{ - @bitCast(f32, @as(u32, 0xffff_ffff)), - @bitCast(f32, @as(u32, 0xffff_ffff)), - 0, - 0, -}; -const f32x4_mask3: F32x4 = F32x4{ - @bitCast(f32, @as(u32, 0xffff_ffff)), - @bitCast(f32, @as(u32, 0xffff_ffff)), - @bitCast(f32, @as(u32, 0xffff_ffff)), - 0, -}; - -inline fn splatNegativeZero(comptime T: type) T { - return @splat(veclen(T), @bitCast(f32, @as(u32, 0x8000_0000))); -} -inline fn splatNoFraction(comptime T: type) T { - return @splat(veclen(T), @as(f32, 8_388_608.0)); -} -inline fn splatAbsMask(comptime T: type) T { - return @splat(veclen(T), @bitCast(f32, @as(u32, 0x7fff_ffff))); -} - -fn floatToIntAndBack(v: anytype) @TypeOf(v) { - // This routine won't handle nan, inf and numbers greater than 8_388_608.0 (will generate undefined values). 
- @setRuntimeSafety(false); - - const T = @TypeOf(v); - const len = veclen(T); - - var vi32: [len]i32 = undefined; - comptime var i: u32 = 0; - // vcvttps2dq - inline while (i < len) : (i += 1) { - vi32[i] = @floatToInt(i32, v[i]); - } - - var vf32: [len]f32 = undefined; - i = 0; - // vcvtdq2ps - inline while (i < len) : (i += 1) { - vf32[i] = @intToFloat(f32, vi32[i]); - } - - return vf32; -} -test "zmath.floatToIntAndBack" { - { - const v = floatToIntAndBack(f32x4(1.1, 2.9, 3.0, -4.5)); - try expect(approxEqAbs(v, f32x4(1.0, 2.0, 3.0, -4.0), 0.0)); - } - { - const v = floatToIntAndBack(f32x8(1.1, 2.9, 3.0, -4.5, 2.5, -2.5, 1.1, -100.2)); - try expect(approxEqAbs(v, f32x8(1.0, 2.0, 3.0, -4.0, 2.0, -2.0, 1.0, -100.0), 0.0)); - } - { - const v = floatToIntAndBack(f32x4(math.inf_f32, 2.9, math.nan_f32, math.qnan_f32)); - try expect(v[1] == 2.0); - } -} - -pub fn approxEqAbs(v0: anytype, v1: anytype, eps: f32) bool { - const T = @TypeOf(v0, v1); - comptime var i: comptime_int = 0; - inline while (i < veclen(T)) : (i += 1) { - if (!math.approxEqAbs(f32, v0[i], v1[i], eps)) { - return false; - } - } - return true; -} - -// ------------------------------------------------------------------------------ -// This software is available under 2 licenses -- choose whichever you prefer. 
-// ------------------------------------------------------------------------------ -// ALTERNATIVE A - MIT License -// Copyright (c) 2022 Michal Ziulek and Contributors -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal in -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -// ------------------------------------------------------------------------------ -// ALTERNATIVE B - Public Domain (www.unlicense.org) -// This is free and unencumbered software released into the public domain. -// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -// software, either in source code form or as a compiled binary, for any purpose, -// commercial or non-commercial, and by any means. -// In jurisdictions that recognize copyright laws, the author or authors of this -// software dedicate any and all copyright interest in the software to the public -// domain. We make this dedication for the benefit of the public at large and to -// the detriment of our heirs and successors. 
We intend this dedication to be an -// overt act of relinquishment in perpetuity of all present and future rights to -// this software under copyright law. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -// ------------------------------------------------------------------------------ diff --git a/src/SomaSolve.cpp b/src/SomaSolve.cpp index 6f1007e..77afa30 100644 --- a/src/SomaSolve.cpp +++ b/src/SomaSolve.cpp @@ -1,15 +1,11 @@ -#include -#include -#include #include #include -#include #include #include "VoxelSpace.h" -auto get_dims_input(int dims[3]) -> void { +void get_dims_input(int dims[3]) { std::cout << "Enter dimensions separated by newlines. (x*y*z must not exceed 64)\n"; - auto success = false; + bool success = false; while (!success) { std::cout << "x: "; std::cin >> dims[0]; @@ -18,7 +14,7 @@ auto get_dims_input(int dims[3]) -> void { std::cout << "z: "; std::cin >> dims[2]; - auto size = dims[0]*dims[1]*dims[2]; + int size = dims[0]*dims[1]*dims[2]; if (size <= 64) { success = true; } else { @@ -28,12 +24,12 @@ auto get_dims_input(int dims[3]) -> void { } } -auto get_reprs_input(int units_required) -> std::vector { +std::vector get_reprs_input(int units_required) { std::cout << "Enter bit-representations (big endian, max 64 bits, total 1s must add up to " << units_required << "). 
press ENTER twice to finish input.\n"; - auto reprs = std::vector(); - auto total_units = 0; + std::vector reprs = std::vector(); + int total_units = 0; while (true) { - auto input = std::string(); + std::string input = std::string(); std::getline(std::cin, input); if (input.size() == 0) { if (total_units == units_required) { @@ -44,12 +40,12 @@ auto get_reprs_input(int units_required) -> std::vector { continue; } } - auto bit_repr = 0ul; - auto i = 0; - auto good_repr = true; + uint64 bit_repr = 0; + int i = 0; + bool good_repr = true; for (auto it = input.rbegin(); it < input.rend(); it++, i++) { if (*it == '1') { - bit_repr |= 1ul << i; + bit_repr |= 1 << i; total_units++; } else if (*it != '0' || i >= 64) { std::cout << "Input invalid. Enter a binary string only with max 64 bits." << '\n'; @@ -64,191 +60,189 @@ auto get_reprs_input(int units_required) -> std::vector { return reprs; } -namespace SomaSolve { - using SomaSolution = std::vector; +typedef std::vector SomaSolution; - struct Solver { - std::vector* input; - std::vector* offsets; - std::vector* solutions; - }; +struct Solver { + std::vector* input; + std::vector* offsets; + std::vector* solutions; +}; - auto STD_SOMA = std::vector{ 23ul, 30ul, 15ul, 1043ul, 24594ul, 12306ul, 11ul }; +std::vector STD_SOMA = { 23ul, 30ul, 15ul, 1043ul, 24594ul, 12306ul, 11ul }; - auto backtrack_solve_iter(std::vector *polycube_input, std::vector *offsets)-> void { - auto num_inputs = offsets->size() - 1; +void backtrack_solve_iter(std::vector *polycube_input, std::vector *offsets) { + int num_inputs = offsets->size() - 1; - auto solns = std::vector(); + std::vector solns = std::vector(); - auto iter_stack = std::vector(); - auto curr_soln_stack = std::vector(); - auto soln_spaces_stack = std::vector(); - soln_spaces_stack.push_back(0ul); + std::vector iter_stack = std::vector(); + std::vector curr_soln_stack = std::vector(); + std::vector soln_spaces_stack = std::vector(); + soln_spaces_stack.push_back(0ul); - auto depth 
= 0; + int depth = 0; - while (depth >= 0) { - if (depth >= iter_stack.size()) { - iter_stack.push_back(offsets->at(depth)); - } - auto end = offsets->at(depth + 1); - auto broke = false; - for (; iter_stack[depth] < end; iter_stack[depth]++) { - auto next_space = polycube_input->at(iter_stack[depth]); - auto soln_space = soln_spaces_stack[depth]; - std::cout << next_space << " " << soln_space << std::endl; - auto successful_fuse = (soln_space | next_space) == (soln_space ^ next_space); - if (successful_fuse) { - soln_spaces_stack.push_back(soln_space |= next_space); - curr_soln_stack.push_back(iter_stack[depth]); - depth++; - if (curr_soln_stack.size() == num_inputs) { - solns.push_back(1); - curr_soln_stack.pop_back(); - soln_spaces_stack.pop_back(); - depth--; - } else { - depth++; - auto broke = true; - break; - } - } - } - if (!broke) { - curr_soln_stack.pop_back(); - soln_spaces_stack.pop_back(); - depth--; - } + while (depth >= 0) { + if (depth >= iter_stack.size()) { + iter_stack.push_back(offsets->at(depth)); } - std::cout << "Done. Found " << solns.size() << " solutions." 
<< std::endl; - } - - auto backtrack_solve(Solver *solver, uint64_t working_solution = 0ul, int curr_piece = 0) -> void { - auto input = solver->input; - auto offsets = solver->offsets; - auto solutions = solver->solutions; - auto start = offsets->at(curr_piece); - auto end = offsets->at(curr_piece + 1); - auto num_pieces = offsets->size() - 1; - for (int i = start; i < end; i++) { - auto successful_fuse = !Voxel::collides(working_solution, input->at(i)); + int end = offsets->at(depth + 1); + bool broke = false; + for (; iter_stack[depth] < end; iter_stack[depth]++) { + uint64 next_space = polycube_input->at(iter_stack[depth]); + uint64 soln_space = soln_spaces_stack[depth]; + std::cout << next_space << " " << soln_space << std::endl; + bool successful_fuse = (soln_space | next_space) == (soln_space ^ next_space); if (successful_fuse) { - auto new_working_solution = working_solution | input->at(i); - solutions->back().at(curr_piece) = input->at(i); - if (curr_piece == num_pieces - 1) { - auto last_soln = solutions->back(); - solutions->push_back(SomaSolution(last_soln.begin(), last_soln.end())); - return; + soln_spaces_stack.push_back(soln_space |= next_space); + curr_soln_stack.push_back(iter_stack[depth]); + depth++; + if (curr_soln_stack.size() == num_inputs) { + solns.push_back(1); + curr_soln_stack.pop_back(); + soln_spaces_stack.pop_back(); + depth--; } else { - backtrack_solve(solver, new_working_solution, curr_piece + 1); - } - } - } - if (curr_piece == 0) { - solutions->pop_back(); - } - } - - auto get_solution_rotations(SomaSolution *solution, int dims[3]) -> std::vector { - auto result = std::vector(Voxel::NUM_ROTS_3D); - for (int piece_i = 0; piece_i < solution->size(); piece_i++) { - auto space = Voxel::Space{ - .space=solution->at(piece_i), - .dim_x=dims[0], - .dim_y=dims[1], - .dim_z=dims[2], - }; - auto piece_rotations = Voxel::getAllRotations(&space); - for (int rot_i = 0; rot_i < piece_rotations.size(); rot_i++) { - 
result[rot_i].push_back(piece_rotations[rot_i].space); - } - } - return result; - } - - auto filter_unique(std::vector *solutions, int dims[3]) -> std::vector { - if (solutions->size() == 0) { - return std::vector(); - } - auto unique_solns = std::vector{}; - for (auto &solution : *solutions) { - auto found_match = false; - for (auto &rotation : get_solution_rotations(&solution, dims)) { - for (auto &unique_soln : unique_solns) { - auto is_match = true; - for (int piece_i = 0; piece_i < unique_soln.size(); piece_i++) { - if (rotation[piece_i] != unique_soln[piece_i]) { - is_match = false; - break; - } - } - if (is_match) { - found_match = true; - break; - } - } - if (found_match) { + depth++; + broke = true; break; } } - if (!found_match) { - unique_solns.push_back(SomaSolution(solution)); + } + if (!broke) { + curr_soln_stack.pop_back(); + soln_spaces_stack.pop_back(); + depth--; + } + } + std::cout << "Done. Found " << solns.size() << " solutions." << std::endl; +} + +void backtrack_solve(Solver *solver, uint64 working_solution = 0, int curr_piece = 0) { + std::vector *input = solver->input; + std::vector *offsets = solver->offsets; + std::vector *solutions = solver->solutions; + int start = offsets->at(curr_piece); + int end = offsets->at(curr_piece + 1); + size_t num_pieces = offsets->size() - 1; + for (int i = start; i < end; i++) { + bool successful_fuse = !collides(working_solution, input->at(i)); + if (successful_fuse) { + uint64 new_working_solution = working_solution | input->at(i); + solutions->back().at(curr_piece) = input->at(i); + if (curr_piece == num_pieces - 1) { + std::vector last_soln = solutions->back(); + solutions->push_back(SomaSolution(last_soln.begin(), last_soln.end())); + return; + } else { + backtrack_solve(solver, new_working_solution, curr_piece + 1); } } - return unique_solns; - } - - auto solve(std::vector *reprs_in, int dims[3]) -> std::vector { - auto reprs = *reprs_in; - auto offsets = std::vector(); - auto polycubes = 
std::vector(); - polycubes.reserve(reprs.size() * 10); - - auto model_space = Voxel::Space{ - .space={}, - .dim_x=dims[0], - .dim_y=dims[1], - .dim_z=dims[2], - }; - - offsets.push_back(0); - auto space = model_space; - space.space = reprs[0]; - Voxel::cullEmptySpace(&space); - auto positions = Voxel::getAllPositionsInPrism(&space, dims); - polycubes.insert(polycubes.end(), positions.begin(), positions.end()); - - for (int i = 1; i < reprs.size(); i++) { - offsets.push_back(polycubes.size()); - auto space = model_space; - space.space = reprs[i]; - Voxel::cullEmptySpace(&space); - auto perms = Voxel::getAllPermutationsInPrism(&space, dims); - polycubes.insert(polycubes.end(), perms.begin(), perms.end()); - } - - offsets.push_back(polycubes.size()); - - auto solutions = std::vector{std::vector(reprs.size())}; - auto solver = Solver{ - .input=&polycubes, - .offsets=&offsets, - .solutions=&solutions, - }; - - backtrack_solve(&solver); - - return filter_unique(solver.solutions, dims); - } - - - auto interactive_cmd_line_solve_soma() -> void { - int dims[3] = { 3, 3, 3 }; - //get_dims_input(dims); - //std::cout << '\n'; - //auto reprs = get_reprs_input(dims[0]*dims[1]*dims[2]); - std::cout << "Great. Calculating solutions...\n"; - auto solutions = SomaSolve::solve(&SomaSolve::STD_SOMA, std::array{ 3, 3, 3 }.data()); - std::cout << solutions.size() << " solutions found." 
<< std::endl; } + if (curr_piece == 0) { + solutions->pop_back(); + } +} + +std::vector get_solution_rotations(SomaSolution *solution, int dims[3]) { + std::vector result = std::vector(NUM_ROTS_3D); + for (int piece_i = 0; piece_i < solution->size(); piece_i++) { + Space space = { + solution->at(piece_i), + dims[0], + dims[1], + dims[2], + }; + std::vector piece_rotations = getAllRotations(&space); + for (int rot_i = 0; rot_i < piece_rotations.size(); rot_i++) { + result[rot_i].push_back(piece_rotations[rot_i].space); + } + } + return result; +} + +std::vector filter_unique(std::vector *solutions, int dims[3]) { + if (solutions->size() == 0) { + return std::vector(); + } + std::vector unique_solns = std::vector{}; + for (std::vector &solution : *solutions) { + bool found_match = false; + for (SomaSolution &rotation : get_solution_rotations(&solution, dims)) { + for (auto &unique_soln : unique_solns) { + bool is_match = true; + for (int piece_i = 0; piece_i < unique_soln.size(); piece_i++) { + if (rotation[piece_i] != unique_soln[piece_i]) { + is_match = false; + break; + } + } + if (is_match) { + found_match = true; + break; + } + } + if (found_match) { + break; + } + } + if (!found_match) { + unique_solns.push_back(SomaSolution(solution)); + } + } + return unique_solns; +} + +std::vector solve(std::vector *reprs_in, int dims[3]) { + std::vector reprs = *reprs_in; + std::vector offsets = std::vector(); + std::vector polycubes = std::vector(); + polycubes.reserve(reprs.size() * 10); + + Space model_space = { + {}, + dims[0], + dims[1], + dims[2], + }; + + offsets.push_back(0); + Space space = model_space; + space.space = reprs[0]; + cullEmptySpace(&space); + std::vector positions = getAllPositionsInPrism(&space, dims); + polycubes.insert(polycubes.end(), positions.begin(), positions.end()); + + for (int i = 1; i < reprs.size(); i++) { + offsets.push_back(polycubes.size()); + Space space = model_space; + space.space = reprs[i]; + cullEmptySpace(&space); + std::vector 
perms = getAllPermutationsInPrism(&space, dims); + polycubes.insert(polycubes.end(), perms.begin(), perms.end()); + } + + offsets.push_back(polycubes.size()); + + std::vector solutions = {std::vector(reprs.size())}; + Solver solver = { + &polycubes, + &offsets, + &solutions, + }; + + backtrack_solve(&solver); + + return filter_unique(solver.solutions, dims); +} + + +void interactive_cmd_line_solve_soma() { + int dims[3] = { 3, 3, 3 }; + //get_dims_input(dims); + //std::cout << '\n'; + //auto reprs = get_reprs_input(dims[0]*dims[1]*dims[2]); + std::cout << "Great. Calculating solutions...\n"; + std::vector solutions = solve(&STD_SOMA, dims); + std::cout << solutions.size() << " solutions found." << std::endl; } diff --git a/src/SomaSolve.h b/src/SomaSolve.h index bdcbee0..3f69d34 100644 --- a/src/SomaSolve.h +++ b/src/SomaSolve.h @@ -1,10 +1,7 @@ #include #include -namespace SomaSolve { - extern std::vector STD_SOMA; - using SomaSolution = std::vector; - auto solve(std::vector *reprs_in, int dims[3]) -> std::vector; - auto interactive_cmd_line_solve_soma() -> void; -} - +extern std::vector STD_SOMA; +typedef std::vector SomaSolution; +std::vector solve(std::vector *reprs_in, int dims[3]); +void interactive_cmd_line_solve_soma(); diff --git a/src/VoxelSpace.cpp b/src/VoxelSpace.cpp index d2f5aa7..30a27a8 100644 --- a/src/VoxelSpace.cpp +++ b/src/VoxelSpace.cpp @@ -5,284 +5,282 @@ #include #include "VoxelSpace.h" -namespace Voxel { - auto index(int dim_y, int dim_z, int x, int y, int z) -> int { - return dim_y * dim_z * x + dim_z * y + z; - } - - // ┌ ┐ ┌ ┐ ┌ ┐ - // │ 1, 0, 0 │ │ x │ │ x │ - // │ 0, 0, -1 │ * │ y │ = │-z │ - // │ 0, 1, 0 │ │ z │ │ y │ - // └ ┘ └ ┘ └ ┘ - auto newIndexRotX(Space *space, int x, int y, int z) -> int { - return space->dim_z * space->dim_y * x + space->dim_y * (space->dim_z - 1 - z) + y; - } +int index(int dim_y, int dim_z, int x, int y, int z) { + return dim_y * dim_z * x + dim_z * y + z; +} - // ┌ ┐ ┌ ┐ ┌ ┐ - // │ 0, 0, 1 │ │ x │ │ z │ - 
// │ 0, 1, 0 │ * │ y │ = │-y │ - // │ -1, 0, 0 │ │ z │ │ x │ - // └ ┘ └ ┘ └ ┘ - auto newIndexRotY(Space *space, int x, int y, int z) -> int { - return space->dim_y * space->dim_x * z + space->dim_x * y + (space->dim_x - 1 - x); +// ┌ ┐ ┌ ┐ ┌ ┐ +// │ 1, 0, 0 │ │ x │ │ x │ +// │ 0, 0, -1 │ * │ y │ = │-z │ +// │ 0, 1, 0 │ │ z │ │ y │ +// └ ┘ └ ┘ └ ┘ +int newIndexRotX(Space *space, int x, int y, int z) { + return space->dim_z * space->dim_y * x + space->dim_y * (space->dim_z - 1 - z) + y; +} + +// ┌ ┐ ┌ ┐ ┌ ┐ +// │ 0, 0, 1 │ │ x │ │ z │ +// │ 0, 1, 0 │ * │ y │ = │-y │ +// │ -1, 0, 0 │ │ z │ │ x │ +// └ ┘ └ ┘ └ ┘ +int newIndexRotY(Space *space, int x, int y, int z) { + return space->dim_y * space->dim_x * z + space->dim_x * y + (space->dim_x - 1 - x); +} + +// ┌ ┐ ┌ ┐ ┌ ┐ +// │ 0, -1, 0 │ │ x │ │-y │ +// │ 1, 0, 0 │ * │ y │ = │ x │ +// │ 0, 0, 1 │ │ z │ │ z │ +// └ ┘ └ ┘ └ ┘ +int newIndexRotZ(Space *space, int x, int y, int z) { + return space->dim_x * space->dim_z * (space->dim_y - 1 - y) + space->dim_z * x + z; +} + +uint64 toggle(uint64_t space, int index) { + space ^= 1ul << index; + return space; +} + +uint64 set(uint64_t space, int index, bool val) { + if (val) { + space |= 1ul << index; + } else { + space &= ~(1ul << index); } + return space; +} - // ┌ ┐ ┌ ┐ ┌ ┐ - // │ 0, -1, 0 │ │ x │ │-y │ - // │ 1, 0, 0 │ * │ y │ = │ x │ - // │ 0, 0, 1 │ │ z │ │ z │ - // └ ┘ └ ┘ └ ┘ - auto newIndexRotZ(Space *space, int x, int y, int z) -> int { - return space->dim_x * space->dim_z * (space->dim_y - 1 - y) + space->dim_z * x + z; - } +bool collides(uint64_t a, uint64_t b) { + return (a | b) != (a ^ b); +} - auto toggle(uint64_t space, int index) -> uint64_t { - space ^= 1ul << index; - return space; - } +bool collides(Space *a, Space *b) { + return (a->space | b->space) != (a->space ^ b->space); +} - auto set(uint64_t space, int index, bool val) -> uint64_t { - if (val) { - space |= 1ul << index; - } else { - space &= ~(1ul << index); - } - return space; - } +bool 
filledAt(Space *space, int x, int y, int z) { + uint64 mask = 1ul << (space->dim_y * space->dim_z * x + space->dim_z * y + z); + return (space->space & mask) != 0ul; +} - auto collides(uint64_t a, uint64_t b) -> bool { - return (a | b) != (a ^ b); - } +Extrema getExtrema(Space *space) { + Extrema extrema = { + 0, + space->dim_x, + 0, + space->dim_y, + 0, + space->dim_z, + }; - auto collides(Space *a, Space *b) -> bool { - return (a->space | b->space) != (a->space ^ b->space); - } - - auto filledAt(Space *space, int x, int y, int z) -> bool { - auto mask = 1ul << (space->dim_y * space->dim_z * x + space->dim_z * y + z); - return (space->space & mask) != 0ul; - } - - auto getExtrema(Space *space) -> Extrema { - auto extrema = Extrema{ - .xMax=0, - .xMin=space->dim_x, - .yMax=0, - .yMin=space->dim_y, - .zMax=0, - .zMin=space->dim_z, - }; - - for (int x = 0; x < space->dim_x; x++) { - for (int y = 0; y < space->dim_y; y++) { - for (int z = 0; z < space->dim_z; z++) { - if (filledAt(space, x, y, z)) { - if (x > extrema.xMax) extrema.xMax = x; - if (x < extrema.xMin) extrema.xMin = x; - if (y > extrema.yMax) extrema.yMax = y; - if (y < extrema.yMin) extrema.yMin = y; - if (z > extrema.zMax) extrema.zMax = z; - if (z < extrema.zMin) extrema.zMin = z; - } + for (int x = 0; x < space->dim_x; x++) { + for (int y = 0; y < space->dim_y; y++) { + for (int z = 0; z < space->dim_z; z++) { + if (filledAt(space, x, y, z)) { + if (x > extrema.xMax) extrema.xMax = x; + if (x < extrema.xMin) extrema.xMin = x; + if (y > extrema.yMax) extrema.yMax = y; + if (y < extrema.yMin) extrema.yMin = y; + if (z > extrema.zMax) extrema.zMax = z; + if (z < extrema.zMin) extrema.zMin = z; } } } - - return extrema; } - auto cullEmptySpace(Space *space) -> void { - auto extrema = getExtrema(space); - auto space_index = 0; - auto newSpace = 0ul; - for (int x = extrema.xMin; x <= extrema.xMax; x++) { - for (int y = extrema.yMin; y <= extrema.yMax; y++) { - for (int z = extrema.zMin; z <= extrema.zMax; 
z++) { - if (filledAt(space, x, y, z)) { - newSpace |= 1ul << space_index; - } - space_index++; + return extrema; +} + +void cullEmptySpace(Space *space) { + Extrema extrema = getExtrema(space); + int space_index = 0; + uint64 newSpace = 0ul; + for (int x = extrema.xMin; x <= extrema.xMax; x++) { + for (int y = extrema.yMin; y <= extrema.yMax; y++) { + for (int z = extrema.zMin; z <= extrema.zMax; z++) { + if (filledAt(space, x, y, z)) { + newSpace |= 1ul << space_index; + } + space_index++; + } + } + } + space->dim_x = extrema.xMax - extrema.xMin + 1; + space->dim_y = extrema.yMax - extrema.yMin + 1; + space->dim_z = extrema.zMax - extrema.zMin + 1; + space->space = newSpace; +} + +void rotate90X(Space *space) { + uint64 new_space = 0; + for (int x = 0; x < space->dim_x; x++) { + for (int y = 0; y < space->dim_y; y++) { + for (int z = 0; z < space->dim_z; z++) { + if (filledAt(space, x, y, z)) { + new_space |= 1 << newIndexRotX(space, x, y, z); } } } - space->dim_x = extrema.xMax - extrema.xMin + 1; - space->dim_y = extrema.yMax - extrema.yMin + 1; - space->dim_z = extrema.zMax - extrema.zMin + 1; - space->space = newSpace; } + int temp = space->dim_y; + space->dim_y = space->dim_z; + space->dim_z = temp; + space->space = new_space; +} - auto rotate90X(Space *space) -> void { - auto new_space = 0ul; - for (int x = 0; x < space->dim_x; x++) { - for (int y = 0; y < space->dim_y; y++) { - for (int z = 0; z < space->dim_z; z++) { - if (filledAt(space, x, y, z)) { - new_space |= 1 << newIndexRotX(space, x, y, z); - } +void rotate90Y(Space *space) { + uint64 new_space = 0; + for (int x = 0; x < space->dim_x; x++) { + for (int y = 0; y < space->dim_y; y++) { + for (int z = 0; z < space->dim_z; z++) { + if (filledAt(space, x, y, z)) { + new_space |= 1 << newIndexRotY(space, x, y, z); } } } - auto temp = space->dim_y; - space->dim_y = space->dim_z; - space->dim_z = temp; - space->space = new_space; } + int temp = space->dim_x; + space->dim_x = space->dim_z; + space->dim_z 
= temp; + space->space = new_space; +} - auto rotate90Y(Space *space) -> void { - auto new_space = 0ul; - for (int x = 0; x < space->dim_x; x++) { - for (int y = 0; y < space->dim_y; y++) { - for (int z = 0; z < space->dim_z; z++) { - if (filledAt(space, x, y, z)) { - new_space |= 1 << newIndexRotY(space, x, y, z); - } +void rotate90Z(Space *space) { + uint64 new_space = 0; + for (int x = 0; x < space->dim_x; x++) { + for (int y = 0; y < space->dim_y; y++) { + for (int z = 0; z < space->dim_z; z++) { + if (filledAt(space, x, y, z)) { + new_space |= 1 << newIndexRotZ(space, x, y, z); } } } - auto temp = space->dim_x; - space->dim_x = space->dim_z; - space->dim_z = temp; - space->space = new_space; } + int temp = space->dim_x; + space->dim_x = space->dim_y; + space->dim_y = temp; + space->space = new_space; +} - auto rotate90Z(Space *space) -> void { - auto new_space = 0ul; - for (int x = 0; x < space->dim_x; x++) { - for (int y = 0; y < space->dim_y; y++) { - for (int z = 0; z < space->dim_z; z++) { - if (filledAt(space, x, y, z)) { - new_space |= 1 << newIndexRotZ(space, x, y, z); - } - } +bool isMatch(Space *a, Space *b) { + return a->space == b->space + && a->dim_x == b->dim_x + && a->dim_y == b->dim_y + && a->dim_z == b->dim_z; +} + +void pushNewUniqueSpins(std::vector *existingSpaces, Space* spaceToSpin) { + Space spins[4] = {}; + spins[0] = *spaceToSpin; + for (int i = 0; i < 3; i++) { + spins[i + 1] = spins[i]; + rotate90X(&spins[i + 1]); + } + for (int i = 0; i < 4; i++) { + bool matchFound = false; + for (Space &existingSpace : *existingSpaces) { + if (isMatch(&existingSpace, &spins[i])) { + matchFound = true; + break; } } - auto temp = space->dim_x; - space->dim_x = space->dim_y; - space->dim_y = temp; - space->space = new_space; - } - - auto isMatch(Space *a, Space *b) -> bool { - return a->space == b->space - && a->dim_x == b->dim_x - && a->dim_y == b->dim_y - && a->dim_z == b->dim_z; - } - - auto pushNewUniqueSpins(std::vector *existingSpaces, Space* 
spaceToSpin) -> void { - Space spins[4] = {}; - spins[0] = *spaceToSpin; - for (int i = 0; i < 3; i++) { - spins[i + 1] = spins[i]; - rotate90X(&spins[i + 1]); + if (!matchFound) { + existingSpaces->push_back(spins[i]); } - for (int i = 0; i < 4; i++) { - auto matchFound = false; - for (auto &existingSpace : *existingSpaces) { - if (isMatch(&existingSpace, &spins[i])) { - matchFound = true; - break; - } - } - if (!matchFound) { - existingSpaces->push_back(spins[i]); - } - } - } - - auto pushXAxisSpins(std::vector *existingSpaces, Space* spaceToSpin) -> void { - auto refSpace = *spaceToSpin; - for (int i = 0; i < 4; i++) { - rotate90X(&refSpace); - existingSpaces->push_back(refSpace); - } - } - - auto getUniqueRotations(Space *space) -> std::vector { - auto rotations = std::vector(); - rotations.reserve(24); - auto refSpace = *space; - cullEmptySpace(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - rotate90Z(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - rotate90Z(&refSpace); - rotate90Z(&refSpace); - pushNewUniqueSpins(&rotations, &refSpace); - return rotations; - } - - auto getAllRotations(Space *space) -> std::vector { - auto rotations = std::vector(); - rotations.reserve(24); - auto refSpace = *space; - pushXAxisSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushXAxisSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushXAxisSpins(&rotations, &refSpace); - rotate90Y(&refSpace); - pushXAxisSpins(&rotations, &refSpace); - rotate90Z(&refSpace); - pushXAxisSpins(&rotations, &refSpace); - rotate90Z(&refSpace); - rotate90Z(&refSpace); - pushXAxisSpins(&rotations, &refSpace); - return rotations; - } - - auto getAllPositionsInPrism(Space *space, int prism_dims[3]) -> std::vector { - auto cubePositions = std::vector(); - if 
(space->dim_x > prism_dims[0] || space->dim_y > prism_dims[1] || space->dim_z > prism_dims[2]) { - return cubePositions; - } - auto xPositionCount = prism_dims[0] - space->dim_x + 1; - auto yPositionCount = prism_dims[1] - space->dim_y + 1; - auto zPositionCount = prism_dims[2] - space->dim_z + 1; - for (int x = 0; x < xPositionCount; x++) { - for (int y = 0; y < yPositionCount; y++) { - for (int z = 0; z < zPositionCount; z++) { - auto new_space = 0ul; - for (int posX = 0; posX < space->dim_x; posX++) { - for (int posY = 0; posY < space->dim_y; posY++) { - for (int posZ = 0; posZ < space->dim_z; posZ++) { - auto set_val = filledAt(space, posX, posY, posZ); - auto index_to_set = index(prism_dims[1], prism_dims[2], x + posX, y + posY, z + posZ); - new_space = set(new_space, index_to_set, set_val); - } - } - } - cubePositions.push_back(new_space); - } - } - } - return cubePositions; - } - - auto getAllPermutationsInPrism(Space *space, int prism_dims[3]) -> std::vector { - auto rotations = getUniqueRotations(space); - auto result = std::vector(); - for (auto &rotation : rotations) { - auto positions = getAllPositionsInPrism(&rotation, prism_dims); - result.insert(result.end(), positions.begin(), positions.end()); - } - return result; - } - - auto size(uint64_t space) -> int { - auto size = 0; - for (int i = 0; i < 64; i++) { - if ((space & (1ul << i)) != 0) { - size++; - } - } - return size; } } + +void pushXAxisSpins(std::vector *existingSpaces, Space* spaceToSpin) { + Space refSpace = *spaceToSpin; + for (int i = 0; i < 4; i++) { + rotate90X(&refSpace); + existingSpaces->push_back(refSpace); + } +} + +std::vector getUniqueRotations(Space *space) { + std::vector rotations = std::vector(); + rotations.reserve(24); + auto refSpace = *space; + cullEmptySpace(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + rotate90Y(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + rotate90Y(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + 
rotate90Y(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + rotate90Z(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + rotate90Z(&refSpace); + rotate90Z(&refSpace); + pushNewUniqueSpins(&rotations, &refSpace); + return rotations; +} + +std::vector getAllRotations(Space *space) { + std::vector rotations = {}; + rotations.reserve(24); + Space refSpace = *space; + pushXAxisSpins(&rotations, &refSpace); + rotate90Y(&refSpace); + pushXAxisSpins(&rotations, &refSpace); + rotate90Y(&refSpace); + pushXAxisSpins(&rotations, &refSpace); + rotate90Y(&refSpace); + pushXAxisSpins(&rotations, &refSpace); + rotate90Z(&refSpace); + pushXAxisSpins(&rotations, &refSpace); + rotate90Z(&refSpace); + rotate90Z(&refSpace); + pushXAxisSpins(&rotations, &refSpace); + return rotations; +} + +std::vector getAllPositionsInPrism(Space *space, int prism_dims[3]) { + std::vector cubePositions = {}; + if (space->dim_x > prism_dims[0] || space->dim_y > prism_dims[1] || space->dim_z > prism_dims[2]) { + return cubePositions; + } + int xPositionCount = prism_dims[0] - space->dim_x + 1; + int yPositionCount = prism_dims[1] - space->dim_y + 1; + int zPositionCount = prism_dims[2] - space->dim_z + 1; + for (int x = 0; x < xPositionCount; x++) { + for (int y = 0; y < yPositionCount; y++) { + for (int z = 0; z < zPositionCount; z++) { + uint64 new_space = 0; + for (int posX = 0; posX < space->dim_x; posX++) { + for (int posY = 0; posY < space->dim_y; posY++) { + for (int posZ = 0; posZ < space->dim_z; posZ++) { + bool set_val = filledAt(space, posX, posY, posZ); + int index_to_set = index(prism_dims[1], prism_dims[2], x + posX, y + posY, z + posZ); + new_space = set(new_space, index_to_set, set_val); + } + } + } + cubePositions.push_back(new_space); + } + } + } + return cubePositions; +} + +std::vector getAllPermutationsInPrism(Space *space, int prism_dims[3]) { + std::vector rotations = getUniqueRotations(space); + std::vector result = std::vector(); + for (auto &rotation : 
rotations) { + auto positions = getAllPositionsInPrism(&rotation, prism_dims); + result.insert(result.end(), positions.begin(), positions.end()); + } + return result; +} + +int size(uint64_t space) { + int size = 0; + for (int i = 0; i < 64; i++) { + if ((space & (1ul << i)) != 0) { + size++; + } + } + return size; +} diff --git a/src/VoxelSpace.h b/src/VoxelSpace.h index d2e2e7b..d69775a 100644 --- a/src/VoxelSpace.h +++ b/src/VoxelSpace.h @@ -2,67 +2,65 @@ #define VOXELSPACE_H #include -#include +#include "lib/djstdlib/core.h" -namespace Voxel { - constexpr int NUM_ROTS_3D = 24; +constexpr int NUM_ROTS_3D = 24; - struct Extrema { - int xMax; - int xMin; - int yMax; - int yMin; - int zMax; - int zMin; - }; +struct Extrema { + int xMax; + int xMin; + int yMax; + int yMin; + int zMax; + int zMin; +}; - struct Space { - uint64_t space; - int dim_x; - int dim_y; - int dim_z; - }; +struct Space { + uint64 space; + int dim_x; + int dim_y; + int dim_z; +}; - auto newIndexRotX(Space *space, int x, int y, int z) -> int; +int newIndexRotX(Space *space, int x, int y, int z); - auto newIndexRotY(Space *space, int x, int y, int z) -> int; +int newIndexRotY(Space *space, int x, int y, int z); - auto newIndexRotZ(Space *space, int x, int y, int z) -> int; +int newIndexRotZ(Space *space, int x, int y, int z); - auto toggle(uint64_t space, int index) -> uint64_t; +uint64 toggle(uint64 space, int index); - auto set(uint64_t space, int index, bool val) -> uint64_t; +uint64 set(uint64 space, int index, bool val); - auto collides(Space *a, Space *b) -> bool; - auto collides(uint64_t a, uint64_t b) -> bool; +bool collides(Space *a, Space *b); +bool collides(uint64 a, uint64 b); - auto add(Space *a, Space *b) -> Space; +Space add(Space *a, Space *b); - auto filledAt(Space *space, int x, int y, int z) -> bool; +bool filledAt(Space *space, int x, int y, int z); - auto getExtrema(Space *space) -> Extrema; +Extrema getExtrema(Space *space); - auto cullEmptySpace(Space *space) -> void; +void 
cullEmptySpace(Space *space); - auto isMatch(Space *a, Space *b) -> bool; +bool isMatch(Space *a, Space *b); - auto rotate90X(Space *space) -> void; +void rotate90X(Space *space); - auto rotate90Y(Space *space) -> void; +void rotate90Y(Space *space); - auto rotate90Z(Space *space) -> void; +void rotate90Z(Space *space); - auto pushNewUniqueSpins(std::vector *existingSpaces, Space* spaceToSpin) -> void; +void pushNewUniqueSpins(std::vector *existingSpaces, Space* spaceToSpin); - auto getUniqueRotations(Space *space) -> std::vector; +std::vector getUniqueRotations(Space *space); - auto getAllRotations(Space *space) -> std::vector; +std::vector getAllRotations(Space *space); - auto getAllPositionsInPrism(Space *space, int prism_dims[3]) -> std::vector; +std::vector getAllPositionsInPrism(Space *space, int prism_dims[3]); - auto getAllPermutationsInPrism(Space *space, int prism_dims[3]) -> std::vector; +std::vector getAllPermutationsInPrism(Space *space, int prism_dims[3]); - auto size(uint64_t space) -> int; -} +int size(uint64 space); #endif diff --git a/src/c.zig b/src/c.zig deleted file mode 100644 index b2aa51f..0000000 --- a/src/c.zig +++ /dev/null @@ -1,9 +0,0 @@ -pub usingnamespace @cImport({ - @cInclude("glad/glad.h"); - @cInclude("GLFW/glfw3.h"); - - @cDefine("STB_IMAGE_IMPLEMENTATION", ""); - @cDefine("TINYOBJ_LOADER_C_IMPLEMENTATION", ""); - @cInclude("loaders/stb_image.h"); - @cInclude("loaders/tinyobj.h"); -}); diff --git a/src/gfx/Color.cpp b/src/gfx/Color.cpp index b408234..73e32ac 100644 --- a/src/gfx/Color.cpp +++ b/src/gfx/Color.cpp @@ -1,11 +1,8 @@ -#include #include -#include #include -#include -#include "Color.h" +#include "../lib/djstdlib/core.h" -auto hue_to_rgb(float p, float q, float t) -> float { +real32 hue_to_rgb(float p, float q, float t) { if (t < 0) { t += 1; } else if (t > 1) { @@ -17,7 +14,7 @@ auto hue_to_rgb(float p, float q, float t) -> float { return p; }; -auto hsl_to_hex(float h, float s, float l) -> glm::vec3 { +glm::vec3 
hsl_to_hex(real32 h, real32 s, real32 l) { h /= 360; s /= 100; l /= 100; @@ -34,7 +31,7 @@ auto hsl_to_hex(float h, float s, float l) -> glm::vec3 { return glm::vec3(r, g, b); } -auto Color::color_from_index(int index) -> glm::vec3 { +glm::vec3 color_from_index(int index) { auto color_wheel_cycle = floorf(index / 6.0f); auto darkness_cycle = floorf(index / 12.0f); auto spacing = (360.0f / 6.0f); diff --git a/src/gfx/Color.h b/src/gfx/Color.h index 60781bb..1bca765 100644 --- a/src/gfx/Color.h +++ b/src/gfx/Color.h @@ -1,5 +1,3 @@ #include -namespace Color { - auto color_from_index(int index) -> glm::vec3; -}; +glm::vec3 color_from_index(int index); diff --git a/src/gfx/Color.zig b/src/gfx/Color.zig deleted file mode 100644 index 2491d4c..0000000 --- a/src/gfx/Color.zig +++ /dev/null @@ -1,43 +0,0 @@ -fn hue_to_rgb(p: f32, q: f32, t: f32) f32 { - if (t < 0) { - t += 1; - } else if (t > 1) { - t -= 1; - } - if (t < 1.0 / 6) return p + (q - p) * 6 * t; - if (t < 1.0 / 2) return q; - if (t < 2.0 / 3) return p + (q - p) * (2.0 / 3 - t) * 6; - return p; -} - -fn hsl_to_hex(h: f32, s: f32, l: f32) @Vector(3, f32) { - h /= 360; - s /= 100; - l /= 100; - const r: f32; - const g: f32; - const b: f32; - if (s == 0) { - r = l; - g = l; - b = l; - } else { - const q = if (l < 0.5) l * (1 + s) else l + s - l * s; - const p = 2 * l - q; - r = hue_to_rgb(p, q, h + 1.0 / 3); - g = hue_to_rgb(p, q, h); - b = hue_to_rgb(p, q, h - 1.0 / 3); - } - return @Vector(3, f32){ r, g, b }; -} - -pub fn color_from_index(index: i32) @Vector(3, f32) { - const color_wheel_cycle = @floor(index / 6.0); - const darkness_cycle = @floor(index / 12.0); - const spacing = (360.0 / 6.0); - const offset = if (color_wheel_cycle == 0) 0 else spacing / (color_wheel_cycle + 2); - const hue = spacing * (index % 6) + offset; - const saturation = 100.0f; - const lightness = 1.0f / (2 + darkness_cycle) * 100; - return hsl_to_hex(hue, saturation, lightness); -} diff --git a/src/gfx/Mesh.cpp b/src/gfx/Mesh.cpp index 
4d5daa2..c9f6ab4 100644 --- a/src/gfx/Mesh.cpp +++ b/src/gfx/Mesh.cpp @@ -1,6 +1,6 @@ #include #include "Mesh.h" -#include "loaders/tinyobj.h" +#include "../lib/loaders/tinyobj.h" auto Mesh::init(const char* obj_file) -> void { auto reader = tinyobj::ObjReader(); diff --git a/src/gfx/Mesh.h b/src/gfx/Mesh.h index 5a435bf..c8a0c7b 100644 --- a/src/gfx/Mesh.h +++ b/src/gfx/Mesh.h @@ -1,7 +1,7 @@ #ifndef LEDDA_MESH_H #define LEDDA_MESH_H -#include "glad/glad.h" +#include "../lib/glad/glad.h" #include "geometry.h" struct Mesh { @@ -11,8 +11,8 @@ struct Mesh { unsigned int vbo_norm; unsigned int ebo; unsigned int num_indices; - auto init(const char* obj_file) -> void; - auto init(const LeddaGeometry::Shape* shape) -> void; + void init(const char* obj_file); + void init(const Shape* shape); }; #endif diff --git a/src/gfx/Mesh.zig b/src/gfx/Mesh.zig deleted file mode 100644 index bac0bf5..0000000 --- a/src/gfx/Mesh.zig +++ /dev/null @@ -1,94 +0,0 @@ -const std = @import("std"); -const c = @import("../c.zig"); -const djleddaGeom = @import("djleddaGeom.zig"); - -pub const Mesh = struct { - vao: c_uint, - vbo_xyz: c_uint, - vbo_uv: c_uint, - vbo_norm: c_uint, - ebo: c_uint, - num_indices: c_uint, - - pub fn from_shape(shape: *const djleddaGeom.Shape) void { - const mesh = Mesh{}; - mesh.num_indices = shape.indices.len; - c.glGenVertexArrays(1, &mesh.vao); - c.glGenBuffers(1, &mesh.vbo_xyz); - c.glGenBuffers(1, &mesh.vbo_uv); - c.glGenBuffers(1, &mesh.ebo); - - c.glBindVertexArray(mesh.vao); - - c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_xyz); - c.glBufferData(c.GL_ARRAY_BUFFER, shape.xyz.ptr * @sizeOf(float), shape.xyz, c.GL_STATIC_DRAW); - c.glVertexAttribPointer(0, 3, c.GL_FLOAT, c.GL_FALSE, 3 * @sizeOf(f32), @as(*void, 0)); - c.glEnableVertexAttribArray(0); - - c.glBindBuffer(c.GL_ARRAY_BUFFER, mesh.vbo_uv); - c.glBufferData(c.GL_ARRAY_BUFFER, shape.uv.ptr * @sizeOf(f32), shape.uv, c.GL_STATIC_DRAW); - c.glVertexAttribPointer(1, 2, c.GL_FLOAT, c.GL_FALSE, 2 * 
@sizeOf(f32), @as(*void, 0)); - c.glEnableVertexAttribArray(1); - - c.glBindBuffer(c.GL_ELEMENT_ARRAY_BUFFER, mesh.ebo); - c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, shape.indices.len * @sizeOf(c_uint), shape.indices.ptr, c.GL_STATIC_DRAW); - } - -// pub fn init(obj_file: *[]const u8) void { -// const reader = c.tinyobj.ObjReader(); -// const success = reader.ParseFromFile(obj_file); -// std.debug.print("{}\n", .{reader.Error()}); -// -// const attrib = reader.GetAttrib(); -// -// const indices_t = reader.GetShapes().at(0).mesh.indices; -// const indices = ArrayList(c_uint)(indices_t.size()); -// -// const vertices = ArrayList()(3*indices_t.size()); -// const normals = ArrayList()(3*indices_t.size()); -// const texcoords = ArrayList()(2*indices_t.size()); -// -// for (int i = 0; i < indices_t.size(); i++) { -// const vertex_data = indices_t[i]; -// vertices[3*i] = attrib.vertices[3*vertex_data.vertex_index]; -// vertices[3*i+1] = attrib.vertices[3*vertex_data.vertex_index + 1]; -// vertices[3*i+2] = attrib.vertices[3*vertex_data.vertex_index + 2]; -// -// normals[3*i] = attrib.normals[3*vertex_data.normal_index]; -// normals[3*i+1] = attrib.normals[3*vertex_data.normal_index + 1]; -// normals[3*i+2] = attrib.normals[3*vertex_data.normal_index + 2]; -// -// texcoords[2*i] = attrib.texcoords[2*vertex_data.texcoord_index]; -// texcoords[2*i+1] = attrib.texcoords[2*vertex_data.texcoord_index + 1]; -// -// indices[i] = i; -// } -// -// num_indices = indices_t.size(); -// glGenVertexArrays(1, &vao); -// glGenBuffers(1, &vbo_xyz); -// glGenBuffers(1, &vbo_uv); -// glGenBuffers(1, &vbo_norm); -// //glGenBuffers(1, &ebo); -// -// glBindVertexArray(vao); -// -// glBindBuffer(GL_ARRAY_BUFFER, vbo_xyz); -// glBufferData(GL_ARRAY_BUFFER, vertices.size() * sizeof(float), vertices.data(), GL_STATIC_DRAW); -// glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0); -// glEnableVertexAttribArray(0); -// -// glBindBuffer(GL_ARRAY_BUFFER, vbo_uv); -// 
glBufferData(GL_ARRAY_BUFFER, texcoords.size() * sizeof(float), texcoords.data(), GL_STATIC_DRAW); -// glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0); -// glEnableVertexAttribArray(1); -// -// glBindBuffer(GL_ARRAY_BUFFER, vbo_norm); -// glBufferData(GL_ARRAY_BUFFER, normals.size() * sizeof(float), normals.data(), GL_STATIC_DRAW); -// glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0); -// glEnableVertexAttribArray(2); -// -// //glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); -// //glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(unsigned int), indices.data(), GL_STATIC_DRAW); -// } -}; diff --git a/src/gfx/OrbitControls.cpp b/src/gfx/OrbitControls.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/gfx/OrbitControls.h b/src/gfx/OrbitControls.h deleted file mode 100644 index c4e7f7d..0000000 --- a/src/gfx/OrbitControls.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef ORBIT_CONTROLS_H -#define ORBIT_CONTROLS_H - -#include "glad/glad.h" -#include -#include -#include -#include -#include "loaders/stb_image.h" - -constexpr auto ROTATION_FACTOR = 1.0f / 200.0f; - -struct Point { - float x; - float y; -}; - -class OrbitControls { -private: - bool dragging; - bool hovered; - bool scrolling; - bool flyingEnabled; - float lastX; - float lastY; - Point lastScroll1; - Point lastScroll2; - glm::vec3 y_axis; - glm::vec3 x_axis; - glm::vec3 start; - Entity* orbited_object; - - OrbitControls(Entity* orbited, Camera* camera) { - camera = camera; - orbited_object = orbited; - y_axis = orbited_object.worldToLocal(camera.up); - x_axis = orbited_object.position.sub(camera.position); - x_axis /= sqrt(pow(x_axis.x) + pow(x_axis.y, 2) + pow(x_axis.z, 2)); - x_axis = glm::cross(x_axis, y_axis); - start = orbited_object.rotation; - - this.element.addEventListener('wheel', (ev) => this.handleScroll(ev)); - this.element.addEventListener('mouseover', () => this.hovered = true); - 
this.element.addEventListener('mouseout', () => this.hovered = false); - this.element.addEventListener('mousedown', (ev) => this.handleMouseDown(ev)); - window.addEventListener('mousemove', (ev) => this.handleMove(ev)); - window.addEventListener('mouseup', () => this.dragging = false); - } - - on_mouse_down(event) { - if (event.button === 1) { - this.object.setRotationFromEuler(this.start); - } - if (!this.dragging) { - this.lastX = event.x; - this.lastY = event.y; - this.dragging = true; - } - } - - on_mouse_move(event) { - if (dragging) { - auto x_diff = event.movementX * ROTATION_FACTOR; - auto y_diff = event.movementY * ROTATION_FACTOR; - glm::rotate(&orbited_object, x_diff, &y_axis); - //rotate on world axis ??? - glm::rotate(&orbited_object, y_diff &x_axis); - } - } - - on_scroll(event) { - if (this.flyingEnabled && this.hovered) { - for (const fliable of this.fliables) { - const direction = event.deltaY / Math.abs(event.deltaY); - fliable.flyBy(direction / 10); - } - } - } -} - -#endif diff --git a/src/gfx/Shader.cpp b/src/gfx/Shader.cpp index eed0e28..19c128b 100644 --- a/src/gfx/Shader.cpp +++ b/src/gfx/Shader.cpp @@ -1,43 +1,42 @@ -#include "glad/glad.h" -#include -#include #include #include #include #include "Shader.h" +#include "../lib/djstdlib/core.h" +#include "../lib/glad/glad.h" enum ShaderType { fragment=GL_FRAGMENT_SHADER, vertex=GL_VERTEX_SHADER, }; -auto create_shader(const char* file_path, ShaderType shader_type, char* info_log) -> unsigned int { +uint32 create_shader(const char* file_path, ShaderType shader_type, char* info_log) { std::stringstream shader_stream; std::ifstream shader_file; shader_file.open(file_path); shader_stream << shader_file.rdbuf(); shader_file.close(); - auto shader_string = shader_stream.str(); - const auto shader_code = shader_string.c_str(); + std::string string = shader_stream.str(); + const char *shader_code = string.c_str(); - auto vertex_shader = glCreateShader(shader_type); + GLuint vertex_shader = 
glCreateShader(shader_type); glShaderSource(vertex_shader, 1, &shader_code, NULL); glCompileShader(vertex_shader); int success; glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &success); if (!success) { glGetShaderInfoLog(vertex_shader, 512, NULL, info_log); - auto shader_type_name = shader_type == ShaderType::fragment ? "FRAGMENT" : "VERTEX"; + const char* shader_type_name = shader_type == ShaderType::fragment ? "FRAGMENT" : "VERTEX"; std::cout << "ERROR::SHADER::" << shader_type_name << "::COMPILATION_FAILED\n" << info_log << std::endl; } return vertex_shader; } -auto Shader::init(const char* vertex_path, const char* fragment_path) -> void { - auto info_log = std::array(); - auto vertex_shader = create_shader(vertex_path, ShaderType::vertex, info_log.data()); - auto fragment_shader = create_shader(fragment_path, ShaderType::fragment, info_log.data()); +void Shader::init(const char* vertex_path, const char* fragment_path) { + char info_log[512] = {0}; + uint32 vertex_shader = create_shader(vertex_path, ShaderType::vertex, info_log); + uint32 fragment_shader = create_shader(fragment_path, ShaderType::fragment, info_log); prog_id = glCreateProgram(); glAttachShader(prog_id, vertex_shader); @@ -47,8 +46,8 @@ auto Shader::init(const char* vertex_path, const char* fragment_path) -> void { int success; glGetProgramiv(prog_id, GL_LINK_STATUS, &success); if (!success) { - glGetProgramInfoLog(prog_id, 512, NULL, info_log.data()); - std::cout << "ERROR::SHADER::PROGRAM::LINK_FAILED\n" << info_log.data() << std::endl; + glGetProgramInfoLog(prog_id, 512, NULL, info_log); + std::cout << "ERROR::SHADER::PROGRAM::LINK_FAILED\n" << info_log << std::endl; } glDeleteShader(vertex_shader); diff --git a/src/gfx/Shader.h b/src/gfx/Shader.h index 7e02dd6..2245035 100644 --- a/src/gfx/Shader.h +++ b/src/gfx/Shader.h @@ -3,7 +3,7 @@ struct Shader { unsigned int prog_id; - auto init(const char* vertex_path, const char* fragment_path) -> void; + void init(const char* vertex_path, const 
char* fragment_path); }; #endif diff --git a/src/gfx/Shader.zig b/src/gfx/Shader.zig deleted file mode 100644 index 26bcce5..0000000 --- a/src/gfx/Shader.zig +++ /dev/null @@ -1,56 +0,0 @@ -const c = @import("../c.zig"); -const std = @import("std"); - -const ShaderType = enum(u32) { - fragment = c.GL_FRAGMENT_SHADER, - vertex = c.GL_VERTEX_SHADER, -}; - -fn create_shader(file_path: []const u8, shader_type: ShaderType, info_log: *[]const u8, allocator: *std.mem.Allocator) c_uint { - const file = try std.fs.openFileAbsolute(file_path); - - const file_reader = file.reader(file); - const shader_code = std.ArrayList(u8); - shader_code.initCapacity(allocator, 1024); - defer allocator.free(shader_code); - - file_reader.readAllArrayList(shader_code, 1024 * 1024); - - const vertex_shader = c.glCreateShader(shader_type); - c.glShaderSource(vertex_shader, 1, &shader_code.items, c.NULL); - c.glCompileShader(vertex_shader); - const success: i32 = undefined; - c.glGetShaderiv(vertex_shader, c.GL_COMPILE_STATUS, &success); - if (success != 0) { - c.glGetShaderInfoLog(vertex_shader, 512, c.NULL, info_log); - const shader_type_name = if (shader_type == ShaderType.fragment) "FRAGMENT" else "VERTEX"; - std.debug.print("ERROR::SHADER::{}::COMPILATION_FAILED\n{}\n", .{ shader_type_name, info_log }); - } - - return vertex_shader; -} - -const Shader = struct { - prog_id: c_uint, - - pub fn init(self: Shader, vertex_path: *[]const u8, fragment_path: *[]const u8, allocator: *std.mem.Allocator) void { - const info_log = [512]u8{}; - const vertex_shader = create_shader(vertex_path, ShaderType.vertex, &info_log, allocator); - const fragment_shader = create_shader(fragment_path, ShaderType.fragment, &info_log, allocator); - - self.prog_id = c.glCreateProgram(); - c.glAttachShader(self.prog_id, vertex_shader); - c.glAttachShader(self.prog_id, fragment_shader); - c.glLinkProgram(self.prog_id); - - const success: c_uint = undefined; - c.glGetProgramiv(self.prog_id, c.GL_LINK_STATUS, &success); - 
if (!success) { - c.glGetProgramInfoLog(self.prog_id, 512, c.NULL, &info_log); - std.debug.print("ERROR::SHADER::PROGRAM::LINK_FAILED\n{}\n", .{info_log}); - } - - c.glDeleteShader(vertex_shader); - c.glDeleteShader(fragment_shader); - } -}; diff --git a/src/gfx/Texture.cpp b/src/gfx/Texture.cpp index 12af1a7..35412b5 100644 --- a/src/gfx/Texture.cpp +++ b/src/gfx/Texture.cpp @@ -1,9 +1,9 @@ #include "Texture.h" #include -#include "loaders/stb_image.h" -#include "glad/glad.h" +#include "../lib/loaders/stb_image.h" +#include "../lib/glad/glad.h" -auto Texture::init(const char* source_path) -> void { +void Texture::init(const char* source_path) { glGenTextures(1, &tex_id); glBindTexture(GL_TEXTURE_2D, tex_id); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); @@ -12,7 +12,7 @@ auto Texture::init(const char* source_path) -> void { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); int nr_channels; - auto data = stbi_load(source_path, &width, &height, &nr_channels, 0); + stbi_uc *data = stbi_load(source_path, &width, &height, &nr_channels, 0); if (data) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, data); glGenerateMipmap(GL_TEXTURE_2D); diff --git a/src/gfx/Texture.h b/src/gfx/Texture.h index 19eb7be..2c915c4 100644 --- a/src/gfx/Texture.h +++ b/src/gfx/Texture.h @@ -5,7 +5,7 @@ struct Texture { unsigned int tex_id; int width; int height; - auto init(const char* source_path) -> void; + void init(const char* source_path); }; #endif diff --git a/src/gfx/djleddaGeom.zig b/src/gfx/djleddaGeom.zig deleted file mode 100644 index eb95d8e..0000000 --- a/src/gfx/djleddaGeom.zig +++ /dev/null @@ -1,57 +0,0 @@ -// Buffer layout: -// X, Y, Z, U, V - -pub const Shape = struct { - indices: []c_uint, - uv: []f32, - xyz: []f32, -}; - -const triangle_vertices = []f32{ - -0.5, -0.5, 0.0, 1.0, 1.0, - 0.5, -0.5, 0.0, 0.5, 0.5, - 0.0, 0.5, 0.0, 0.0, 0.0, -}; - -const triangle_indices = []c_uint{ 0, 1, 2 }; - -const 
cube_vertices = []f32{ -0.5, -0.5, -0.5, 0.0, 0.0, 0.5, -0.5, -0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 1.0, -0.5, 0.5, 0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, -0.5, 1.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, 0.5, -0.5, -0.5, 1.0, 1.0, 0.5, -0.5, 0.5, 1.0, 0.0, 0.5, -0.5, 0.5, 1.0, 0.0, -0.5, -0.5, 0.5, 0.0, 0.0, -0.5, -0.5, -0.5, 0.0, 1.0, -0.5, 0.5, -0.5, 0.0, 1.0, 0.5, 0.5, -0.5, 1.0, 1.0, 0.5, 0.5, 0.5, 1.0, 0.0, 0.5, 0.5, 0.5, 1.0, 0.0, -0.5, 0.5, 0.5, 0.0, 0.0, -0.5, 0.5, -0.5, 0.0, 1.0 }; - -const cube_indices = []c_uint{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; - -const square_xyz = []f32{ - 0.5, 0.5, 0.0, - 0.5, -0.5, 0.0, - -0.5, -0.5, 0.0, - -0.5, 0.5, 0.0, -}; - -const square_uv = []f32{ - 1.0, 1.0, - 1.0, 0.0, - 0.0, 0.0, - 0.0, 1.0, -}; - -const square_indices = []c_uint{ - 0, 1, 3, - 1, 2, 3, -}; - -pub const TRIANGLE = Shape{ - .indices = triangle_indices, - .uv = triangle_vertices, - .xyz = triangle_vertices, -}; - -pub const SQUARE = Shape{ - .indices = square_indices, - .uv = square_uv, - .xyz = square_xyz, -}; - -pub const CUBE = Shape{ - .indices = cube_indices, - .uv = triangle_vertices, - .xyz = triangle_vertices, -}; diff --git a/src/gfx/geometry.cpp b/src/gfx/geometry.cpp index d8900da..79e06d4 100644 --- a/src/gfx/geometry.cpp +++ b/src/gfx/geometry.cpp @@ -1,20 +1,20 @@ -#include #include "geometry.h" +#include "../lib/djstdlib/core.h" // Buffer layout: // X, Y, Z, U, V -auto 
triangle_vertices = std::to_array({ +real32 triangle_vertices[] = { -0.5f, -0.5f, 0.0f, 1.0f, 1.0f, 0.5f, -0.5f, 0.0f, 0.5f, 0.5f, 0.0f, 0.5f, 0.0f, 0.0f, 0.0f, -}); +}; -auto triangle_indices = std::to_array({ +uint32 triangle_indices[] = { 0, 1, 2 -}); +}; -auto cube_vertices = std::to_array({ +real32 cube_vertices[] = { -0.5f, -0.5f, -0.5f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, 1.0f, 0.0f, 0.5f, 0.5f, -0.5f, 1.0f, 1.0f, @@ -56,57 +56,55 @@ auto cube_vertices = std::to_array({ 0.5f, 0.5f, 0.5f, 1.0f, 0.0f, -0.5f, 0.5f, 0.5f, 0.0f, 0.0f, -0.5f, 0.5f, -0.5f, 0.0f, 1.0f -}); +}; -auto cube_indices = std::to_array({ +uint32 cube_indices[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 -}); +}; -auto square_xyz = std::to_array({ +real32 square_xyz[] = { 0.5f, 0.5f, 0.0f, 0.5f, -0.5f, 0.0f, -0.5f, -0.5f, 0.0f, -0.5f, 0.5f, 0.0f, -}); +}; -auto square_uv = std::to_array({ +real32 square_uv[] = { 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, -}); +}; -auto square_indices = std::to_array({ +uint32 square_indices[] = { 0, 1, 3, 1, 2, 3, -}); +}; -namespace LeddaGeometry { - const Shape TRIANGLE = { - .indices = triangle_indices.data(), - .indices_size = sizeof(triangle_indices), - .uv = triangle_vertices.data(), - .uv_size = sizeof(triangle_vertices), - .xyz = triangle_vertices.data(), - .xyz_size = sizeof(triangle_vertices), - }; +const Shape TRIANGLE = { + .indices = triangle_indices, + .indices_size = ArrayCount(triangle_indices), + .uv = triangle_vertices, + .uv_size = ArrayCount(triangle_vertices), + .xyz = triangle_vertices, + .xyz_size = ArrayCount(triangle_vertices), +}; - const Shape SQUARE = { - .indices = square_indices.data(), - .indices_size = square_indices.size(), - .uv = square_uv.data(), - .uv_size = square_uv.size(), - .xyz = square_xyz.data(), - .xyz_size = square_xyz.size(), - }; +const Shape SQUARE = { + .indices = square_indices, + .indices_size = ArrayCount(square_indices), + .uv = 
square_uv, + .uv_size = ArrayCount(square_uv), + .xyz = square_xyz, + .xyz_size = ArrayCount(square_xyz), +}; - const Shape CUBE = { - .indices = cube_indices.data(), - .indices_size = cube_indices.size(), - .uv = triangle_vertices.data(), - .uv_size = triangle_vertices.size(), - .xyz = triangle_vertices.data(), - .xyz_size = triangle_vertices.size(), - }; -} +const Shape CUBE = { + .indices = cube_indices, + .indices_size = ArrayCount(cube_indices), + .uv = triangle_vertices, + .uv_size = ArrayCount(triangle_vertices), + .xyz = triangle_vertices, + .xyz_size = ArrayCount(triangle_vertices), +}; diff --git a/src/gfx/geometry.h b/src/gfx/geometry.h index 3bcaa21..0eb5b2d 100644 --- a/src/gfx/geometry.h +++ b/src/gfx/geometry.h @@ -1,20 +1,18 @@ #ifndef LEDDA_GEOMETRY_H #define LEDDA_GEOMETRY_H -#include +#include -namespace LeddaGeometry { - struct Shape { - unsigned int* indices; - size_t indices_size; - float* uv; - size_t uv_size; - float* xyz; - size_t xyz_size; - }; - extern const Shape TRIANGLE; - extern const Shape SQUARE; - extern const Shape CUBE; -} +struct Shape { + unsigned int* indices; + size_t indices_size; + float* uv; + size_t uv_size; + float* xyz; + size_t xyz_size; +}; +extern const Shape TRIANGLE; +extern const Shape SQUARE; +extern const Shape CUBE; #endif diff --git a/lib/c/KHR/khrplatform.h b/src/lib/KHR/khrplatform.h similarity index 100% rename from lib/c/KHR/khrplatform.h rename to src/lib/KHR/khrplatform.h diff --git a/src/lib/djstdlib/app.cpp b/src/lib/djstdlib/app.cpp new file mode 100644 index 0000000..d1a01a3 --- /dev/null +++ b/src/lib/djstdlib/app.cpp @@ -0,0 +1,14 @@ +#include +#include "core.cpp" +#include "core.h" + +int main(int argc, char **argv) { + int statusCode = 0; + initialiseCore(); + Arena *arena = arenaAlloc(Megabytes(64)); + list args = getArgs(arena, argc, argv); + + prinft("%S", strSplit(arena, "-"_s, "hallo-world"_s)); + + return statusCode; +} diff --git a/src/lib/djstdlib/core.cpp b/src/lib/djstdlib/core.cpp new 
file mode 100644 index 0000000..1d85fc4 --- /dev/null +++ b/src/lib/djstdlib/core.cpp @@ -0,0 +1,511 @@ +#include // TODO(djledda): get outta here +#include +#include +#define STB_SPRINTF_IMPLEMENTATION +#include "core.h" +#include "os.cpp" + +void *pushSize(Arena *arena, size_t bytes) { + if (arena->capacity - arena->head >= bytes) { + void *ptr = (char *)arena->memory + arena->head; + arena->head += bytes; + return ptr; + } + return 0; +} + +Arena *arenaAlloc(size_t capacity) { + Arena *result = (Arena *)os_alloc(sizeof(Arena) + capacity); + result->memory = result + sizeof(Arena); + result->capacity = capacity; + result->head = 0; + return result; +} + +void arenaFree(Arena *arena) { + os_free(arena, arena->capacity); +} + +void arenaFreeFrom(Arena *arena, size_t position) { + arena->head = position; +} + +Arena *scratchArenas[2]; + +void initialiseCore() { + for (EachInArray(scratchArenas, i)) { + scratchArenas[i] = arenaAlloc(Megabytes(64)); + } +} + +Scratch scratchStart(Arena **conflicts, size_t conflictCount) { + Scratch scratch = {0}; + for (size_t i = 0; i < ArrayCount(scratchArenas); i += 1) { + bool conflicted = false; + for (Arena **conflict = conflicts; conflict < conflicts + conflictCount; conflict += 1) { + if (*conflict == scratchArenas[i]) { + conflicted = true; + break; + } + } + if (conflicted == false) { + scratch.arena = scratchArenas[i]; + scratch.start = scratch.arena->head; + break; + } + } + return scratch; +} + +#define DeferLoop(begin_stmnt, end_stmnt) for(int __defer_i = ((begin_stmnt), 0); __defer_i < 1; (++__defer_i, (end_stmnt))) +#define WithScratch(scratchName) Scratch scratchName; DeferLoop(scratchName = scratchStart(0, 0), scratchEnd(scratchName)) + +void scratchEnd(Scratch scratch) { + arenaFreeFrom(scratch.arena, scratch.start); +} + +template +T *appendList(list *list, T element) { + if (list->head < list->length) { + list->data[list->head] = element; + list->head++; + return &(list->data[list->head - 1]); + } else { + return 
0; + } +} + +template +void zeroListFull(list *list) { + memset(list->data, 0, list->head * sizeof(T)); +} + +template +void zeroList(list *list) { + list->head = 0; + memset(list->data, 0, list->head * sizeof(T)); +} + +inline string operator""_s(const char *cstrLiteral, unsigned long length) { + return { + (char *)cstrLiteral, + length, + }; +} + +const char *cstring(Arena *arena, list buf) { + char *arr = PushArray(arena, char, buf.length + 1); + memmove(arr, buf.data, buf.length); + arr[buf.length] = '\0'; + return arr; +} + +const char *cstring(Arena *arena, string str) { + char *arr = PushArray(arena, char, str.length + 1); + memmove(arr, str.str, str.length); + arr[str.length] = '\0'; + return arr; +} + +bool strEql(string s1, string s2) { + if (s1.length != s2.length) { + return false; + } + for (size_t i = 0; i < s1.length; i++) { + if (s1.str[i] != s2.str[i]) { + return false; + } + } + return true; +} + +size_t calcStringLen(const char *str) { + size_t size = 0; + if (str == NULL) { + return size; + } + while (str[size] != '\0') { + size++; + } + return size; +} + +string strFromCString(Arena *arena, const char *str) { + string result = PushString(arena, calcStringLen(str)); + memcpy(result.str, str, result.length); + return result; +} + +string strReverse(Arena *arena, string str) { + string reversed = PushString(arena, str.length); + for ( + size_t mainIndex = str.length - 1, reversedIndex = 0; + mainIndex < str.length; + mainIndex--, reversedIndex++ + ) { + reversed.str[reversedIndex] = str.str[mainIndex]; + } + return reversed; +} + +string strPrintfv(Arena *arena, const char *fmt, va_list args) { + string result = {0}; + va_list argsCopy; + va_copy(argsCopy, args); + uint64 bufSize = stb_vsnprintf(0, 0, fmt, args) + 1; + result.str = PushArray(arena, char, bufSize); + result.length = bufSize - 1; + stb_vsnprintf((char *)result.str, (int)bufSize, fmt, argsCopy); + return result; +} + +string strPrintf(Arena *arena, const char *fmt, ...) 
{ + string result = {0}; + va_list args; + va_start(args, fmt); + result = strPrintfv(arena, fmt, args); + va_end(args); + return result; +} + +template +list listSlice(list l, size_t start, size_t stop) { + if (stop == 0) { + stop = l.head; + } + // TODO(djledda): maybe assert instead + if (stop > l.head || start > stop) { + return {0}; + } + return { + l.data + start, + stop - start, + stop - start, + }; +} + +string strSlice(string str, size_t start, size_t stop) { + if (stop == 0) { + stop = str.length; + } + // TODO(djledda): maybe assert instead + if (stop > str.length || start > stop) { + return {0}; + } + return { + str.str + start, + stop - start, + }; +} + +string strSlice(char *data, size_t start, size_t stop) { + return { + data + start, + stop - start, + }; +} + +bool stringContains(string str, char c) { + for (size_t i = 0; i < str.length; i++) { + if (str.str[i] == c) { + return true; + } + } + return false; +} + +string NUMERIC_CHARS = "0123456789"_s; +inline bool isNumeric(char c) { + return stringContains(NUMERIC_CHARS, c); +} + +list strSplit(Arena *arena, string splitStr, string inputStr) { + list result = {0}; + if (inputStr.length > 0) { + size_t splitCount = 0; + size_t c = 0; + size_t start = 0; + void *beginning = (char *)arena->memory + arena->head; + while (c < inputStr.length - splitStr.length) { + if (strEql(strSlice(inputStr, c, c + splitStr.length), splitStr)) { + string *splitString = PushStruct(arena, string); + splitString->str = inputStr.str + start; + splitString->length = c - start; + splitCount++; + start = c + 1; + } + c++; + } + + string *splitString = PushStruct(arena, string); + splitString->str = inputStr.str + start; + splitString->length = inputStr.length - start; + splitCount++; + result.data = (string *)beginning, + result.head = splitCount, + result.length = splitCount; + } + return result; +} + +int8 parsePositiveInt(string str, size_t *lengthPointer) { + size_t numEnd = 0; + char currChar = str.str[numEnd]; + while 
(numEnd < str.length && isNumeric(currChar)) { + currChar = str.str[++numEnd]; + *lengthPointer += 1; + } + *lengthPointer -= 1; + if (numEnd > 0) { + uint8 result = 0; + for (size_t i = 0; i < numEnd; i++) { + result *= 10; + result += str.str[i] - '0'; + } + return result; + } else { + return -1; + } +} + +real32 parsePositiveReal32(string str, size_t *lengthPointer) { + real32 result = NAN; + + string wholePartStr = string{0}; + string fractionalPartStr = string{0}; + + bool split = false; + size_t c = 0; + while (c < str.length) { + if (str.str[c] == '.') { + wholePartStr.str = str.str; + wholePartStr.length = c; + fractionalPartStr.str = str.str + c + 1; + fractionalPartStr.length = str.length - c - 1; + split = true; + break; + } + c++; + } + if (split) { + int wholePart = parsePositiveInt(wholePartStr, lengthPointer); + *lengthPointer += 1; + int fractionalPart = parsePositiveInt(fractionalPartStr, lengthPointer); + if (wholePart >= 0 && fractionalPart >= 0) { + real32 fractionalPartMultiplier = 1.0f / powf(10.0f, (real32)fractionalPartStr.length); + result = (real32)wholePart + (real32)fractionalPart * (real32)fractionalPartMultiplier; + } + } else if (c > 0) { + result = (real32)parsePositiveInt(str, lengthPointer); + } + return result; +} + +string readEntireFile(Arena *arena, string filename) { +#if OS_WINDOWS + string result = {0}; + HANDLE fileHandle = CreateFileA(cstring(arena, filename), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, NULL, NULL); + if (fileHandle != INVALID_HANDLE_VALUE) { + LARGE_INTEGER fileSize; + if (GetFileSizeEx(fileHandle, &fileSize)) { + string readfile = PushString(arena, (size_t)fileSize.QuadPart); + if (readfile.str) { + DWORD bytesRead; + if (ReadFile(fileHandle, readfile.str, (DWORD)fileSize.QuadPart, &bytesRead, NULL) && (fileSize.QuadPart == bytesRead)) { + result = readfile; + } + } + } + CloseHandle(fileHandle); + } + return result; +#elif OS_LINUX + FILE *input = fopen((char *)filename.str, "r"); + struct stat 
st; + stat((char *)filename.str, &st); + size_t fsize = st.st_size; + string readBuffer = PushString(arena, fsize); + fread(readBuffer.str, sizeof(byte), readBuffer.length, input); + fclose(input); + return readBuffer; +#endif +} + +bool writeEntireFile(Arena *arena, string filename, const byte *contents, size_t contentsLength) { + bool result = false; +#if OS_WINDOWS + HANDLE fileHandle = CreateFileA(cstring(arena, filename), GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, NULL, NULL); + if (fileHandle != INVALID_HANDLE_VALUE) { + DWORD bytesWritten; + if (WriteFile(fileHandle, contents, (DWORD)contentsLength, &bytesWritten, NULL)) { + // file written successfully + result = bytesWritten == contentsLength; + } + CloseHandle(fileHandle); + } +#elif OS_LINUX + Assert(false); +#endif + return result; +} + +bool fileAppend(Arena *arena, string filename, const byte *contents, size_t contentsLength) { + bool result = false; +#if OS_WINDOWS + HANDLE fileHandle = CreateFileA(cstring(arena, filename), FILE_APPEND_DATA | FILE_GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + if (fileHandle != INVALID_HANDLE_VALUE) { + DWORD bytesWritten; + DWORD position = SetFilePointer(fileHandle, 0, NULL, FILE_END); + if (WriteFile(fileHandle, contents, (DWORD)contentsLength, &bytesWritten, NULL)) { + // file written successfully + result = bytesWritten == contentsLength; + } + CloseHandle(fileHandle); + } +#elif OS_LINUX + Assert(false); +#endif + return result; +} + +list getArgs(Arena *arena, int argc, char **argv) { + list args = PushList(arena, string, (size_t)argc); + for (int i = 1; i < argc; i++) { + appendList(&args, strFromCString(arena, argv[i])); + } + return args; +} + +UnixTimestamp getSystemUnixTime() { + time_t now; + time(&now); + return (UnixTimestamp)now; +} + +Timestamp timestampFromUnixTime(UnixTimestamp *unixTimestamp) { + tm *timestamp = gmtime((time_t *)&time); + return *timestamp; +} + +string formatTimeHms(Arena *arena, 
UnixTimestamp time) { + local_persist const string format = "HH-MM-SS"_s; + string buf = PushString(arena, format.length); + tm *timestamp = gmtime((time_t *)&time); + strftime(buf.str, buf.length + 1, "%T", timestamp); + return buf; +} + +string formatTimeHms(Arena *arena, Timestamp *time) { + local_persist const string format = "HH-MM-SS"_s; + string buf = PushString(arena, format.length); + strftime(buf.str, buf.length + 1, "%T", (tm *)time); + return buf; +} + +string formatTimeYmd(Arena *arena, UnixTimestamp time) { + local_persist const string format = "YYYY-mm-dd"_s; + string buf = PushString(arena, format.length); + tm *timestamp = gmtime((time_t *)&time); + strftime(buf.str, buf.length + 1, "%Y-%m-%d", timestamp); + return buf; +} + +string formatTimeYmd(Arena *arena, Timestamp *time) { + local_persist const string format = "YYYY-mm-dd"_s; + string buf = PushString(arena, format.length); + strftime(buf.str, buf.length + 1, "%Y-%m-%d", (tm *)time); + return buf; +} + +function void __core_log(LogTarget target, const char *fmt, va_list argList) { + Scratch scratch = scratchStart(0, 0); + string result = strPrintfv(scratch.arena, fmt, argList); +#if OS_WINDOWS + DWORD done; + HANDLE stdHandle; + switch (target) { + case LogTarget_stdin: + stdHandle = GetStdHandle(STD_INPUT_HANDLE); + break; + case LogTarget_stdout: + stdHandle = GetStdHandle(STD_ERROR_HANDLE); + break; + case LogTarget_stderr: + stdHandle = GetStdHandle(STD_OUTPUT_HANDLE); + break; + default: + stdHandle = GetStdHandle(STD_OUTPUT_HANDLE); + break; + } + WriteFile(stdHandle, result.str, (DWORD)result.length, &done, 0); +#elif OS_LINUX + // TODO(djledda): finish implementation without cstdlib + switch (target) { + case LogTarget_stdin: + write(0, (const void *)result.str, result.length); + break; + case LogTarget_stderr: + fflush(stderr); + write(2, (const void *)result.str, result.length); + break; + case LogTarget_stdout: + default: + fflush(stdout); + write(1, (const void *)result.str, 
result.length); + break; + } +#endif + scratchEnd(scratch); +} + +void logErr(const char *fmt, ...) { + va_list argList; + va_start(argList, fmt); + __core_log(LogTarget_stdout, fmt, argList); + va_end(argList); +} + +function void logStdout(const char *fmt, ...) { + va_list argList; + va_start(argList, fmt); + __core_log(LogTarget_stdout, fmt, argList); + va_end(argList); +} + +void log(const char *fmt, ...) { + va_list argList; + va_start(argList, fmt); + __core_log(LogTarget_stdout, fmt, argList); + va_end(argList); +} + +void log(list l, LogTarget target) { + void (*logFn)(const char *fmt, ...) = target == LogTarget_stdout ? &logStdout : &logErr; + logFn("{ "); + for (size_t i = 0; i < l.length; i++) { + if (i != 0) { + logFn(", "); + } + logFn("%i", l.data[i]); + } + logFn(" } length: %zu, head: %zu\n", l.length, l.head); +} + +void log(list l, LogTarget target) { + void (*logFn)(const char *fmt, ...) = target == LogTarget_stdout ? &logStdout : &logErr; + logFn("{ "); + for (size_t i = 0; i < l.length; i++) { + if (i != 0) { + logFn(", "); + } + logFn("\"%S\"", l.data[i]); + } + logFn(" } length: %zu, head: %zu\n", l.length, l.head); +} + +int intCompare(const void *a, const void *b) { + int *x = (int *)a; + int *y = (int *)b; + return (*x > *y) - (*x < *y); +} + diff --git a/src/lib/djstdlib/core.h b/src/lib/djstdlib/core.h new file mode 100644 index 0000000..60e4dde --- /dev/null +++ b/src/lib/djstdlib/core.h @@ -0,0 +1,219 @@ +#ifndef CORE_H +#define CORE_H + +// cstdlib includes +#include +#include // necessary for int type sizes +#include +#include // TODO(djledda): try not to depend on this one + +// ### Misc macros ### +#if ENABLE_ASSERT +#define Assert(expression) if (!(expression)) {*(volatile int *)0 = 0;} +#else +#define Assert(expression) +#endif + +#define function static +#define global static +#define local_persist static + +// ### Types ### +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; +typedef 
uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; +typedef uint8_t byte; +typedef float real32; +typedef double real64; + +// ### Sizes and Numbers ### +#define Bytes(n) (n) +#define Kilobytes(n) (n << 10) +#define Megabytes(n) (n << 20) +#define Gigabytes(n) (((uint64)n) << 30) +#define Terabytes(n) (((uint64)n) << 40) + +#define Thousand(n) ((n)*1000) +#define Million(n) ((n)*1000000) +#define Billion(n) ((n)*1000000000LL) + +#define ArrayCount(arr) (sizeof(arr) / sizeof((arr)[0])) + +// ### Arenas ### +struct Arena { + void *memory; + size_t capacity; + size_t head; +}; + +struct Scratch { + Arena *arena; + size_t start; +}; + +void *pushSize(Arena *arena, size_t bytes); +Arena *arenaAlloc(size_t capacity); +void arenaFree(Arena *arena); +void arenaFreeFrom(Arena *arena, size_t pos); + +void initialiseCore(); + +Scratch scratchStart(Arena **conflicts, size_t conflictCount); +void scratchEnd(Scratch scratch); + +#define PushArray(arena, type, size) (type *)pushSize(arena, sizeof(type) * (size)) +#define PushStruct(arena, type) (type *)pushSize(arena, sizeof(type)) + +// ### Vectors ### +template +union Vector2 { + struct { + T x; + T y; + }; + T vec[2]; +}; +template +inline function Vector2 vec2(T x, T y) { + Vector2 result = {0}; + result.x = x; + result.y = y; + return result; +} + +template +union Vector3 { + struct { + T x; + T y; + T z; + }; + T vec[3]; +}; +template +inline function Vector3 vec3(T x, T y, T z) { + Vector3 result = {0}; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +template +union Vector4 { + struct { + T x; + T y; + T z; + T w; + }; + T vec[4]; +}; +template +inline function Vector4 vec4(T x, T y, T z, T w) { + Vector4 result = {0}; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +// ### Lists ### +template +struct list { + T* data; + size_t length; + size_t head; +}; + +#define PushList(arena, type, size) (list{ PushArray(arena, type, 
// ### Strings ###
// Length-delimited string: `str` points at `length` bytes. Nothing in this
// struct guarantees a terminating NUL.
struct string {
    char *str;
    size_t length;
};

// Wraps a string literal without copying it. sizeof(lit) counts the
// terminating NUL, hence the -1. The cast drops const, so the result must be
// treated as read-only.
#define strlit(lit) (string{(char *)(lit), sizeof(lit) - 1})
// Allocates `length` bytes from `arena` via pushSize.
// NOTE: `length` is evaluated twice; do not pass an expression with side effects.
#define PushString(arena, length) (string{ (char *)pushSize(arena, length), (length) })
// User-defined literal: "text"_s. The second parameter of a string literal
// operator must be exactly std::size_t; `unsigned long` only happens to match
// on LP64 targets and is rejected where size_t is `unsigned long long` (Win64).
// NOTE(review): the out-of-line definition must use the same parameter type.
string operator""_s(const char *cstrLiteral, size_t length);
// ### Loops ###
// for-loop headers over anything exposing a `.length` member, used as
//   for (EachIn(items, i)) { ... items.data[i] ... }
// The container argument is parenthesised so that expressions such as
// `*listPtr` expand to (*listPtr).length rather than *listPtr.length.
#define EachIn(list, it) size_t it = 0; it < (list).length; it++
// Walks length-1 down to 0. `it` is unsigned: decrementing past 0 wraps to
// SIZE_MAX, which fails `it < length` and ends the loop. An empty container
// starts at SIZE_MAX and is therefore never entered.
#define EachInReversed(list, it) size_t it = (list).length - 1; it < (list).length; it--
// Same as EachIn for a true array (element count taken with ArrayCount).
#define EachInArray(arr, it) size_t it = 0; it < ArrayCount(arr); ++it
// Maps `capacity` bytes of private, anonymous, readable and writable memory.
// Returns the base address of the mapping, or NULL if the mapping could not
// be created.
void *os_alloc(size_t capacity) {
    void *memory = mmap(NULL, capacity, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap signals failure with MAP_FAILED ((void *)-1), which a null check
    // would treat as a valid pointer. Normalise it to NULL, the value the
    // Win32 os_alloc yields when VirtualAlloc fails.
    if (memory == MAP_FAILED) {
        return NULL;
    }
    return memory;
}
+ +// stb_sprintf - v1.10 - public domain snprintf() implementation +// originally by Jeff Roberts / RAD Game Tools, 2015/10/20 +// http://github.com/nothings/stb +// +// allowed types: sc uidBboXx p AaGgEef n +// lengths : hh h ll j z t I64 I32 I +// +// Contributors: +// Fabian "ryg" Giesen (reformatting) +// github:aganm (attribute format) +// +// Contributors (bugfixes): +// github:d26435 +// github:trex78 +// github:account-login +// Jari Komppa (SI suffixes) +// Rohit Nirmal +// Marcin Wojdyr +// Leonard Ritter +// Stefano Zanotti +// Adam Allison +// Arvid Gerstmann +// Markus Kolb +// +// LICENSE: +// +// See end of file for license information. + +#ifndef STB_SPRINTF_H_INCLUDE +#define STB_SPRINTF_H_INCLUDE + +/* +Single file sprintf replacement. + +Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20. +Hereby placed in public domain. + +This is a full sprintf replacement that supports everything that +the C runtime sprintfs support, including float/double, 64-bit integers, +hex floats, field parameters (%*.*d stuff), length reads backs, etc. + +Why would you need this if sprintf already exists? Well, first off, +it's *much* faster (see below). It's also much smaller than the CRT +versions code-space-wise. We've also added some simple improvements +that are super handy (commas in thousands, callbacks at buffer full, +for example). Finally, the format strings for MSVC and GCC differ +for 64-bit integers (among other small things), so this lets you use +the same format strings in cross platform code. + +It uses the standard single file trick of being both the header file +and the source itself. If you just include it normally, you just get +the header file function definitions. To get the code, you include +it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first. + +It only uses va_args macros from the C runtime to do it's work. It +does cast doubles to S64s and shifts and divides U64s, which does +drag in CRT code on most platforms. 
+ +It compiles to roughly 8K with float support, and 4K without. +As a comparison, when using MSVC static libs, calling sprintf drags +in 16K. + +API: +==== +int stbsp_sprintf( char * buf, char const * fmt, ... ) +int stbsp_snprintf( char * buf, int count, char const * fmt, ... ) + Convert an arg list into a buffer. stbsp_snprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintf( char * buf, char const * fmt, va_list va ) +int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va ) + Convert a va_list arg list into a buffer. stbsp_vsnprintf always returns + a zero-terminated string (unlike regular snprintf). + +int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va ) + typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len ); + Convert into a buffer, calling back every STB_SPRINTF_MIN chars. + Your callback can then copy the chars out, print them or whatever. + This function is actually the workhorse for everything else. + The buffer you pass in must hold at least STB_SPRINTF_MIN characters. + // you return the next buffer to use or 0 to stop converting + +void stbsp_set_separators( char comma, char period ) + Set the comma and period characters to use. + +FLOATS/DOUBLES: +=============== +This code uses a internal float->ascii conversion method that uses +doubles with error correction (double-doubles, for ~105 bits of +precision). This conversion is round-trip perfect - that is, an atof +of the values output here will give you the bit-exact double back. + +One difference is that our insignificant digits will be different than +with MSVC or GCC (but they don't match each other either). We also +don't attempt to find the minimum length matching float (pre-MSVC15 +doesn't either). + +If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT +and you'll save 4K of code space. 
+ +64-BIT INTS: +============ +This library also supports 64-bit integers and you can use MSVC style or +GCC style indicators (%I64d or %lld). It supports the C99 specifiers +for size_t and ptr_diff_t (%jd %zd) as well. + +EXTRAS: +======= +Like some GCCs, for integers and floats, you can use a ' (single quote) +specifier and commas will be inserted on the thousands: "%'d" on 12345 +would print 12,345. + +For integers and floats, you can use a "$" specifier and the number +will be converted to float and then divided to get kilo, mega, giga or +tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is +"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn +2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three +$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the +suffix, add "_" specifier: "%_$d" -> "2.53M". + +In addition to octal and hexadecimal conversions, you can print +integers in binary: "%b" for 256 would print 100. + +PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC): +=================================================================== +"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC) +"%24d" across all 32-bit ints (4.5x/4.2x faster) +"%x" across all 32-bit ints (4.5x/3.8x faster) +"%08x" across all 32-bit ints (4.3x/3.8x faster) +"%f" across e-10 to e+10 floats (7.3x/6.0x faster) +"%e" across e-10 to e+10 floats (8.1x/6.0x faster) +"%g" across e-10 to e+10 floats (10.0x/7.1x faster) +"%f" for values near e-300 (7.9x/6.5x faster) +"%f" for values near e+300 (10.0x/9.1x faster) +"%e" for values near e-300 (10.1x/7.0x faster) +"%e" for values near e+300 (9.2x/6.0x faster) +"%.320f" for values near e-300 (12.6x/11.2x faster) +"%a" for random values (8.6x/4.3x faster) +"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster) +"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster) +"%s%s%s" for 64 char strings (7.1x/7.3x faster) +"...512 char string..." 
( 35.0x/32.5x faster!) +*/ + +#if defined(__clang__) + #if defined(__has_feature) && defined(__has_attribute) + #if __has_feature(address_sanitizer) + #if __has_attribute(__no_sanitize__) + #define STBSP__ASAN __attribute__((__no_sanitize__("address"))) + #elif __has_attribute(__no_sanitize_address__) + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #elif __has_attribute(__no_address_safety_analysis__) + #define STBSP__ASAN __attribute__((__no_address_safety_analysis__)) + #endif + #endif + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ + #define STBSP__ASAN __attribute__((__no_sanitize_address__)) + #endif +#endif + +#ifndef STBSP__ASAN +#define STBSP__ASAN +#endif + +#ifdef STB_SPRINTF_STATIC +#define STBSP__PUBLICDEC static +#define STBSP__PUBLICDEF static STBSP__ASAN +#else +#ifdef __cplusplus +#define STBSP__PUBLICDEC extern "C" +#define STBSP__PUBLICDEF extern "C" STBSP__ASAN +#else +#define STBSP__PUBLICDEC extern +#define STBSP__PUBLICDEF STBSP__ASAN +#endif +#endif + +#if defined(__has_attribute) + #if __has_attribute(format) + #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va))) + #endif +#endif + +#ifndef STBSP__ATTRIBUTE_FORMAT +#define STBSP__ATTRIBUTE_FORMAT(fmt,va) +#endif + +#ifdef _MSC_VER +#define STBSP__NOTUSED(v) (void)(v) +#else +#define STBSP__NOTUSED(v) (void)sizeof(v) +#endif + +#include // for va_arg(), va_list() +#include // size_t, ptrdiff_t + +#ifndef STB_SPRINTF_MIN +#define STB_SPRINTF_MIN 512 // how many characters per callback +#endif +typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len); + +#ifndef STB_SPRINTF_DECORATE +#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names +#endif + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC int 
STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3); +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4); + +STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va); +STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period); + +#endif // STB_SPRINTF_H_INCLUDE + +#ifdef STB_SPRINTF_IMPLEMENTATION + +#define stbsp__uint32 unsigned int +#define stbsp__int32 signed int + +#ifdef _MSC_VER +#define stbsp__uint64 unsigned __int64 +#define stbsp__int64 signed __int64 +#else +#define stbsp__uint64 unsigned long long +#define stbsp__int64 signed long long +#endif +#define stbsp__uint16 unsigned short + +#ifndef stbsp__uintptr +#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__) +#define stbsp__uintptr stbsp__uint64 +#else +#define stbsp__uintptr stbsp__uint32 +#endif +#endif + +#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC) +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define STB_SPRINTF_MSVC_MODE +#endif +#endif + +#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses +#define STBSP__UNALIGNED(code) +#else +#define STBSP__UNALIGNED(code) code +#endif + +#ifndef STB_SPRINTF_NOFLOAT +// internal float utility functions +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits); +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value); +#define STBSP__SPECIAL 0x7000 +#endif + +static char stbsp__period = '.'; +static 
char stbsp__comma = ','; +static struct +{ + short temp; // force next field to be 2-byte aligned + char pair[201]; +} stbsp__digitpair = +{ + 0, + "00010203040506070809101112131415161718192021222324" + "25262728293031323334353637383940414243444546474849" + "50515253545556575859606162636465666768697071727374" + "75767778798081828384858687888990919293949596979899" +}; + +STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod) +{ + stbsp__period = pperiod; + stbsp__comma = pcomma; +} + +#define STBSP__LEFTJUST 1 +#define STBSP__LEADINGPLUS 2 +#define STBSP__LEADINGSPACE 4 +#define STBSP__LEADING_0X 8 +#define STBSP__LEADINGZERO 16 +#define STBSP__INTMAX 32 +#define STBSP__TRIPLET_COMMA 64 +#define STBSP__NEGATIVE 128 +#define STBSP__METRIC_SUFFIX 256 +#define STBSP__HALFWIDTH 512 +#define STBSP__METRIC_NOSPACE 1024 +#define STBSP__METRIC_1024 2048 +#define STBSP__METRIC_JEDEC 4096 + +static void stbsp__lead_sign(stbsp__uint32 fl, char *sign) +{ + sign[0] = 0; + if (fl & STBSP__NEGATIVE) { + sign[0] = 1; + sign[1] = '-'; + } else if (fl & STBSP__LEADINGSPACE) { + sign[0] = 1; + sign[1] = ' '; + } else if (fl & STBSP__LEADINGPLUS) { + sign[0] = 1; + sign[1] = '+'; + } +} + +static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit) +{ + char const * sn = s; + + // get up to 4-byte alignment + for (;;) { + if (((stbsp__uintptr)sn & 3) == 0) + break; + + if (!limit || *sn == 0) + return (stbsp__uint32)(sn - s); + + ++sn; + --limit; + } + + // scan over 4 bytes at a time to find terminating 0 + // this will intentionally scan up to 3 bytes past the end of buffers, + // but becase it works 4B aligned, it will never cross page boundaries + // (hence the STBSP__ASAN markup; the over-read here is intentional + // and harmless) + while (limit >= 4) { + stbsp__uint32 v = *(stbsp__uint32 *)sn; + // bit hack to find if there's a 0 byte in there + if ((v - 0x01010101) & (~v) & 0x80808080UL) + break; + + sn += 4; + limit 
-= 4; + } + + // handle the last few characters to find actual size + while (limit && *sn) { + ++sn; + --limit; + } + + return (stbsp__uint32)(sn - s); +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va) +{ + static char hex[] = "0123456789abcdefxp"; + static char hexu[] = "0123456789ABCDEFXP"; + char *bf; + char const *f; + int tlen = 0; + + bf = buf; + f = fmt; + for (;;) { + stbsp__int32 fw, pr, tz; + stbsp__uint32 fl; + + // macros for the callback buffer stuff + #define stbsp__chk_cb_bufL(bytes) \ + { \ + int len = (int)(bf - buf); \ + if ((len + (bytes)) >= STB_SPRINTF_MIN) { \ + tlen += len; \ + if (0 == (bf = buf = callback(buf, user, len))) \ + goto done; \ + } \ + } + #define stbsp__chk_cb_buf(bytes) \ + { \ + if (callback) { \ + stbsp__chk_cb_bufL(bytes); \ + } \ + } + #define stbsp__flush_cb() \ + { \ + stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \ + } // flush if there is even one byte in the buffer + #define stbsp__cb_buf_clamp(cl, v) \ + cl = v; \ + if (callback) { \ + int lg = STB_SPRINTF_MIN - (int)(bf - buf); \ + if (cl > lg) \ + cl = lg; \ + } + + // fast copy everything up to the next % (or end of string) + for (;;) { + while (((stbsp__uintptr)f) & 3) { + schk1: + if (f[0] == '%') + goto scandd; + schk2: + if (f[0] == 0) + goto endfmt; + stbsp__chk_cb_buf(1); + *bf++ = f[0]; + ++f; + } + for (;;) { + // Check if the next 4 bytes contain %(0x25) or end of string. 
+ // Using the 'hasless' trick: + // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + stbsp__uint32 v, c; + v = *(stbsp__uint32 *)f; + c = (~v) & 0x80808080; + if (((v ^ 0x25252525) - 0x01010101) & c) + goto schk1; + if ((v - 0x01010101) & c) + goto schk2; + if (callback) + if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4) + goto schk1; + #ifdef STB_SPRINTF_NOUNALIGNED + if(((stbsp__uintptr)bf) & 3) { + bf[0] = f[0]; + bf[1] = f[1]; + bf[2] = f[2]; + bf[3] = f[3]; + } else + #endif + { + *(stbsp__uint32 *)bf = v; + } + bf += 4; + f += 4; + } + } + scandd: + + ++f; + + // ok, we have a percent, read the modifiers first + fw = 0; + pr = -1; + fl = 0; + tz = 0; + + // flags + for (;;) { + switch (f[0]) { + // if we have left justify + case '-': + fl |= STBSP__LEFTJUST; + ++f; + continue; + // if we have leading plus + case '+': + fl |= STBSP__LEADINGPLUS; + ++f; + continue; + // if we have leading space + case ' ': + fl |= STBSP__LEADINGSPACE; + ++f; + continue; + // if we have leading 0x + case '#': + fl |= STBSP__LEADING_0X; + ++f; + continue; + // if we have thousand commas + case '\'': + fl |= STBSP__TRIPLET_COMMA; + ++f; + continue; + // if we have kilo marker (none->kilo->kibi->jedec) + case '$': + if (fl & STBSP__METRIC_SUFFIX) { + if (fl & STBSP__METRIC_1024) { + fl |= STBSP__METRIC_JEDEC; + } else { + fl |= STBSP__METRIC_1024; + } + } else { + fl |= STBSP__METRIC_SUFFIX; + } + ++f; + continue; + // if we don't want space between metric suffix and number + case '_': + fl |= STBSP__METRIC_NOSPACE; + ++f; + continue; + // if we have leading zero + case '0': + fl |= STBSP__LEADINGZERO; + ++f; + goto flags_done; + default: goto flags_done; + } + } + flags_done: + + // get the field width + if (f[0] == '*') { + fw = va_arg(va, stbsp__uint32); + ++f; + } else { + while ((f[0] >= '0') && (f[0] <= '9')) { + fw = fw * 10 + f[0] - '0'; + f++; + } + } + // get the precision + if (f[0] == '.') { + ++f; + if (f[0] == '*') { + pr = va_arg(va, stbsp__uint32); 
+ ++f; + } else { + pr = 0; + while ((f[0] >= '0') && (f[0] <= '9')) { + pr = pr * 10 + f[0] - '0'; + f++; + } + } + } + + // handle integer size overrides + switch (f[0]) { + // are we halfwidth? + case 'h': + fl |= STBSP__HALFWIDTH; + ++f; + if (f[0] == 'h') + ++f; // QUARTERWIDTH + break; + // are we 64-bit (unix style) + case 'l': + fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0); + ++f; + if (f[0] == 'l') { + fl |= STBSP__INTMAX; + ++f; + } + break; + // are we 64-bit on intmax? (c99) + case 'j': + fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit on size_t or ptrdiff_t? (c99) + case 'z': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + case 't': + fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0; + ++f; + break; + // are we 64-bit (msft style) + case 'I': + if ((f[1] == '6') && (f[2] == '4')) { + fl |= STBSP__INTMAX; + f += 3; + } else if ((f[1] == '3') && (f[2] == '2')) { + f += 3; + } else { + fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0); + ++f; + } + break; + default: break; + } + + // handle each replacement + switch (f[0]) { + #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307 + char num[STBSP__NUMSZ]; + char lead[8]; + char tail[8]; + char *s; + char const *h; + stbsp__uint32 l, n, cs; + stbsp__uint64 n64; +#ifndef STB_SPRINTF_NOFLOAT + double fv; +#endif + stbsp__int32 dp; + char const *sn; + + case 's': + // get the string + s = va_arg(va, char *); + if (s == 0) + s = (char *)"null"; + // get the length, limited to desired precision + // always limit to ~0u chars since our counts are 32b + l = stbsp__strlen_limited(s, (pr >= 0) ? 
pr : ~0u); + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + // copy the string in + goto scopy; + + case 'S': + { + // string struct + string str = va_arg(va, string); + s = (char *)str.str; + sn = (const char *)(str.str + str.length); + l = (unsigned int)str.length; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } break; + + case 'c': // char + // get the character + s = num + STBSP__NUMSZ - 1; + *s = (char)va_arg(va, int); + l = 1; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + + case 'n': // weird write-bytes specifier + { + int *d = va_arg(va, int *); + *d = tlen + (int)(bf - buf); + } break; + +#ifdef STB_SPRINTF_NOFLOAT + case 'A': // float + case 'a': // hex float + case 'G': // float + case 'g': // float + case 'E': // float + case 'e': // float + case 'f': // float + va_arg(va, double); // eat it + s = (char *)"No float"; + l = 8; + lead[0] = 0; + tail[0] = 0; + pr = 0; + cs = 0; + STBSP__NOTUSED(dp); + goto scopy; +#else + case 'A': // hex float + case 'a': // hex float + h = (f[0] == 'A') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv)) + fl |= STBSP__NEGATIVE; + + s = num + 64; + + stbsp__lead_sign(fl, lead); + + if (dp == -1023) + dp = (n64) ? 
-1022 : 0; + else + n64 |= (((stbsp__uint64)1) << 52); + n64 <<= (64 - 56); + if (pr < 15) + n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4)); +// add leading chars + +#ifdef STB_SPRINTF_MSVC_MODE + *s++ = '0'; + *s++ = 'x'; +#else + lead[1 + lead[0]] = '0'; + lead[2 + lead[0]] = 'x'; + lead[0] += 2; +#endif + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + if (pr) + *s++ = stbsp__period; + sn = s; + + // print the bits + n = pr; + if (n > 13) + n = 13; + if (pr > (stbsp__int32)n) + tz = pr - n; + pr = 0; + while (n--) { + *s++ = h[(n64 >> 60) & 15]; + n64 <<= 4; + } + + // print the expo + tail[1] = h[17]; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; + n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3)); + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + + dp = (int)(s - sn); + l = (int)(s - (num + 64)); + s = num + 64; + cs = 1 + (3 << 24); + goto scopy; + + case 'G': // float + case 'g': // float + h = (f[0] == 'G') ? hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; + else if (pr == 0) + pr = 1; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000)) + fl |= STBSP__NEGATIVE; + + // clamp the precision and delete extra zeros after clamp + n = pr; + if (l > (stbsp__uint32)pr) + l = pr; + while ((l > 1) && (pr) && (sn[l - 1] == '0')) { + --pr; + --l; + } + + // should we use %e + if ((dp <= -4) || (dp > (stbsp__int32)n)) { + if (pr > (stbsp__int32)l) + pr = l - 1; + else if (pr) + --pr; // when using %e, there is one digit before the decimal + goto doexpfromg; + } + // this is the insane action to get the pr to match %g semantics for %f + if (dp > 0) { + pr = (dp < (stbsp__int32)l) ? l - dp : 0; + } else { + pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr); + } + goto dofloatfromg; + + case 'E': // float + case 'e': // float + h = (f[0] == 'E') ? 
hexu : hex; + fv = va_arg(va, double); + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000)) + fl |= STBSP__NEGATIVE; + doexpfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + // handle leading chars + *s++ = sn[0]; + + if (pr) + *s++ = stbsp__period; + + // handle after decimal + if ((l - 1) > (stbsp__uint32)pr) + l = pr + 1; + for (n = 1; n < l; n++) + *s++ = sn[n]; + // trailing zeros + tz = pr - (l - 1); + pr = 0; + // dump expo + tail[1] = h[0xe]; + dp -= 1; + if (dp < 0) { + tail[2] = '-'; + dp = -dp; + } else + tail[2] = '+'; +#ifdef STB_SPRINTF_MSVC_MODE + n = 5; +#else + n = (dp >= 100) ? 5 : 4; +#endif + tail[0] = (char)n; + for (;;) { + tail[n] = '0' + dp % 10; + if (n <= 3) + break; + --n; + dp /= 10; + } + cs = 1 + (3 << 24); // how many tens + goto flt_lead; + + case 'f': // float + fv = va_arg(va, double); + doafloat: + // do kilos + if (fl & STBSP__METRIC_SUFFIX) { + double divisor; + divisor = 1000.0f; + if (fl & STBSP__METRIC_1024) + divisor = 1024.0; + while (fl < 0x4000000) { + if ((fv < divisor) && (fv > -divisor)) + break; + fv /= divisor; + fl += 0x1000000; + } + } + if (pr == -1) + pr = 6; // default is 6 + // read the double into a string + if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr)) + fl |= STBSP__NEGATIVE; + dofloatfromg: + tail[0] = 0; + stbsp__lead_sign(fl, lead); + if (dp == STBSP__SPECIAL) { + s = (char *)sn; + cs = 0; + pr = 0; + goto scopy; + } + s = num + 64; + + // handle the three decimal varieties + if (dp <= 0) { + stbsp__int32 i; + // handle 0.000*000xxxx + *s++ = '0'; + if (pr) + *s++ = stbsp__period; + n = -dp; + if ((stbsp__int32)n > pr) + n = pr; + i = n; + while (i) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + i -= 4; + } + while 
(i) { + *s++ = '0'; + --i; + } + if ((stbsp__int32)(l + n) > pr) + l = pr - n; + i = l; + while (i) { + *s++ = *sn++; + --i; + } + tz = pr - (n + l); + cs = 1 + (3 << 24); // how many tens did we write (for commas below) + } else { + cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0; + if ((stbsp__uint32)dp >= l) { + // handle xxxx000*000.0 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= l) + break; + } + } + if (n < (stbsp__uint32)dp) { + n = dp - n; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (n) { + if ((((stbsp__uintptr)s) & 3) == 0) + break; + *s++ = '0'; + --n; + } + while (n >= 4) { + *(stbsp__uint32 *)s = 0x30303030; + s += 4; + n -= 4; + } + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = '0'; + --n; + } + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) { + *s++ = stbsp__period; + tz = pr; + } + } else { + // handle xxxxx.xxxx000*000 + n = 0; + for (;;) { + if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) { + cs = 0; + *s++ = stbsp__comma; + } else { + *s++ = sn[n]; + ++n; + if (n >= (stbsp__uint32)dp) + break; + } + } + cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens + if (pr) + *s++ = stbsp__period; + if ((l - dp) > (stbsp__uint32)pr) + l = pr + dp; + while (n < l) { + *s++ = sn[n]; + ++n; + } + tz = pr - (l - dp); + } + } + pr = 0; + + // handle k,m,g,t + if (fl & STBSP__METRIC_SUFFIX) { + char idx; + idx = 1; + if (fl & STBSP__METRIC_NOSPACE) + idx = 0; + tail[0] = idx; + tail[1] = ' '; + { + if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'. + if (fl & STBSP__METRIC_1024) + tail[idx + 1] = "_KMGT"[fl >> 24]; + else + tail[idx + 1] = "_kMGT"[fl >> 24]; + idx++; + // If printing kibits and not in jedec, add the 'i'. 
+ if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) { + tail[idx + 1] = 'i'; + idx++; + } + tail[0] = idx; + } + } + }; + + flt_lead: + // get the length that we copied + l = (stbsp__uint32)(s - (num + 64)); + s = num + 64; + goto scopy; +#endif + + case 'B': // upper binary + case 'b': // lower binary + h = (f[0] == 'B') ? hexu : hex; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[0xb]; + } + l = (8 << 4) | (1 << 8); + goto radixnum; + + case 'o': // octal + h = hexu; + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 1; + lead[1] = '0'; + } + l = (3 << 4) | (3 << 8); + goto radixnum; + + case 'p': // pointer + fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0; + pr = sizeof(void *) * 2; + fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros + // fall through - to X + + case 'X': // upper hex + case 'x': // lower hex + h = (f[0] == 'X') ? hexu : hex; + l = (4 << 4) | (4 << 8); + lead[0] = 0; + if (fl & STBSP__LEADING_0X) { + lead[0] = 2; + lead[1] = '0'; + lead[2] = h[16]; + } + radixnum: + // get the number + if (fl & STBSP__INTMAX) + n64 = va_arg(va, stbsp__uint64); + else + n64 = va_arg(va, stbsp__uint32); + + s = num + STBSP__NUMSZ; + dp = 0; + // clear tail, and clear leading if value is zero + tail[0] = 0; + if (n64 == 0) { + lead[0] = 0; + if (pr == 0) { + l = 0; + cs = 0; + goto scopy; + } + } + // convert to string + for (;;) { + *--s = h[n64 & ((1 << (l >> 8)) - 1)]; + n64 >>= (l >> 8); + if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr))) + break; + if (fl & STBSP__TRIPLET_COMMA) { + ++l; + if ((l & 15) == ((l >> 4) & 15)) { + l &= ~15; + *--s = stbsp__comma; + } + } + }; + // get the tens and the comma pos + cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24); + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + // copy it + goto scopy; + + case 'u': // unsigned + case 'i': + case 'd': // integer + // get 
the integer and abs it + if (fl & STBSP__INTMAX) { + stbsp__int64 i64 = va_arg(va, stbsp__int64); + n64 = (stbsp__uint64)i64; + if ((f[0] != 'u') && (i64 < 0)) { + n64 = (stbsp__uint64)-i64; + fl |= STBSP__NEGATIVE; + } + } else { + stbsp__int32 i = va_arg(va, stbsp__int32); + n64 = (stbsp__uint32)i; + if ((f[0] != 'u') && (i < 0)) { + n64 = (stbsp__uint32)-i; + fl |= STBSP__NEGATIVE; + } + } + +#ifndef STB_SPRINTF_NOFLOAT + if (fl & STBSP__METRIC_SUFFIX) { + if (n64 < 1024) + pr = 0; + else if (pr == -1) + pr = 1; + fv = (double)(stbsp__int64)n64; + goto doafloat; + } +#endif + + // convert to string + s = num + STBSP__NUMSZ; + l = 0; + + for (;;) { + // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators) + char *o = s - 8; + if (n64 >= 100000000) { + n = (stbsp__uint32)(n64 % 100000000); + n64 /= 100000000; + } else { + n = (stbsp__uint32)n64; + n64 = 0; + } + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + do { + s -= 2; + *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + } while (n); + } + while (n) { + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = (char)(n % 10) + '0'; + n /= 10; + } + } + if (n64 == 0) { + if ((s[0] == '0') && (s != (num + STBSP__NUMSZ))) + ++s; + break; + } + while (s != o) + if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) { + l = 0; + *--s = stbsp__comma; + --o; + } else { + *--s = '0'; + } + } + + tail[0] = 0; + stbsp__lead_sign(fl, lead); + + // get the length that we copied + l = (stbsp__uint32)((num + STBSP__NUMSZ) - s); + if (l == 0) { + *--s = '0'; + l = 1; + } + cs = l + (3 << 24); + if (pr < 0) + pr = 0; + + scopy: + // get fw=leading/trailing space, pr=leading zeros + if (pr < (stbsp__int32)l) + pr = l; + n = pr + lead[0] + tail[0] + tz; + if (fw < (stbsp__int32)n) + fw = n; + fw -= n; + pr -= l; + + // handle right justify and leading zeros + if ((fl & STBSP__LEFTJUST) == 0) { + if (fl & 
STBSP__LEADINGZERO) // if leading zeros, everything is in pr + { + pr = (fw > pr) ? fw : pr; + fw = 0; + } else { + fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas + } + } + + // copy the spaces and/or zeros + if (fw + pr) { + stbsp__int32 i; + stbsp__uint32 c; + + // copy leading spaces (or when doing %8.4d stuff) + if ((fl & STBSP__LEFTJUST) == 0) + while (fw > 0) { + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = ' '; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leader + sn = lead + 1; + while (lead[0]) { + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy leading zeros + c = cs >> 24; + cs &= 0xffffff; + cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0; + while (pr > 0) { + stbsp__cb_buf_clamp(i, pr); + pr -= i; + if ((fl & STBSP__TRIPLET_COMMA) == 0) { + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + } + while (i) { + if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) { + cs = 0; + *bf++ = stbsp__comma; + } else + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + } + + // copy leader if there is still one + sn = lead + 1; + while (lead[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, lead[0]); + lead[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy the string + n = l; + while (n) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, n); + n -= i; + STBSP__UNALIGNED(while (i >= 4) { + *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s; + bf += 4; + s += 4; + i -= 4; + }) + while (i) { + *bf++ = *s++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy trailing zeros 
+ while (tz) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tz); + tz -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = '0'; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x30303030; + bf += 4; + i -= 4; + } + while (i) { + *bf++ = '0'; + --i; + } + stbsp__chk_cb_buf(1); + } + + // copy tail if there is one + sn = tail + 1; + while (tail[0]) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, tail[0]); + tail[0] -= (char)i; + while (i) { + *bf++ = *sn++; + --i; + } + stbsp__chk_cb_buf(1); + } + + // handle the left justify + if (fl & STBSP__LEFTJUST) + if (fw > 0) { + while (fw) { + stbsp__int32 i; + stbsp__cb_buf_clamp(i, fw); + fw -= i; + while (i) { + if ((((stbsp__uintptr)bf) & 3) == 0) + break; + *bf++ = ' '; + --i; + } + while (i >= 4) { + *(stbsp__uint32 *)bf = 0x20202020; + bf += 4; + i -= 4; + } + while (i--) + *bf++ = ' '; + stbsp__chk_cb_buf(1); + } + } + break; + + default: // unknown, just copy code + s = num + STBSP__NUMSZ - 1; + *s = f[0]; + l = 1; + fw = fl = 0; + lead[0] = 0; + tail[0] = 0; + pr = 0; + dp = 0; + cs = 0; + goto scopy; + } + ++f; + } +endfmt: + + if (!callback) + *bf = 0; + else + stbsp__flush_cb(); + +done: + return tlen + (int)(bf - buf); +} + +// cleanup +#undef STBSP__LEFTJUST +#undef STBSP__LEADINGPLUS +#undef STBSP__LEADINGSPACE +#undef STBSP__LEADING_0X +#undef STBSP__LEADINGZERO +#undef STBSP__INTMAX +#undef STBSP__TRIPLET_COMMA +#undef STBSP__NEGATIVE +#undef STBSP__METRIC_SUFFIX +#undef STBSP__NUMSZ +#undef stbsp__chk_cb_bufL +#undef stbsp__chk_cb_buf +#undef stbsp__flush_cb +#undef stbsp__cb_buf_clamp + +// ============================================================================ +// wrapper functions + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) 
+{ + int result; + va_list va; + va_start(va, fmt); + result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); + va_end(va); + return result; +} + +typedef struct stbsp__context { + char *buf; + int count; + int length; + char tmp[STB_SPRINTF_MIN]; +} stbsp__context; + +static char *stbsp__clamp_callback(const char *buf, void *user, int len) +{ + stbsp__context *c = (stbsp__context *)user; + c->length += len; + + if (len > c->count) + len = c->count; + + if (len) { + if (buf != c->buf) { + const char *s, *se; + char *d; + d = c->buf; + s = buf; + se = buf + len; + do { + *d++ = *s++; + } while (s < se); + } + c->buf += len; + c->count -= len; + } + + if (c->count <= 0) + return c->tmp; + return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can +} + +static char * stbsp__count_clamp_callback( const char * buf, void * user, int len ) +{ + stbsp__context * c = (stbsp__context*)user; + (void) sizeof(buf); + + c->length += len; + return c->tmp; // go direct into buffer if you can +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va ) +{ + stbsp__context c; + + if ( (count == 0) && !buf ) + { + c.length = 0; + + STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va ); + } + else + { + int l; + + c.buf = buf; + c.count = count; + c.length = 0; + + STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va ); + + // zero-terminate + l = (int)( c.buf - buf ); + if ( l >= count ) // should never be greater, only equal (or less) than count + l = count - 1; + buf[l] = 0; + } + + return c.length; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) 
+{ + int result; + va_list va; + va_start(va, fmt); + + result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va); + va_end(va); + + return result; +} + +STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va) +{ + return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va); +} + +// ======================================================================= +// low level float utility functions + +#ifndef STB_SPRINTF_NOFLOAT + +// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox) +#define STBSP__COPYFP(dest, src) \ + { \ + int cn; \ + for (cn = 0; cn < 8; cn++) \ + ((char *)&dest)[cn] = ((char *)&src)[cn]; \ + } + +// get float info +static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value) +{ + double d; + stbsp__int64 b = 0; + + // load value and round at the frac_digits + d = value; + + STBSP__COPYFP(b, d); + + *bits = b & ((((stbsp__uint64)1) << 52) - 1); + *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023); + + return (stbsp__int32)((stbsp__uint64) b >> 63); +} + +static double const stbsp__bot[23] = { + 1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011, + 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022 +}; +static double const stbsp__negbot[22] = { + 1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011, + 1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022 +}; +static double const stbsp__negboterr[22] = { + -5.551115123125783e-018, -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023, + 4.5251888174113739e-024, -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028, 2.0113352370744385e-029, + -3.0373745563400371e-030, 1.1806906454401013e-032, -7.7705399876661076e-032, 2.0902213275965398e-033, 
-7.1542424054621921e-034, -7.1542424054621926e-035, + 2.4754073164739869e-036, 5.4846728545790429e-037, 9.2462547772103625e-038, -4.8596774326570872e-039 +}; +static double const stbsp__top[13] = { + 1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299 +}; +static double const stbsp__negtop[13] = { + 1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299 +}; +static double const stbsp__toperr[13] = { + 8388608, + 6.8601809640529717e+028, + -7.253143638152921e+052, + -4.3377296974619174e+075, + -1.5559416129466825e+098, + -3.2841562489204913e+121, + -3.7745893248228135e+144, + -1.7356668416969134e+167, + -3.8893577551088374e+190, + -9.9566444326005119e+213, + 6.3641293062232429e+236, + -5.2069140800249813e+259, + -5.2504760255204387e+282 +}; +static double const stbsp__negtoperr[13] = { + 3.9565301985100693e-040, -2.299904345391321e-063, 3.6506201437945798e-086, 1.1875228833981544e-109, + -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178, -5.7778912386589953e-201, + 7.4997100559334532e-224, -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293, + 8.0970921678014997e-317 +}; + +#if defined(_MSC_VER) && (_MSC_VER <= 1200) +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000, + 10000000000000000000U +}; +#define stbsp__tento19th ((stbsp__uint64)1000000000000000000) +#else +static stbsp__uint64 const stbsp__powten[20] = { + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 
100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL +}; +#define stbsp__tento19th (1000000000000000000ULL) +#endif + +#define stbsp__ddmulthi(oh, ol, xh, yh) \ + { \ + double ahi = 0, alo, bhi = 0, blo; \ + stbsp__int64 bt; \ + oh = xh * yh; \ + STBSP__COPYFP(bt, xh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(ahi, bt); \ + alo = xh - ahi; \ + STBSP__COPYFP(bt, yh); \ + bt &= ((~(stbsp__uint64)0) << 27); \ + STBSP__COPYFP(bhi, bt); \ + blo = yh - bhi; \ + ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \ + } + +#define stbsp__ddtoS64(ob, xh, xl) \ + { \ + double ahi = 0, alo, vh, t; \ + ob = (stbsp__int64)xh; \ + vh = (double)ob; \ + ahi = (xh - vh); \ + t = (ahi - xh); \ + alo = (xh - (ahi - t)) - (vh + t); \ + ob += (stbsp__int64)(ahi + alo + xl); \ + } + +#define stbsp__ddrenorm(oh, ol) \ + { \ + double s; \ + s = oh + ol; \ + ol = ol - (s - oh); \ + oh = s; \ + } + +#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh); + +#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl); + +static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350 +{ + double ph, pl; + if ((power >= 0) && (power <= 22)) { + stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]); + } else { + stbsp__int32 e, et, eb; + double p2h, p2l; + + e = power; + if (power < 0) + e = -e; + et = (e * 0x2c9) >> 14; /* %23 */ + if (et > 13) + et = 13; + eb = e - (et * 23); + + ph = d; + pl = 0.0; + if (power < 0) { + if (eb) { + --eb; + stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]); + stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]); + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]); + ph = p2h; + pl = p2l; + } + } else { + if (eb) { + e = eb; + if (eb > 22) + eb = 22; + e -= eb; + stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]); + if (e) { + 
stbsp__ddrenorm(ph, pl); + stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]); + stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl); + ph = p2h; + pl = p2l; + } + } + if (et) { + stbsp__ddrenorm(ph, pl); + --et; + stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]); + stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]); + ph = p2h; + pl = p2l; + } + } + } + stbsp__ddrenorm(ph, pl); + *ohi = ph; + *olo = pl; +} + +// given a float value, returns the significant bits in bits, and the position of the +// decimal point in decimal_pos. +/-INF and NAN are specified by special values +// returned in the decimal_pos parameter. +// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000 +static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits) +{ + double d; + stbsp__int64 bits = 0; + stbsp__int32 expo, e, ng, tens; + + d = value; + STBSP__COPYFP(bits, d); + expo = (stbsp__int32)((bits >> 52) & 2047); + ng = (stbsp__int32)((stbsp__uint64) bits >> 63); + if (ng) + d = -d; + + if (expo == 2047) // is nan or inf? + { + *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf"; + *decimal_pos = STBSP__SPECIAL; + *len = 3; + return ng; + } + + if (expo == 0) // is zero or denormal + { + if (((stbsp__uint64) bits << 1) == 0) // do zero + { + *decimal_pos = 1; + *start = out; + out[0] = '0'; + *len = 1; + return ng; + } + // find the right expo for denormals + { + stbsp__int64 v = ((stbsp__uint64)1) << 51; + while ((bits & v) == 0) { + --expo; + v >>= 1; + } + } + } + + // find the decimal exponent as well as the decimal bits of the value + { + double ph, pl; + + // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046 + tens = expo - 1023; + tens = (tens < 0) ? 
((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1); + + // move the significant bits into position and stick them into an int + stbsp__raise_to_power10(&ph, &pl, d, 18 - tens); + + // get full as much precision from double-double as possible + stbsp__ddtoS64(bits, ph, pl); + + // check if we undershot + if (((stbsp__uint64)bits) >= stbsp__tento19th) + ++tens; + } + + // now do the rounding in integer land + frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits); + if ((frac_digits < 24)) { + stbsp__uint32 dg = 1; + if ((stbsp__uint64)bits >= stbsp__powten[9]) + dg = 10; + while ((stbsp__uint64)bits >= stbsp__powten[dg]) { + ++dg; + if (dg == 20) + goto noround; + } + if (frac_digits < dg) { + stbsp__uint64 r; + // add 0.5 at the right position and round + e = dg - frac_digits; + if ((stbsp__uint32)e >= 24) + goto noround; + r = stbsp__powten[e]; + bits = bits + (r / 2); + if ((stbsp__uint64)bits >= stbsp__powten[dg]) + ++tens; + bits /= r; + } + noround:; + } + + // kill long trailing runs of zeros + if (bits) { + stbsp__uint32 n; + for (;;) { + if (bits <= 0xffffffff) + break; + if (bits % 1000) + goto donez; + bits /= 1000; + } + n = (stbsp__uint32)bits; + while ((n % 1000) == 0) + n /= 1000; + bits = n; + donez:; + } + + // convert to string + out += 64; + e = 0; + for (;;) { + stbsp__uint32 n; + char *o = out - 8; + // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned) + if (bits >= 100000000) { + n = (stbsp__uint32)(bits % 100000000); + bits /= 100000000; + } else { + n = (stbsp__uint32)bits; + bits = 0; + } + while (n) { + out -= 2; + *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2]; + n /= 100; + e += 2; + } + if (bits == 0) { + if ((e) && (out[0] == '0')) { + ++out; + --e; + } + break; + } + while (out != o) { + *--out = '0'; + ++e; + } + } + + *decimal_pos = tens; + *start = out; + *len = e; + return ng; +} + +#undef 
stbsp__ddmulthi +#undef stbsp__ddrenorm +#undef stbsp__ddmultlo +#undef stbsp__ddmultlos +#undef STBSP__SPECIAL +#undef STBSP__COPYFP + +#endif // STB_SPRINTF_NOFLOAT + +// clean up +#undef stbsp__uint16 +#undef stbsp__uint32 +#undef stbsp__int32 +#undef stbsp__uint64 +#undef stbsp__int64 +#undef STBSP__UNALIGNED + +#endif // STB_SPRINTF_IMPLEMENTATION + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ diff --git a/lib/c/glad/glad.c b/src/lib/glad/glad.c similarity index 100% rename from lib/c/glad/glad.c rename to src/lib/glad/glad.c diff --git a/lib/c/glad/glad.h b/src/lib/glad/glad.h similarity index 100% rename from lib/c/glad/glad.h rename to src/lib/glad/glad.h diff --git a/lib/c/loaders/stb_image.h b/src/lib/loaders/stb_image.h similarity index 100% rename from lib/c/loaders/stb_image.h rename to src/lib/loaders/stb_image.h diff --git a/lib/c/loaders/tinyobj.h b/src/lib/loaders/tinyobj.h similarity index 100% rename from lib/c/loaders/tinyobj.h rename to src/lib/loaders/tinyobj.h diff --git a/src/main.cpp b/src/main.cpp index 675abec..33a613f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,39 +1,33 @@ -#include -#include -#include -#include #include -#include -#include #include #include -#include "glad/glad.h" +#include "lib/glad/glad.h" #include #include #include #include #include #include -#include "loaders/stb_image.h" +#include "lib/loaders/stb_image.h" -#include "gfx/geometry.h" #include "gfx/Texture.h" #include "gfx/Mesh.h" #include "gfx/Shader.h" #include "gfx/Color.h" #include "VoxelSpace.h" #include "SomaSolve.h" +#include "lib/djstdlib/core.cpp" struct Entity; struct Polycube; struct SceneGraphNode; -auto new_entity() -> int; -auto get_entity(int id) -> Entity*; -auto get_scene_graph_node(int id) -> SceneGraphNode*; -auto new_graph_node() -> int; +int new_entity(); +Entity *get_entity(int id); +SceneGraphNode *get_scene_graph_node(int id); +int new_graph_node(); -auto print_mat(glm::mat4* matrix) -> void { +void print_mat(glm::mat4* matrix) { auto mat = *matrix; std::cout << mat[0][0] << mat[0][1] << mat[0][2] << mat[0][3] << std::endl; std::cout << mat[1][0] << mat[1][1] << mat[1][2] << mat[1][3] << std::endl; @@ -49,19 +43,19 @@ struct Camera { glm::vec3 up; glm::vec3 target; - auto init(float aspect_ratio = 800.0f / 600.0f) -> void { + void init(float 
aspect_ratio = 800.0f / 600.0f) { view = glm::mat4(); proj = glm::perspective(glm::radians(45.0f), aspect_ratio, 0.1f, 100.0f); pos = glm::vec3(0.0f); up = glm::vec3(0.0f, 1.0f, 0.0f); } - auto look_at(float x, float y, float z) -> void { + void look_at(float x, float y, float z) { target = glm::vec3(x, y, z); view = glm::lookAt(pos, target, up); } - auto set_up(float up_x, float up_y, float up_z) -> void { + void set_up(float up_x, float up_y, float up_z) { up = glm::vec3(up_x, up_y, up_z); } }; @@ -69,7 +63,7 @@ struct Camera { struct GlobalAppState { int current_polycube; int last_polycube_visible; - Shader* active_shader; + Shader *active_shader; std::vector polycubes; }; GlobalAppState app_state; @@ -80,8 +74,8 @@ struct WindowDims { }; struct Entity { - Mesh* mesh; - Texture* tex; + Mesh *mesh; + Texture *tex; bool visible; int scene_graph_node; }; @@ -95,19 +89,19 @@ struct SceneGraphNode { std::vector children; std::optional entity; - auto reset() -> void { + void reset() { scale = glm::vec3(1.0f, 1.0f, 1.0f); translation = glm::vec3(0.0f, 0.0f, 0.0f); rotation = glm::quat(0.0f, 0.0f, 0.0f, 0.0f); } - auto init() -> void { + void init() { reset(); local = glm::mat4(1.0f); world = local; } - auto update_local() -> void { + void update_local() { local = glm::scale( glm::translate( glm::mat4(1.0f), @@ -122,8 +116,8 @@ struct Polycube { int graph_node; glm::vec3 color; - auto show() -> void { - auto node = get_scene_graph_node(graph_node); + void show() { + SceneGraphNode *node = get_scene_graph_node(graph_node); for (auto &child : node->children) { auto node = get_scene_graph_node(child); if (node->entity) { @@ -132,19 +126,19 @@ struct Polycube { } } - auto hide() -> void { - auto node = get_scene_graph_node(graph_node); - for (auto &child : node->children) { - auto node = get_scene_graph_node(child); + void hide() { + SceneGraphNode *node = get_scene_graph_node(graph_node); + for (int &child : node->children) { + SceneGraphNode *node = 
get_scene_graph_node(child); if (node->entity) { get_entity(*node->entity)->visible = false; } } } - auto get_centre() -> glm::vec3 { - auto centre = glm::vec3(0.0f); - for (auto &child : get_scene_graph_node(graph_node)->children) { + glm::vec3 get_centre() { + glm::vec3 centre = glm::vec3(0.0f); + for (int &child : get_scene_graph_node(graph_node)->children) { centre += get_scene_graph_node(child)->translation; } centre /= get_scene_graph_node(graph_node)->children.size(); @@ -159,17 +153,17 @@ struct Frame { int y; Camera* cam; - auto init(Camera* camera) -> void { + void init(Camera* camera) { camera->init((float)width / (float)height); cam = camera; } }; -auto framebuffer_size_callback(GLFWwindow* window, int width, int height) -> void { +void framebuffer_size_callback(GLFWwindow *window, int width, int height) { glViewport(0, 0, width, height); } -auto init_window_and_gl(WindowDims* window_dims) -> GLFWwindow* { +GLFWwindow *init_window_and_gl(WindowDims *window_dims) { glfwInit(); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6); @@ -193,19 +187,19 @@ auto init_window_and_gl(WindowDims* window_dims) -> GLFWwindow* { return window; } -auto gl_update_viewport(WindowDims* window_dims, Frame* frame) -> void { +void gl_update_viewport(WindowDims* window_dims, Frame* frame) { glViewport(frame->x, window_dims->height - frame->y - frame->height, frame->width, frame->height); } -auto cube_mesh = Mesh{}; -auto wall_tex = Texture{}; -auto entities = std::vector(); -auto scene_graph_nodes = std::vector(); +Mesh cube_mesh = {0}; +Texture wall_tex = {0}; +std::vector entities = std::vector(); +std::vector scene_graph_nodes = std::vector(); -auto process_input(GLFWwindow *window) -> void { - static auto wireframe = false; - static auto last_frame_state_press_enter = false; - static auto last_frame_state_press = false; +void process_input(GLFWwindow *window) { + static bool wireframe = false; + static bool 
last_frame_state_press_enter = false; + static bool last_frame_state_press = false; if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS) { glfwSetWindowShouldClose(window, true); @@ -232,7 +226,7 @@ auto process_input(GLFWwindow *window) -> void { } -auto new_entity() -> int { +int new_entity() { entities.emplace_back(); scene_graph_nodes.emplace_back(); entities.back().scene_graph_node = scene_graph_nodes.size(); @@ -240,21 +234,21 @@ auto new_entity() -> int { return entities.size(); } -auto get_entity(int id) -> Entity* { +Entity *get_entity(int id) { return &entities[id - 1]; } -auto get_scene_graph_node(int id) -> SceneGraphNode* { +SceneGraphNode *get_scene_graph_node(int id) { return &scene_graph_nodes[id - 1]; } -auto new_graph_node() -> int { +int new_graph_node() { scene_graph_nodes.emplace_back(); return scene_graph_nodes.size(); } -auto draw_entity(Entity* entity) -> void { - auto modelUniformLoc = glGetUniformLocation(app_state.active_shader->prog_id, "model"); +void draw_entity(Entity *entity) { + GLint modelUniformLoc = glGetUniformLocation(app_state.active_shader->prog_id, "model"); glUniformMatrix4fv(modelUniformLoc, 1, GL_FALSE, glm::value_ptr(get_scene_graph_node(entity->scene_graph_node)->world)); glBindTexture(GL_TEXTURE_2D, entity->tex->tex_id); glBindVertexArray(entity->mesh->vao); @@ -262,17 +256,17 @@ auto draw_entity(Entity* entity) -> void { //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0); } -auto create_polycube_from_repr(Voxel::Space* repr) -> Polycube { - auto polycube_id = new_graph_node(); +Polycube create_polycube_from_repr(Space *repr) { + int polycube_id = new_graph_node(); get_scene_graph_node(polycube_id)->init(); for (int x = 0; x < repr->dim_x; x++) { for (int y = 0; y < repr->dim_y; y++) { for (int z = 0; z < repr->dim_z; z++) { - if (Voxel::filledAt(repr, x, y, z)) { - auto polycube_segment = get_entity(new_entity()); + if (filledAt(repr, x, y, z)) { + Entity *polycube_segment = 
get_entity(new_entity()); polycube_segment->mesh=&cube_mesh, polycube_segment->tex=&wall_tex; - auto graph_node = get_scene_graph_node(polycube_segment->scene_graph_node); + SceneGraphNode *graph_node = get_scene_graph_node(polycube_segment->scene_graph_node); graph_node->init(); graph_node->translation = glm::vec3( -((repr->dim_z - 1)/2.0f) + z, @@ -285,67 +279,67 @@ auto create_polycube_from_repr(Voxel::Space* repr) -> Polycube { } } } - auto result = Polycube{ + Polycube result = { .graph_node=polycube_id, .color=glm::vec3(1.0f), }; return result; } -auto recalculate_scene_graph(SceneGraphNode* top) -> void { +void recalculate_scene_graph(SceneGraphNode *top) { if (top->children.size() == 0) { return; } - for (auto &node_id : top->children) { - auto graph_node = get_scene_graph_node(node_id); + for (int &node_id : top->children) { + SceneGraphNode *graph_node = get_scene_graph_node(node_id); graph_node->update_local(); graph_node->world = top->world * graph_node->local; recalculate_scene_graph(graph_node); } } -auto main_cmd() -> int { - SomaSolve::interactive_cmd_line_solve_soma(); +int main_cmd() { + interactive_cmd_line_solve_soma(); return 0; } -auto main_gfx() -> int { - auto window_dims = WindowDims{ 800, 600 }; - auto window = init_window_and_gl(&window_dims); - if (window == nullptr) { +int main_gfx() { + WindowDims window_dims = { 800, 600 }; + GLFWwindow *window = init_window_and_gl(&window_dims); + if (!window) { return -1; } - app_state = GlobalAppState{ + app_state = { .current_polycube=0, .last_polycube_visible=6, - .active_shader=nullptr, + .active_shader=0, .polycubes={}, }; - auto phong_shader = Shader{}; + Shader phong_shader = {0}; phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl"); app_state.active_shader = &phong_shader; cube_mesh.init("../assets/models/c000000.obj"); wall_tex.init("../assets/textures/brick-wall.jpg"); - auto little_frame = Frame{ .width=80, .height=60, .x=20, .y=20 
}; - auto big_frame = Frame{ .width=800, .height=600, .x=0, .y=0 }; - auto main_cam = Camera{}; - auto other_cam = Camera{}; + Frame little_frame = { .width=80, .height=60, .x=20, .y=20 }; + Frame big_frame = { .width=800, .height=600, .x=0, .y=0 }; + Camera main_cam = {}; + Camera other_cam = {}; little_frame.init(&other_cam); big_frame.init(&main_cam); - auto frames = std::vector{ &big_frame, &little_frame }; + std::vector frames = { &big_frame, &little_frame }; - auto root_node = SceneGraphNode{}; + SceneGraphNode root_node = {}; root_node.init(); - for (int i = 0; i < SomaSolve::STD_SOMA.size(); i++) { - auto voxel_space = Voxel::Space{ SomaSolve::STD_SOMA[i], 3, 3, 3 }; - Voxel::cullEmptySpace(&voxel_space); - auto polycube = create_polycube_from_repr(&voxel_space); - polycube.color = Color::color_from_index(i); + for (int i = 0; i < STD_SOMA.size(); i++) { + auto voxel_space = Space{ STD_SOMA[i], 3, 3, 3 }; + cullEmptySpace(&voxel_space); + Polycube polycube = create_polycube_from_repr(&voxel_space); + polycube.color = color_from_index(i); app_state.polycubes.push_back(polycube); root_node.children.push_back(app_state.polycubes.back().graph_node); } @@ -353,18 +347,18 @@ auto main_gfx() -> int { main_cam.pos = glm::vec3(4.0f, 4.0f, 4.0f); main_cam.look_at(0.0f, 0.0f, 0.0f); - auto light_pos = glm::vec3(6.0f); + glm::vec3 light_pos = glm::vec3(6.0f); glUseProgram(app_state.active_shader->prog_id); - auto view_loc = glGetUniformLocation(app_state.active_shader->prog_id, "view"); - auto proj_loc = glGetUniformLocation(app_state.active_shader->prog_id, "projection"); - auto light_pos_loc = glGetUniformLocation(app_state.active_shader->prog_id, "light_pos"); + GLint view_loc = glGetUniformLocation(app_state.active_shader->prog_id, "view"); + GLint proj_loc = glGetUniformLocation(app_state.active_shader->prog_id, "projection"); + GLint light_pos_loc = glGetUniformLocation(app_state.active_shader->prog_id, "light_pos"); glUniform3fv(light_pos_loc, 1, 
glm::value_ptr(light_pos)); glUniformMatrix4fv(proj_loc, 1, GL_FALSE, glm::value_ptr(main_cam.proj)); glUniformMatrix4fv(view_loc, 1, GL_FALSE, glm::value_ptr(main_cam.view)); - auto last_frame = glfwGetTime(); - auto time_delta = 1.0f/60.0f; + real32 last_frame = glfwGetTime(); + real32 time_delta = 1.0f/60.0f; while (!glfwWindowShouldClose(window)) { time_delta = glfwGetTime() - last_frame; process_input(window); @@ -379,16 +373,16 @@ auto main_gfx() -> int { glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); gl_update_viewport(&window_dims, &big_frame); - auto current_polycube = &app_state.polycubes[app_state.current_polycube]; + Polycube *current_polycube = &app_state.polycubes[app_state.current_polycube]; get_scene_graph_node(current_polycube->graph_node)->rotation = glm::quat(glm::vec3(0, glfwGetTime() / 2, 0)); glBindVertexArray(cube_mesh.vao); //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id); recalculate_scene_graph(&root_node); - auto model_uniform_loc = glGetUniformLocation(app_state.active_shader->prog_id, "model"); - auto solid_color_loc = glGetUniformLocation(app_state.active_shader->prog_id, "solid_color"); + GLint model_uniform_loc = glGetUniformLocation(app_state.active_shader->prog_id, "model"); + GLint solid_color_loc = glGetUniformLocation(app_state.active_shader->prog_id, "solid_color"); glUniform3fv(solid_color_loc, 1, glm::value_ptr(current_polycube->color)); - for (auto &entity : entities) { + for (Entity &entity : entities) { if (entity.visible) { glUniformMatrix4fv(model_uniform_loc, 1, GL_FALSE, glm::value_ptr(get_scene_graph_node(entity.scene_graph_node)->world)); glDrawArrays(GL_TRIANGLES, 0, entity.mesh->num_indices); @@ -404,7 +398,7 @@ auto main_gfx() -> int { return 0; } -auto main() -> int { +int main() { return main_cmd(); } diff --git a/src/main.zig b/src/main.zig deleted file mode 100644 index 7214248..0000000 --- a/src/main.zig +++ /dev/null @@ -1,402 +0,0 @@ -const std = @import("std"); -const c = @import("c.zig"); -const zm 
= @import("zm"); - -const Mesh = @import("gfx/Mesh.zig").Mesh; - -const ArrayList = std.ArrayList; - -fn print_mat(matrix: *const zm.Mat) void { - std.debug.print("{}, {}, {}, {}\n", .{ matrix[0][0], matrix[0][1], matrix[0][2], matrix[0][3] }); - std.debug.print("{}, {}, {}, {}\n", .{ matrix[1][0], matrix[1][1], matrix[1][2], matrix[1][3] }); - std.debug.print("{}, {}, {}, {}\n", .{ matrix[2][0], matrix[2][1], matrix[2][2], matrix[2][3] }); - std.debug.print("{}, {}, {}, {}\n", .{ matrix[3][0], matrix[3][1], matrix[3][2], matrix[3][3] }); -} - -const Camera = struct { - view: zm.Mat = .{ zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0), zm.f32x4s(0.0) }, - proj: zm.Mat, - pos: zm.Vec = zm.f32x4s(0.0), - up: zm.Vec = zm.f32x4s(0.0), - target: zm.Vec, - - pub fn init(self: Camera, aspect_ratio: f32) void { - self.proj = zm.perspectiveFovRh(std.math.degreesToRadians(45.0), aspect_ratio, 0.1, 100.0); - } - - pub fn new(aspect_ratio: f32) Camera { - const cam = Camera{}; - init(cam, aspect_ratio); - return cam; - } - - pub fn look_at(self: Camera, x: f32, y: f32, z: f32) void { - self.target = zm.f32x4(x, y, z, 0.0); - self.view = zm.lookAtRh(self.pos, self.target, self.up); - } - - pub fn set_up(self: Camera, up_x: f32, up_y: f32, up_z: f32) void { - self.up = zm.f32x4(up_x, up_y, up_z, 0.0); - } -}; - -const GlobalAppState = struct { - current_polycube: i32, - last_polycube_visible: i32, - active_shader: ?*Shader, - polycubes: ArrayList(Polycube), -}; - -const app_state: GlobalAppState = .{}; - -const WindowDims = struct { - width: u32, - height: u32, -}; - -const Entity = struct { - mesh: *Mesh, - tex: *Texture, - visible: bool, - scene_graph_node: i32, -}; - -const SceneGraphNode = struct { - local: zm.Mat, - world: zm.Mat, - translation: zm.Vec, - rotation: zm.Quat, - scale: zm.Vec, - children: ArrayList(i32), - entity: ?i32, - - pub fn reset(self: SceneGraphNode) void { - self.scale = zm.f32x4(1.0, 1.0, 1.0, 0.0); - self.translation = zm.f32x4s(0.0); - self.rotation 
= zm.f32x4s(0.0); - } - - pub fn init(self: SceneGraphNode) void { - self.reset(); - self.local = zm.identity(); - self.world = self.local; - } - - pub fn update_local(self: SceneGraphNode) void { - const scaling = zm.scaling(self.scale); - const translation = zm.translation(self.translation); - const rotation = zm.quatToMat(self.rotation); - self.local = zm.mul(zm.mul(translation, rotation), scaling); - self.local = scaling( - zm.translate( - zm.identity(), - self.translation - ) * toMat4(self.rotation), - self.scale - ); - } -}; - - -const Polycube = struct { - graph_node: i32, - color: zm.Vec, - - pub fn show(self: Polycube) void { - const node = get_scene_graph_node(self.graph_node); - for (node.children.items) |child_id| { - const child_node = get_scene_graph_node(child_id); - if (child_node.entity) |entity_id| { - get_entity(entity_id).visible = true; - } - } - } - - pub fn hide(self: Polycube) void { - const node = get_scene_graph_node(self.graph_node); - for (node.children.items) |child_id| { - const child_node = get_scene_graph_node(child_id); - if (child_node.entity) |entity_id| { - get_entity(entity_id).visible = false; - } - } - } - - pub fn get_centre(self: Polycube) zm.Vec { - const centre = zm.Vec(0.0); - for (get_scene_graph_node(self.graph_node).children.items) |child_id| { - centre += get_scene_graph_node(child_id).translation; - } - centre /= get_scene_graph_node(self.graph_node).children.size(); - return centre; - } -}; - -const Frame = struct { - width: i32, - height: i32, - x: i32, - y: i32, - cam: *Camera, - - pub fn new(camera: *Camera, width: i32, height: i32) Frame { - const frame = Frame{}; - camera.init(@as(f32, width) / @as(f32, height)); - frame.cam = camera; - return frame; - } -}; - -fn framebuffer_size_callback(width: i32, height: i32) void { - c.glViewport(0, 0, width, height); -} - -fn init_window_and_gl(window_dims: *WindowDims) ?*c.GLFWwindow { - c.glfwInit(); - c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MAJOR, 4); - 
c.glfwWindowHint(c.GLFW_CONTEXT_VERSION_MINOR, 6); - c.glfwWindowHint(c.GLFW_OPENGL_PROFILE, c.GLFW_OPENGL_CORE_PROFILE); - const window = c.glfwCreateWindow(window_dims.width, window_dims.height, "Somaesque", c.NULL, c.NULL); - if (window == c.NULL) { - std.debug.print("Failed to create GLFW window"); - c.glfwTerminate(); - return null; - } - c.glfwMakeContextCurrent(window); - - if (!c.gladLoadGLLoader(@as(c.GLADloadproc, c.glfwGetProcAddress))) { - std.debug.print("Failed to initialize GLAD"); - return null; - } - - c.glViewport(0, 0, 800, 600); - c.glfwSetFramebufferSizeCallback(window, framebuffer_size_callback); - c.glEnable(c.GL_DEPTH_TEST); - return window; -} - -fn gl_update_viewport(window_dims: *WindowDims, frame: *Frame) void { - c.glViewport(frame.x, window_dims.height - frame.y - frame.height, frame.width, frame.height); -} - -const cube_mesh = Mesh{}; -const wall_tex = Texture{}; -const entities = ArrayList(Entity); -const scene_graph_nodes = ArrayList(SceneGraphNode); - -fn process_input(window: *c.GLFWwindow) void { - const static = struct { - wireframe: bool = false, - last_frame_state_press_enter: bool = false, - last_frame_state_press: bool = false, - }; - - if (c.glfwGetKey(window, c.GLFW_KEY_ESCAPE) == c.GLFW_PRESS) { - c.glfwSetWindowShouldClose(window, true); - } - - if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_PRESS and !static.last_frame_state_press) { - c.glPolygonMode(c.GL_FRONT_AND_BACK, if (!static.wireframe) c.GL_LINE else c.GL_FILL); - static.wireframe = !static.wireframe; - static.last_frame_state_press = true; - } else if (c.glfwGetKey(window, c.GLFW_KEY_SPACE) == c.GLFW_RELEASE) { - static.last_frame_state_press = false; - } - - if (c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_PRESS and !static.last_frame_state_press_enter) { - if (app_state.current_polycube == 6) { - app_state.current_polycube = 0; - } else { - app_state.current_polycube += 1; - } - static.last_frame_state_press_enter = true; - } else if 
(c.glfwGetKey(window, c.GLFW_KEY_ENTER) == c.GLFW_RELEASE) { - static.last_frame_state_press_enter = false; - } -} - - -fn new_entity() i32 { - entities.append(.{}); - scene_graph_nodes.append(.{}); - entities.items[entities.items.len - 1].scene_graph_node = scene_graph_nodes.items.len; - scene_graph_nodes.items[scene_graph_nodes.items.len - 1].entity = entities.items.len; - return entities.items.len; -} - -fn get_entity(id: i32) ?*Entity { - if (entities.items[id - 1]) { - return &entities.items[id - 1]; - } - return null; -} - -fn get_scene_graph_node(id: i32) *SceneGraphNode { - if (scene_graph_nodes.items[id - 1]) { - return &scene_graph_nodes.items[id - 1]; - } - return null; -} - -fn new_graph_node() i32 { - scene_graph_nodes.append(.{}); - return scene_graph_nodes.items.len; -} - -fn draw_entity(entity: *Entity) void { - const modelUniformLoc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model"); - c.glUniformMatrix4fv(modelUniformLoc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world); - c.glBindTexture(c.GL_TEXTURE_2D, entity.tex.tex_id); - c.glBindVertexArray(entity.mesh.vao); - c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices); - //c.glDrawElements(c.GL_TRIANGLES, entity.mesh.num_indices, c.GL_UNSIGNED_INT, 0); -} - -fn create_polycube_from_repr(repr: *Voxel.Space) Polycube { - const polycube_id = new_graph_node(); - get_scene_graph_node(polycube_id).init(); - var x: usize = 1; - var y: usize = 1; - var z: usize = 1; - while (x < repr.dim_x) : (x += 1) { - while (y < repr.dim_y) : (y += 1) { - while (z < repr.dim_z) : (z += 1) { - if (Voxel.filledAt(repr, x, y, z)) { - const polycube_segment = get_entity(new_entity()); - polycube_segment.mesh = &cube_mesh; - polycube_segment.tex = &wall_tex; - const graph_node = get_scene_graph_node(polycube_segment.scene_graph_node); - graph_node.init(); - graph_node.translation = zm.f32x4( - -((repr.dim_z - 1)/2.0) + z, - ((repr.dim_x - 1)/2.0) - x, - -((repr.dim_y - 1)/2.0) + y, 
- 0.0, - ); - graph_node.update_local(); - get_scene_graph_node(polycube_id).children.append(polycube_segment.scene_graph_node); - } - } - } - } - const result = Polycube{ - .graph_node = polycube_id, - .color = zm.f32x4s(1.0), - }; - return result; -} - -fn recalculate_scene_graph(top: *SceneGraphNode) void { - if (top.children.size() == 0) { - return; - } - for (top.children.items) |child_id| { - const graph_node = get_scene_graph_node(child_id); - graph_node.update_local(); - graph_node.world = zm.mul(top.world, graph_node.local); - recalculate_scene_graph(graph_node); - } -} - -pub fn main() void { - const window_dims = WindowDims{ 800, 600 }; - const window = init_window_and_gl(&window_dims); - if (window == null) { - return -1; - } - - app_state = GlobalAppState{ - .current_polycube=0, - .last_polycube_visible=6, - .active_shader=null, - .polycubes={}, - }; - - const phong_shader = Shader{}; - phong_shader.init("../assets/shaders/phong-solid.vertex.glsl", "../assets/shaders/phong-solid.fragment.glsl"); - app_state.active_shader = &phong_shader; - - cube_mesh.init("../assets/models/c000000.obj"); - wall_tex.init("../assets/textures/brick-wall.jpg"); - - const little_frame = Frame{ .width=80, .height=60, .x=20, .y=20 }; - const big_frame = Frame{ .width=800, .height=600, .x=0, .y=0 }; - const main_cam = Camera{}; - const other_cam = Camera{}; - little_frame.init(&other_cam); - big_frame.init(&main_cam); - const frames = [_]*Frame{ &big_frame, &little_frame }; - - const root_node = SceneGraphNode{}; - root_node.init(); - - var i: usize = 0; - while (i < SomaSolve.STD_SOMA.items.len) : (i += 1) { - const voxel_space = voxel.Space{ SomaSolve.STD_SOMA[i], 3, 3, 3 }; - voxel.cullEmptySpace(&voxel_space); - const polycube = create_polycube_from_repr(&voxel_space); - polycube.color = color.color_from_index(i); - app_state.polycubes.append(polycube); - root_node.children.append(app_state.polycubes.items[app_state.polycubes.items.len - 1].graph_node); - } - - 
main_cam.pos = zm.f32x4(4.0, 4.0, 4.0, 0.0); - main_cam.look_at(0.0, 0.0, 0.0); - - const light_pos = zm.f32x4(6.0, 6.0, 6.0, 0.0); - - c.glUseProgram(app_state.active_shader.prog_id); - const view_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "view"); - const proj_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "projection"); - const light_pos_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "light_pos"); - c.glUniform3fv(light_pos_loc, 1, &light_pos); - c.glUniformMatrix4fv(proj_loc, 1, GL_FALSE, &main_cam.proj); - c.glUniformMatrix4fv(view_loc, 1, GL_FALSE, &main_cam.view); - - var last_frame = c.glfwGetTime(); - var time_delta = 1.0/60.0; - while (!c.glfwWindowShouldClose(window)) { - time_delta = c.glfwGetTime() - last_frame; - process_input(window); - - if (app_state.last_polycube_visible != app_state.current_polycube) { - app_state.polycubes[app_state.last_polycube_visible].hide(); - app_state.polycubes[app_state.current_polycube].show(); - app_state.last_polycube_visible = app_state.current_polycube; - } - - c.glClearColor(0.0, 0.0, 0.0, 1.0); - c.glClear(c.GL_DEPTH_BUFFER_BIT | c.GL_COLOR_BUFFER_BIT); - - c.gl_update_viewport(&window_dims, &big_frame); - const current_polycube = &app_state.polycubes[app_state.current_polycube]; - c.get_scene_graph_node(current_polycube.graph_node).rotation = zm.quatFromRollPitchYaw(0.0, c.glfwGetTime() / 2.0, 0.0); - - c.glBindVertexArray(cube_mesh.vao); - //glBindTexture(GL_TEXTURE_2D, entity.tex->tex_id); - recalculate_scene_graph(&root_node); - const model_uniform_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "model"); - const solid_color_loc = c.glGetUniformLocation(app_state.active_shader.prog_id, "solid_color"); - c.glUniform3fv(solid_color_loc, 1, &current_polycube.color); - while (entities.items) |entity| { - if (entity.visible) { - c.glUniformMatrix4fv(model_uniform_loc, 1, c.GL_FALSE, &get_scene_graph_node(entity.scene_graph_node).world); -
c.glDrawArrays(c.GL_TRIANGLES, 0, entity.mesh.num_indices); - //glDrawElements(GL_TRIANGLES, entity->mesh->num_indices, GL_UNSIGNED_INT, 0); - } - } - - c.glfwSwapBuffers(window); - c.glfwPollEvents(); - } - - c.glfwTerminate(); - return 0; -} - -//test "simple test" { -// var list = std.ArrayList(i32).init(std.testing.allocator); -// defer list.deinit(); // try commenting this out and see if zig detects the memory leak! -// try list.append(42); -// try std.testing.expectEqual(@as(i32, 42), list.pop()); -//}